Bitdefender Hypervisor Memory Introspection
codeblocks.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Bitdefender
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #include "codeblocks.h"
6 #include "crc32.h"
7 #include "decoder.h"
8 #include "utils.h"
9 
10 
30 
31 
32 
/// Used to format log lines containing code-blocks.
33 static CHAR gCbLog[512];
34 
35 
///
/// @brief  Extract a pattern of instructions without using the disassembler.
///
/// Recognizes a fixed set of frequent x86/x64 encodings directly from the raw bytes at the start of Buffer.
/// The branches are tested in order and the first match wins, so reordering them may change the result.
///
/// NOTE(review): this listing omits source line 37, which carries the function name (IntFragHandleCommon,
/// according to the symbol index at the end of the page).
///
/// @param[in]  Buffer          The bytes to be inspected.
/// @param[in]  BufSize         Number of valid bytes in Buffer. Every multi-byte match checks it first.
/// @param[in]  CsType          Code segment type. For IG_CS_TYPE_16B nothing is matched.
/// @param[in]  ExtractLevel    For levels >= cbLevelMedium, flags/mov instructions are reported through Pattern.
/// @param[out] Pattern         Always written: codeInsInvalid unless the matched instruction has a CODE_INS value.
///
/// @returns    The length, in bytes, of the recognized instruction, or 0 if nothing was recognized.
///
36 DWORD
38  _In_reads_(BufSize) const BYTE *Buffer,
39  _In_ size_t BufSize,
40  _In_ IG_CS_TYPE CsType,
41  _In_ CB_EXTRACT_LEVEL ExtractLevel,
42  _Out_ DWORD *Pattern
43  )
62 {
63  *Pattern = codeInsInvalid;
64 
65  if (IG_CS_TYPE_16B == CsType)
66  {
67  return 0;
68  }
69 
 // Note: the macro is defined inside the function body and is not #undef-ed in the visible code
70 #define IS_REX_PREFIX(b) ((ND_PREFIX_REX_MIN <= (b)) && ((b) <= ND_PREFIX_REX_MAX))
71 
72  if (0 == BufSize)
73  {
74  return 0;
75  }
76 
77  // Try common patterns (both x86 and x64)
78  if (0x90 == *Buffer || 0xcc == *Buffer)
79  {
80  // NOP/INT3
81  return 1;
82  }
83  else if (BufSize >= 2 &&
84  (0x84 == Buffer[0] || 0x85 == Buffer[0]) &&
85  (0xc0 <= Buffer[1]))
86  {
87  // TEST reg8/32, reg8/32
88  return 2;
89  }
90  else if (BufSize >= 2 &&
91  0xcd == *Buffer)
92  {
93  // INT imm8
94  return 2;
95  }
96  else if (0xc3 == *Buffer || 0xcb == *Buffer)
97  {
 // RET (near/far), no immediate
98  *Pattern = codeInsRet;
99  return 1;
100  }
101  else if (BufSize >= 5 &&
102  0xe8 == *Buffer)
103  {
 // CALL rel32
104  *Pattern = codeInsCall;
105  return 5;
106  }
107  else if (BufSize >= 6 &&
108  0xff == Buffer[0] && 0x15 == Buffer[1])
109  {
 // CALL through a memory operand addressed by a 32-bit displacement (FF 15 disp32)
110  *Pattern = codeInsCall;
111  return 6;
112  }
113  else if (BufSize >= 5 &&
114  0xe9 == *Buffer)
115  {
 // JMP rel32
116  *Pattern = codeInsJmp;
117  return 5;
118  }
119  else if (BufSize >= 2 &&
120  0xeb == *Buffer)
121  {
 // JMP rel8
122  *Pattern = codeInsJmp;
123  return 2;
124  }
125  else if (BufSize >= 2 &&
126  (0x70 <= *Buffer && *Buffer <= 0x7f))
127  {
 // Jcc rel8
128  *Pattern = codeInsJc;
129  return 2;
130  }
131  else if (BufSize >= 3 &&
132  (0xc2 == *Buffer || 0xca == *Buffer))
133  {
 // RET imm16 (near/far)
134  *Pattern = codeInsRet;
135  return 3;
136  }
137  else if (0xaa <= *Buffer && *Buffer <= 0xad)
138  {
 // STOS/LODS
139  *Pattern = codeInsStr;
140  return 1;
141  }
142  else if (0x9c == *Buffer || 0x9d == *Buffer)
143  {
 // PUSHF/POPF: always skipped, but reported only starting with the medium level
144  if (ExtractLevel >= cbLevelMedium)
145  {
146  *Pattern = codeInsFlags;
147  }
148  return 1;
149  }
150  else if (0x91 <= *Buffer && *Buffer <= 0x97)
151  {
 // XCHG eax, reg32 (0x90, the NOP encoding, was already handled above)
152  *Pattern = codeInsXchg;
153  return 1;
154  }
155  else if (BufSize >= 2 &&
156  (0xb0 <= *Buffer && *Buffer <= 0xb7))
157  {
 // MOV reg8, imm8
158  if (ExtractLevel >= cbLevelMedium)
159  {
160  *Pattern = codeInsMovImm;
161  }
162 
163  return 2;
164  }
165  else if (BufSize >= 5 &&
166  (0xb8 <= *Buffer && *Buffer <= 0xbf))
167  {
 // MOV reg32, imm32
168  if (ExtractLevel >= cbLevelMedium)
169  {
170  *Pattern = codeInsMovImm;
171  }
172 
173  return 5;
174  }
175  else if (0x50 <= *Buffer && *Buffer <= 0x5f)
176  {
177  // PUSH/POP
178  return 1;
179  }
180  else if (BufSize >= 2 &&
181  0xa8 == *Buffer)
182  {
183  // TEST al, imm8
184  return 2;
185  }
186  else if (BufSize >= 3 &&
187  0xf6 == Buffer[0] &&
188  (0xc0 <= Buffer[1] && Buffer[1] <= 0xcf))
189  {
190  // TEST reg8, imm8
191  return 3;
192  }
193  else if (0x98 == *Buffer || 0x99 == *Buffer)
194  {
195  // CBW/CWDE or CWD/CDQ
196  return 1;
197  }
198  else if (BufSize >= 5 &&
199  0xa9 == *Buffer)
200  {
201  // TEST eax, imm32
202  return 5;
203  }
204  else if (BufSize >= 2 &&
205  0x00 == Buffer[0] && 0x00 == Buffer[1])
206  {
207  // NULL bytes, very common
208  return 2;
209  }
210  else if (BufSize >= 3 &&
211  (0x83 == Buffer[0] && (0xe0 <= Buffer[1] && Buffer[1] <= 0xe3)))
212  {
213  // AND eax/ecx/edx/ebx, imm8
214  return 3;
215  }
216  else if (BufSize >= 2 &&
217  0x33 == Buffer[0] &&
218  0xc0 <= Buffer[1])
219  {
220  // XOR reg32, reg32
221  return 2;
222  }
223  else if (BufSize >= 4 &&
224  0x89 == Buffer[0] && 0x24 == Buffer[2] &&
225  (0x4c == Buffer[1] ||
226  0x5c == Buffer[1] ||
227  0x6c == Buffer[1] ||
228  0x7c == Buffer[1] ||
229  0x44 == Buffer[1] ||
230  0x54 == Buffer[1] ||
231  0x64 == Buffer[1] ||
232  0x74 == Buffer[1]))
233  {
234  // MOV [rsp + imm8], reg32
235  // MOV reg32, [rsp + imm8]
 // NOTE(review): only opcode 0x89 is tested here, while the REX-prefixed variant further below
 // accepts both 0x89 and 0x8b - confirm whether 0x8b was left out on purpose
236  if (ExtractLevel >= cbLevelMedium)
237  {
238  *Pattern = codeInsMovMem;
239  }
240 
241  return 4;
242  }
243  else if (BufSize >= 7 &&
244  0x89 == Buffer[0] && 0x24 == Buffer[2] &&
245  (0x8c == Buffer[1] ||
246  0x9c == Buffer[1] ||
247  0xac == Buffer[1] ||
248  0xbc == Buffer[1] ||
249  0x84 == Buffer[1] ||
250  0x94 == Buffer[1] ||
251  0xa4 == Buffer[1] ||
252  0xb4 == Buffer[1]))
253  {
254  // MOV [rsp + imm32], reg32
255  // MOV reg32, [rsp + imm32]
 // NOTE(review): as above, only opcode 0x89 is tested in this non-REX form
256  if (ExtractLevel >= cbLevelMedium)
257  {
258  *Pattern = codeInsMovMem;
259  }
260 
261  return 7;
262  }
263  else if (BufSize >= 6 &&
264  0x81 == Buffer[0] && 0xc4 == Buffer[1])
265  {
266  // ADD rsp, imm32
267  return 6;
268  }
269  else if (BufSize >= 3 &&
270  0x83 == Buffer[0] && 0xc4 == Buffer[1])
271  {
272  // ADD rsp, imm8
273  return 3;
274  }
275  else if (BufSize >= 6 &&
276  0x81 == Buffer[0] && 0xec == Buffer[1])
277  {
278  // SUB rsp, imm32
279  return 6;
280  }
281  else if (BufSize >= 3 &&
282  0x83 == Buffer[0] && 0xec == Buffer[1])
283  {
284  // SUB rsp, imm8
285  return 3;
286  }
287  else if (BufSize >= 2 &&
288  (0x8b == Buffer[0] || 0x89 == Buffer[0]) &&
289  0xc0 <= Buffer[1])
290  {
291  // MOV reg32, reg32
292  if (ExtractLevel >= cbLevelMedium)
293  {
294  *Pattern = codeInsMovReg;
295  }
296 
297  return 2;
298  }
299  else if (BufSize >= 6 &&
300  0x0f == *Buffer &&
301  (0x80 <= Buffer[1] && Buffer[1] <= 0x8f))
302  {
 // Jcc rel32
303  *Pattern = codeInsJc;
304  return 6;
305  }
306  else if (BufSize >= 2 &&
307  0xff == Buffer[0] &&
308  (0xc0 <= Buffer[1] && Buffer[1] <= 0xcf))
309  {
310  // INC/DEC reg32
311  return 2;
312  }
313  else if (BufSize >= 2 &&
314  0x3b == Buffer[0] &&
315  0xc0 <= Buffer[1])
316  {
317  // CMP reg32, reg32
318  return 2;
319  }
320 
 // Mode specific patterns: bytes 0x40-0x4f are INC/DEC on 32-bit, but are treated as REX prefixes on 64-bit
321  if (IG_CS_TYPE_32B == CsType)
322  {
323  if (0x40 <= *Buffer && *Buffer <= 0x4f)
324  {
325  // INC/DEC on 32-bit
326  return 1;
327  }
328  }
329  else if (IG_CS_TYPE_64B == CsType)
330  {
331  if (BufSize >= 4 &&
332  0x48 == Buffer[0] && 0x83 == Buffer[1] && 0xc4 == Buffer[2])
333  {
334  // ADD rsp, imm8
335  return 4;
336  }
337  else if (BufSize >= 7 &&
338  0x48 == Buffer[0] && 0x81 == Buffer[1] && 0xec == Buffer[2])
339  {
340  // SUB rsp, imm32
341  return 7;
342  }
343  else if (BufSize >= 5 &&
344  (0x48 == Buffer[0] || 0x4c == Buffer[0]) &&
345  (0x89 == Buffer[1] || 0x8b == Buffer[1]) &&
346  0x24 == Buffer[3] &&
347  (0x4c == Buffer[2] ||
348  0x5c == Buffer[2] ||
349  0x6c == Buffer[2] ||
350  0x7c == Buffer[2] ||
351  0x44 == Buffer[2] ||
352  0x54 == Buffer[2] ||
353  0x64 == Buffer[2] ||
354  0x74 == Buffer[2]))
355  {
356  // MOV [rsp + imm8], reg64
357  // MOV reg64, [rsp + imm8]
358  if (ExtractLevel >= cbLevelMedium)
359  {
360  *Pattern = codeInsMovMem;
361  }
362 
363  return 5;
364  }
365  else if (BufSize >= 8 &&
366  (0x48 == Buffer[0] || 0x4c == Buffer[0]) &&
367  (0x89 == Buffer[1] || 0x8b == Buffer[1]) &&
368  0x24 == Buffer[3] &&
369  (0x8c == Buffer[2] ||
370  0x9c == Buffer[2] ||
371  0xac == Buffer[2] ||
372  0xbc == Buffer[2] ||
373  0x84 == Buffer[2] ||
374  0x94 == Buffer[2] ||
375  0xa4 == Buffer[2] ||
376  0xb4 == Buffer[2]))
377  {
378  // MOV [rsp + imm32], reg64
379  // MOV reg64, [rsp + imm32]
380  if (ExtractLevel >= cbLevelMedium)
381  {
382  *Pattern = codeInsMovMem;
383  }
384 
385  return 8;
386  }
387  else if (BufSize >= 4
388  && 0x48 == Buffer[0] && 0x83 == Buffer[1] && 0xec == Buffer[2])
389  {
390  // SUB rsp, imm8
391  return 4;
392  }
393  else if (IS_REX_PREFIX(*Buffer))
394  {
 // Generic REX-prefixed forms: Buffer[0] is the prefix, the opcode starts at Buffer[1]
395  if (BufSize >= 3 &&
396  0x33 == Buffer[1] &&
397  0xc0 <= Buffer[2])
398  {
399  // XOR reg64, reg64
400  return 3;
401  }
402  else if (BufSize >= 3 &&
403  0x3b == Buffer[1] &&
404  0xc0 <= Buffer[2])
405  {
406  // CMP reg64, reg64
407  return 3;
408  }
409  else if (BufSize >= 3 &&
410  0xff == Buffer[1] &&
411  (0xc0 <= Buffer[2] && Buffer[2] <= 0xcf))
412  {
413  // INC/DEC reg32/reg64
414  return 3;
415  }
416  else if (BufSize >= 6 &&
417  0 == (*Buffer & BIT(3)) &&
418  (0xb8 <= Buffer[1] && Buffer[1] <= 0xbf))
419  {
 // MOV reg32, imm32 with a REX prefix whose bit 3 (presumably REX.W, given BIT(3)) is clear
420  if (ExtractLevel >= cbLevelMedium)
421  {
422  *Pattern = codeInsMovImm;
423  }
424 
425  return 6;
426  }
427  else if (BufSize >= 10 &&
428  0 != (*Buffer & BIT(3)) &&
429  (0xb8 <= Buffer[1] && Buffer[1] <= 0xbf))
430  {
 // MOV reg64, imm64: same opcode, but bit 3 of the prefix is set, so the immediate is 8 bytes long
431  if (ExtractLevel >= cbLevelMedium)
432  {
433  *Pattern = codeInsMovImm;
434  }
435 
436  return 10;
437  }
438  else if (BufSize >= 4 &&
439  0x8d == Buffer[1] &&
440  (0x45 == Buffer[2] ||
441  0x55 == Buffer[2]))
442  {
443  // LEA reg64, [reg64 + imm8]
444  return 4;
445  }
446  else if (BufSize >= 5 &&
447  0x8d == Buffer[1] &&
448  0x24 == Buffer[3] &&
449  (0x44 == Buffer[2] ||
450  0x4c == Buffer[2] ||
451  0x54 == Buffer[2] ||
452  0x5c == Buffer[2]))
453  {
454  // LEA reg64, [rsp + imm8]
455  return 5;
456  }
457  else if (BufSize >= 3 &&
458  (0x8b == Buffer[1] || 0x89 == Buffer[1]) &&
459  0xc0 <= Buffer[2])
460  {
461  // MOV reg32/64, reg32/64
462  if (ExtractLevel >= cbLevelMedium)
463  {
464  *Pattern = codeInsMovReg;
465  }
466 
467  return 3;
468  }
469  else if (BufSize >= 3 &&
470  ((0x84 == Buffer[1] || 0x85 == Buffer[1]) &&
471  (0xc0 <= Buffer[2])))
472  {
473  // TEST reg64, reg64
474  return 3;
475  }
476  else if (BufSize >= 6 &&
477  0xa9 == Buffer[1])
478  {
479  // TEST eax/rax, imm32
480  return 6;
481  }
482  else if (BufSize >= 2 &&
483  (0x98 == Buffer[1] || 0x99 == Buffer[1]))
484  {
485  // CBW/CWDE or CWD/CDQ
486  return 2;
487  }
488  else if (BufSize >= 2 &&
489  (0x50 <= Buffer[1] && Buffer[1] <= 0x5f))
490  {
491  // PUSH/POP on 64-bit with REX prefix
492  return 2;
493  }
494  }
495  }
496 
 // Nothing matched
497  return 0;
498 }
499 
500 
///
/// @brief  Extract a pattern of code-blocks from the given code buffer.
///
/// Walks Buffer instruction by instruction and appends one CODE_INS value to Pattern for every instruction that
/// belongs to a tracked class. IntFragHandleCommon is tried first; only if it recognizes nothing is the
/// instruction decoded with IntDecDecodeInstructionFromBuffer.
///
/// NOTE(review): this listing omits source line 502 (the function name, IntFragExtractPattern according to the
/// symbol index) and the statements inside the parameter guards below (presumably early returns) - confirm
/// against the original file.
///
/// @param[in]      Buffer          The code to be parsed.
/// @param[in]      MaxBufferSize   The number of bytes available in Buffer.
/// @param[in]      CsType          The code segment type used for decoding.
/// @param[in]      ExtractLevel    cbLevelNormal tracks only the control-flow/string/xchg/bt classes; other
///                                 levels also track MOV variants and PUSHF/POPF.
/// @param[in]      PatternSize     The capacity, in entries, of Pattern.
/// @param[out]     Pattern         Receives the extracted CODE_INS values, one byte each.
/// @param[out]     TotalExtracted  Receives the number of entries written to Pattern.
/// @param[in, out] TotalParsed     Incremented with every consumed byte; see the note at the end of the function.
///
/// @retval     INT_STATUS_SUCCESS  Returned once the parsing loop has finished.
///
501 INTSTATUS
503  _In_reads_(MaxBufferSize) BYTE *Buffer,
504  _In_ DWORD MaxBufferSize,
505  _In_ IG_CS_TYPE CsType,
506  _In_ CB_EXTRACT_LEVEL ExtractLevel,
507  _In_ DWORD PatternSize,
508  _Out_writes_to_(PatternSize, *TotalExtracted) BYTE *Pattern,
509  _Out_ DWORD *TotalExtracted,
510  _Inout_ DWORD *TotalParsed
511  )
535 {
536  INSTRUX instrux;
537  DWORD i;
538  const BYTE *end;
539 
 // Parameter validation (the guard bodies are not present in this listing)
540  if (NULL == Buffer)
541  {
543  }
544 
545  if (0 == PatternSize)
546  {
548  }
549 
550  if (NULL == Pattern)
551  {
553  }
554 
555  if (NULL == TotalExtracted)
556  {
558  }
559 
560  if (NULL == TotalParsed)
561  {
563  }
564 
565  if (MaxBufferSize <= ND_MAX_INSTRUCTION_LENGTH)
566  {
568  }
569 
570  end = Buffer + MaxBufferSize;
571  i = 0;
572 
573  if (IG_CS_TYPE_16B == CsType)
574  {
575  // Log this since it shouldn't really happen
576  WARNING("[WARNING] Extracting codeblocks for 16 bit!\n");
577  }
578 
 // i is the number of entries written to Pattern so far
579  while ((Buffer < end) && (i < PatternSize))
580  {
581  NDSTATUS ndstatus;
582  DWORD pattern, skipSize;
583  CODE_INS oldPattern;
584 
 // Fast path: try to recognize the instruction from its raw bytes, without decoding it
585  skipSize = IntFragHandleCommon(Buffer, end - Buffer, CsType, ExtractLevel, &pattern);
586  if (skipSize)
587  {
588  if (pattern != codeInsInvalid)
589  {
590  Pattern[i++] = (BYTE)pattern;
591  }
592 
593  Buffer += skipSize;
594  *TotalParsed += skipSize;
595 
596  continue;
597  }
598  else
599  {
600  pattern = codeInsInvalid;
601  }
602 
 // skipSize is always 0 from here on (the non-zero case ended with the continue above), so the
 // subtractions below have no effect and the decoder receives Buffer and (end - Buffer)
603  ndstatus = IntDecDecodeInstructionFromBuffer(Buffer - skipSize, end - Buffer - skipSize, CsType, &instrux);
604  if (__unlikely(ndstatus == ND_STATUS_BUFFER_TOO_SMALL))
605  {
606  // There is no point in going further, the rest of the instructions will be garbage
607  break;
608  }
609  else if (__unlikely(!ND_SUCCESS(ndstatus)))
610  {
 // Undecodable byte: skip it and retry from the next one
611  Buffer += 1;
612  *TotalParsed += 1;
613  continue;
614  }
615 
 // Stop if the decoded instruction ends at or beyond the end of the buffer
616  if (Buffer + instrux.Length >= end)
617  {
618  break;
619  }
620 
621  oldPattern = codeInsInvalid;
622 
 // Classes tracked at every extraction level
623  if (instrux.Instruction == ND_INS_Jcc)
624  {
625  // NOTE: a lot of strings will be interpreted as this! But not that critical,
626  // since it's not used as a pivot...
627  oldPattern = codeInsJc;
628  }
629  else if (instrux.Instruction == ND_INS_JMPE ||
630  instrux.Instruction == ND_INS_JMPFD ||
631  instrux.Instruction == ND_INS_JMPFI ||
632  instrux.Instruction == ND_INS_JMPNI ||
633  instrux.Instruction == ND_INS_JMPNR)
634  {
635  oldPattern = codeInsJmp;
636  }
637  else if (instrux.Instruction == ND_INS_CALLFD ||
638  instrux.Instruction == ND_INS_CALLFI ||
639  instrux.Instruction == ND_INS_CALLNI ||
640  instrux.Instruction == ND_INS_CALLNR)
641  {
642  oldPattern = codeInsCall;
643  }
644  else if (instrux.Instruction == ND_INS_RETF ||
645  instrux.Instruction == ND_INS_RETN)
646  {
647  oldPattern = codeInsRet;
648  }
649  else if (instrux.Instruction == ND_INS_STOS ||
650  instrux.Instruction == ND_INS_LODS)
651  {
652  oldPattern = codeInsStr;
653  }
654  else if (instrux.Instruction == ND_INS_XCHG ||
655  instrux.Instruction == ND_INS_CMPXCHG)
656  {
657  oldPattern = codeInsXchg;
658  }
659  else if (instrux.Instruction == ND_INS_BT ||
660  instrux.Instruction == ND_INS_BTC ||
661  instrux.Instruction == ND_INS_BTR ||
662  instrux.Instruction == ND_INS_BTS)
663  {
664  oldPattern = codeInsBt;
665  }
666 
667  // If we are at the normal level, go to the next instruction, don't extract further
668  if (ExtractLevel == cbLevelNormal)
669  {
670  goto _next_instruction;
671  }
672 
 // Classes tracked only above the normal level. The order of the MOV tests matters: reg-reg first,
 // then any memory operand, then an immediate, and only then the FS/GS segment check
673  if (instrux.Instruction == ND_INS_MOV)
674  {
675  if (instrux.Operands[0].Type == ND_OP_REG &&
676  instrux.Operands[1].Type == ND_OP_REG)
677  {
678  oldPattern = codeInsMovReg;
679  }
680  else if (instrux.Operands[0].Type == ND_OP_MEM ||
681  instrux.Operands[1].Type == ND_OP_MEM)
682  {
683  oldPattern = codeInsMovMem;
684  }
685  else if (instrux.HasImm1)
686  {
687  oldPattern = codeInsMovImm;
688  }
689  else if (instrux.Seg == ND_PREFIX_G2_SEG_FS ||
690  instrux.Seg == ND_PREFIX_G2_SEG_GS)
691  {
692  oldPattern = codeInsMovFsGs;
693  }
694  }
695  else if (instrux.Instruction == ND_INS_PUSHF ||
696  instrux.Instruction == ND_INS_POPF)
697  {
698  oldPattern = codeInsFlags;
699  }
700 
701  // We are done with the medium level
702  if (ExtractLevel == cbLevelMedium)
703  {
704  goto _next_instruction;
705  }
706 
707 _next_instruction:
 // Consistency check between the fast path and the decoder. Since skipSize is always 0 at this point,
 // this block cannot execute in the code as shown
708  if (skipSize && ((DWORD)oldPattern != pattern))
709  {
710  ERROR("[ERROR] [CRITICAL] Pattern was %02d but we returned %02d.."
711  "The MALWARE / ROOTKIT below is most probably a FP!\n",
712  oldPattern,
713  pattern);
714 
715  IntDumpInstruction(&instrux, 0);
716  }
717 
718  if (!skipSize && oldPattern != codeInsInvalid)
719  {
720  Pattern[i++] = oldPattern;
721  }
722 
723  if (!skipSize)
724  {
725  Buffer += instrux.Length;
726  *TotalParsed += instrux.Length;
727  }
728  }
729 
730  // Caused infinite loop in IntFragExtractBlocks because it remained blocked at 0xffc
 // Note that this overwrites the value accumulated in the loop with MaxBufferSize, instead of adding to it
731  if (Buffer <= end)
732  {
733  *TotalParsed = MaxBufferSize;
734  }
735 
736  *TotalExtracted = i;
737 
738  return INT_STATUS_SUCCESS;
739 }
740 
741 
742 //
743 // IntFragExtractCodeBlocks
744 //
///
/// @brief  Extract a block of code-block hashes from the given code buffer.
///
/// Extracts instruction patterns with IntFragExtractPattern, looks for pivot instructions inside them and, for
/// each pivot, computes a CRC32 over CODE_BLOCK_CHUNKS_COUNT consecutive pattern values. On success the
/// resulting hashes are sorted with UtilQuickSort.
///
/// NOTE(review): this listing omits source line 746 (the function name, IntFragExtractCodeBlocks according to
/// the symbol index) and the statements inside the parameter guards below (presumably early returns) - confirm
/// against the original file.
///
/// @param[in]      Buffer          The code to be parsed.
/// @param[in]      MaxBufferSize   The size of Buffer; the last ND_MAX_INSTRUCTION_LENGTH bytes are not parsed.
/// @param[in]      CsType          The code segment type used for decoding.
/// @param[in]      ExtractLevel    Selects which instructions are accepted as pivots.
/// @param[in, out] HashesCount     On input, the capacity of Hashes. Updated with the number of hashes
///                                 written only when at least one was found and the status is a success.
/// @param[out]     Hashes          Receives the hashes.
///
/// @retval     INT_STATUS_SUCCESS      At least one hash was extracted.
/// @retval     INT_STATUS_NOT_FOUND    No hash could be extracted.
///
745 INTSTATUS
747  _In_reads_(MaxBufferSize) BYTE *Buffer,
748  _In_ DWORD MaxBufferSize,
749  _In_ IG_CS_TYPE CsType,
750  _In_ CB_EXTRACT_LEVEL ExtractLevel,
751  _Inout_ DWORD *HashesCount,
752  _Out_writes_(*HashesCount) DWORD *Hashes
753  )
773 {
774  BYTE pattern[PAGE_SIZE / 4];
775  DWORD i, j, totalParsed, currentHash, sizeToParse;
776  INTSTATUS status;
 // chunks persists across iterations of the outer loop, so a partially filled code-block can be
 // completed with values from the next extracted pattern
777  BYTE chunks[CODE_BLOCK_CHUNKS_COUNT] = { 0 };
778 
 // Parameter validation (the guard bodies are not present in this listing)
779  if (NULL == Buffer)
780  {
782  }
783 
784  if (MaxBufferSize <= ND_MAX_INSTRUCTION_LENGTH)
785  {
787  }
788 
789  if (NULL == HashesCount || 0 == *HashesCount)
790  {
792  }
793 
794  if (NULL == Hashes)
795  {
797  }
798 
799  currentHash = 0;
800  totalParsed = 0;
801  sizeToParse = MaxBufferSize - ND_MAX_INSTRUCTION_LENGTH;
802 
803  // Don't search for more than 1024 (INTRO_PAGE_SIZE / 4) patterns at a time (maximum in a page filled with
804  // 4-byte instructions). If there are more patterns than this, assume there is something wrong with that page.
805  status = INT_STATUS_NOT_FOUND;
806 
807  // We parse the whole buffer or till we found as many codeblocks as we requested
808  while (totalParsed < sizeToParse && currentHash < *HashesCount)
809  {
810  DWORD patternSize = 0;
811 
 // totalParsed is both the offset of the next byte to parse and the in/out counter updated by the callee
812  status = IntFragExtractPattern(Buffer + totalParsed,
813  sizeToParse - totalParsed,
814  CsType,
815  ExtractLevel,
816  PAGE_SIZE / 4,
817  pattern,
818  &patternSize,
819  &totalParsed);
820  if (!INT_SUCCESS(status))
821  {
822  if (status == INT_STATUS_DATA_BUFFER_TOO_SMALL)
823  {
824  WARNING("[WARNNING] Buffer too small to extract codeblocks (size %d): 0x%08x\n",
825  sizeToParse - totalParsed,
826  status);
827 
 // Not fatal: keep whatever hashes were extracted so far
828  status = INT_STATUS_SUCCESS;
829  }
830  else
831  {
832  ERROR("[ERROR] IntFragExtractCodePattern: 0x%08x\n", status);
833  }
834 
835  goto leave;
836  }
837 
838  for (i = 0; i < patternSize; i++)
839  {
840  // Search for a pivot instruction:
841  // NORMAL - a jmp of any kind or a call
842  // MEDIUM - previous + a move that involves memory, or FS/GS segments
843  if (cbLevelNormal == ExtractLevel &&
844  (codeInsCall != pattern[i] &&
845  codeInsJmp != pattern[i]))
846  {
847  continue;
848  }
849 
850  if (cbLevelMedium == ExtractLevel &&
851  (codeInsCall != pattern[i] &&
852  codeInsJmp != pattern[i] &&
853  codeInsMovMem != pattern[i] &&
854  codeInsMovFsGs != pattern[i]))
855  {
856  continue;
857  }
858 
859  // We found a pivot so extract a codeblock from here
860  for (j = 0; (j < CODE_BLOCK_CHUNKS_COUNT) && ((i + j) < patternSize); j++)
861  {
 // Slots already filled (non-zero) are kept; only the empty ones are written
862  if (chunks[j] != 0)
863  {
864  continue;
865  }
866 
867  chunks[j] = pattern[i + j];
868  }
869 
870  // We didn't fill the last codeblock, so don't extract the next pattern if we can;
871  // fill the rest and only then calculate a hash
872  if (chunks[CODE_BLOCK_CHUNKS_COUNT - 1] == 0)
873  {
874  break;
875  }
876 
877  // We found our chunks, so calculate a hash
878  Hashes[currentHash] = Crc32Compute(chunks, CODE_BLOCK_CHUNKS_COUNT, INITIAL_CRC_VALUE);
879 
880  // Reset the chunks for the next calculation
 // NOTE(review): the reset statement itself (source line 881) is missing from this listing; the symbol
 // index references memzero, which is presumably what is used here - confirm
882 
883  // Advance to the next codeblock or exit if we reached our target
884  if (++currentHash == *HashesCount)
885  {
886  status = INT_STATUS_SUCCESS;
887  goto leave;
888  }
889  }
890  }
891 
892 leave:
893  if (INT_SUCCESS(status))
894  {
895  if (currentHash > 0)
896  {
 // Hashes are returned sorted
897  UtilQuickSort(Hashes, currentHash, sizeof(DWORD));
898 
899  *HashesCount = currentHash;
900  }
901  else
902  {
903  status = INT_STATUS_NOT_FOUND;
904  }
905  }
906 
907  return status;
908 }
909 
910 
///
/// Checks a list of code-block hashes against the hash lists of an exception signature.
///
/// NOTE(review): this listing omits source lines 911-912 (return type and function name), the statements
/// inside the parameter guards, the statement executed when a signature is found (line 978) and the final
/// statement of the function (line 985). The symbol index lists INT_STATUS_SIGNATURE_MATCHED, which is
/// presumably what is returned on a match - confirm against the original file.
///
/// @param[in]  Hashes              The extracted hashes; the early exit in the inner loop relies on them
///                                 being sorted in ascending order.
/// @param[in]  CodeBlocksCount     The number of entries in Hashes.
/// @param[in]  ExceptionSignature  The signature; its Object field points to ListsCount consecutive
///                                 SIG_CODEBLOCK_HASH lists and its Score field is the number of hashes
///                                 from one list that must be found.
///
913  _In_ const DWORD *Hashes,
914  _In_ DWORD CodeBlocksCount,
915  _In_ const SIG_CODEBLOCKS *ExceptionSignature
916  )
931 {
932  DWORD i;
933  const SIG_CODEBLOCK_HASH *pSigHash;
934 
 // Parameter validation (the guard bodies are not present in this listing)
935  if (NULL == Hashes)
936  {
938  }
939 
940  if (0 == CodeBlocksCount)
941  {
943  }
944 
945  if (NULL == ExceptionSignature)
946  {
948  }
949 
950  pSigHash = (const SIG_CODEBLOCK_HASH *)ExceptionSignature->Object;
951  for (i = 0; i < ExceptionSignature->ListsCount; i++)
952  {
 // Each list is variable sized: a header followed by Count hashes
953  DWORD hashSize = sizeof(*pSigHash) + pSigHash->Count * sizeof(DWORD);
954  DWORD j, remaining, currentCb;
955 
 // remaining counts down from the required score with every signature hash found in Hashes
956  remaining = ExceptionSignature->Score;
957  currentCb = 0;
958 
959  for (j = 0; j < pSigHash->Count; j++)
960  {
 // currentCb is not reset between signature hashes, so Hashes is walked at most once per list
961  for (; currentCb < CodeBlocksCount; currentCb++)
962  {
963  if (pSigHash->Hashes[j] < Hashes[currentCb])
964  {
965  break; // no point in going further, since the hashes are sorted
966  }
967  else if (pSigHash->Hashes[j] == Hashes[currentCb])
968  {
969  remaining--;
970  break; // go to the next hash in pSigHashes
971  }
972  }
973  }
974 
 // The signed cast also treats a wrapped-around (decremented below zero) counter as a match
975  if ((int)remaining <= 0)
976  {
977  // Found our signature
979  }
980 
981  // advance to the next hash list
982  pSigHash = (const SIG_CODEBLOCK_HASH *)((const BYTE *)pSigHash + hashSize);
983  }
984 
986 }
987 
988 
///
/// @brief  Extract a pattern of code-blocks from the given code buffer.
///
/// Decodes the buffer with IntDecDecodeInstructionFromBuffer and, for every instruction that belongs to a
/// tracked class, stores its CODE_INS value together with the offset at which it was found.
///
/// NOTE(review): this listing omits source line 990 (the function name, IntFragExtractCodePattern according to
/// the symbol index) and the statements inside the parameter guards below (presumably early returns) - confirm
/// against the original file.
///
/// @param[in]  Buffer          The code to be parsed.
/// @param[in]  StartOffset     The offset assigned to the first byte of Buffer; reported offsets start here.
/// @param[in]  MaxBufferSize   The size of Buffer; the last ND_MAX_INSTRUCTION_LENGTH bytes are not parsed.
/// @param[in]  CsType          The code segment type used for decoding.
/// @param[in]  ExtractLevel    cbLevelNormal tracks only the control-flow/string/xchg/bt classes; other
///                             levels also track MOV variants and PUSHF/POPF.
/// @param[in]  PatternSize     The capacity, in entries, of Pattern.
/// @param[out] Pattern         Receives the (Value, Offset) pairs.
/// @param[out] TotalExtracted  Receives the number of entries written to Pattern.
///
/// @retval     INT_STATUS_SUCCESS  Returned once the parsing loop has finished.
///
989 INTSTATUS
991  _In_ PBYTE Buffer,
992  _In_ DWORD StartOffset,
993  _In_ DWORD MaxBufferSize,
994  _In_ IG_CS_TYPE CsType,
995  _In_ CB_EXTRACT_LEVEL ExtractLevel,
996  _In_ DWORD PatternSize,
997  _Out_writes_to_(PatternSize, *TotalExtracted) CODE_BLOCK_PATTERN *Pattern,
998  _Out_ DWORD *TotalExtracted
999  )
1023 {
1024  INSTRUX instrux;
1025  DWORD i, currentOffset;
1026  PBYTE end;
1027 
 // Parameter validation (the guard bodies are not present in this listing)
1028  if (NULL == Buffer)
1029  {
1031  }
1032 
1033  if (NULL == TotalExtracted)
1034  {
1036  }
1037 
1038  if (NULL == Pattern)
1039  {
1041  }
1042 
1043  if (MaxBufferSize <= ND_MAX_INSTRUCTION_LENGTH)
1044  {
1046  }
1047 
 // Parsing stops ND_MAX_INSTRUCTION_LENGTH bytes before the end of the buffer
1048  end = Buffer + MaxBufferSize - ND_MAX_INSTRUCTION_LENGTH;
1049  i = 0;
1050  currentOffset = StartOffset;
1051 
1052  if (IG_CS_TYPE_16B == CsType)
1053  {
1054  // Log this since it shouldn't really happen
1055  WARNING("[WARNING] Extracting codeblocks for 16 bit!\n");
1056  }
1057 
1058  *TotalExtracted = 0;
1059 
 // Each iteration writes at most one entry, since the instruction classes below are mutually exclusive
1060  while ((Buffer < end) && (i < PatternSize))
1061  {
1062  NDSTATUS ndstatus;
1063 
1064  ndstatus = IntDecDecodeInstructionFromBuffer(Buffer, (size_t)(end - Buffer), CsType, &instrux);
1065  if (!ND_SUCCESS(ndstatus))
1066  {
 // Undecodable byte: skip it and retry from the next one
1067  Buffer++;
1068  currentOffset++;
1069  continue;
1070  }
1071 
 // Stop if the decoded instruction ends at or beyond the parsing limit
1072  if (Buffer + instrux.Length >= end)
1073  {
1074  break;
1075  }
1076 
 // Classes tracked at every extraction level
1077  if (instrux.Instruction == ND_INS_Jcc)
1078  {
1079  Pattern[i].Value = codeInsJc;
1080  Pattern[i++].Offset = currentOffset;
1081  }
1082  else if (instrux.Instruction == ND_INS_JMPE ||
1083  instrux.Instruction == ND_INS_JMPFD ||
1084  instrux.Instruction == ND_INS_JMPFI ||
1085  instrux.Instruction == ND_INS_JMPNI ||
1086  instrux.Instruction == ND_INS_JMPNR)
1087  {
1088  Pattern[i].Value = codeInsJmp;
1089  Pattern[i++].Offset = currentOffset;
1090  }
1091  else if (instrux.Instruction == ND_INS_CALLFD ||
1092  instrux.Instruction == ND_INS_CALLFI ||
1093  instrux.Instruction == ND_INS_CALLNI ||
1094  instrux.Instruction == ND_INS_CALLNR)
1095  {
1096  Pattern[i].Value = codeInsCall;
1097  Pattern[i++].Offset = currentOffset;
1098  }
1099  else if (instrux.Instruction == ND_INS_RETF ||
1100  instrux.Instruction == ND_INS_RETN)
1101  {
1102  Pattern[i].Value = codeInsRet;
1103  Pattern[i++].Offset = currentOffset;
1104  }
1105  else if (instrux.Instruction == ND_INS_STOS ||
1106  instrux.Instruction == ND_INS_LODS)
1107  {
1108  Pattern[i].Value = codeInsStr;
1109  Pattern[i++].Offset = currentOffset;
1110  }
1111  else if (instrux.Instruction == ND_INS_XCHG ||
1112  instrux.Instruction == ND_INS_CMPXCHG)
1113  {
1114  Pattern[i].Value = codeInsXchg;
1115  Pattern[i++].Offset = currentOffset;
1116  }
1117  else if (instrux.Instruction == ND_INS_BT ||
1118  instrux.Instruction == ND_INS_BTC ||
1119  instrux.Instruction == ND_INS_BTR ||
1120  instrux.Instruction == ND_INS_BTS)
1121  {
1122  Pattern[i].Value = codeInsBt;
1123  Pattern[i++].Offset = currentOffset;
1124  }
1125 
1126  // If we are at the normal level, go to the next instruction, don't extract further
1127  if (ExtractLevel == cbLevelNormal)
1128  {
1129  goto _next_instruction;
1130  }
1131 
 // Classes tracked only above the normal level. The order of the MOV tests matters: reg-reg first,
 // then any memory operand, then an immediate, and only then the FS/GS segment check
1132  if (instrux.Instruction == ND_INS_MOV)
1133  {
1134  if (instrux.Operands[0].Type == ND_OP_REG &&
1135  instrux.Operands[1].Type == ND_OP_REG)
1136  {
1137  Pattern[i].Value = codeInsMovReg;
1138  Pattern[i++].Offset = currentOffset;
1139  }
1140  else if (instrux.Operands[0].Type == ND_OP_MEM ||
1141  instrux.Operands[1].Type == ND_OP_MEM)
1142  {
1143  Pattern[i].Value = codeInsMovMem;
1144  Pattern[i++].Offset = currentOffset;
1145  }
1146  else if (instrux.HasImm1)
1147  {
1148  Pattern[i].Value = codeInsMovImm;
1149  Pattern[i++].Offset = currentOffset;
1150  }
1151  else if (instrux.Seg == ND_PREFIX_G2_SEG_FS ||
1152  instrux.Seg == ND_PREFIX_G2_SEG_GS)
1153  {
1154  Pattern[i].Value = codeInsMovFsGs;
1155  Pattern[i++].Offset = currentOffset;
1156  }
1157  }
1158  else if (instrux.Instruction == ND_INS_PUSHF ||
1159  instrux.Instruction == ND_INS_POPF)
1160  {
1161  Pattern[i].Value = codeInsFlags;
1162  Pattern[i++].Offset = currentOffset;
1163  }
1164 
1165  // We are done with the medium level
1166  if (ExtractLevel == cbLevelMedium)
1167  {
1168  goto _next_instruction;
1169  }
1170 
1171 _next_instruction:
 // Advance both the read pointer and the reported offset past the decoded instruction
1172  Buffer += instrux.Length;
1173  currentOffset += instrux.Length;
1174  }
1175 
1176  *TotalExtracted = i;
1177 
1178  return INT_STATUS_SUCCESS;
1179 }
1180 
1181 
1182 static void
1184  _In_ CODE_BLOCK *CodeBlock,
1185  _In_ DWORD Count,
1186  _In_ QWORD Rip,
1187  _In_ DWORD RipOffset,
1188  _In_ BOOLEAN ReturnRip,
1189  _In_ DWORD ElemLine
1190  )
1201 {
1202  DWORD previousOffset;
1203  int ret;
1204  int maxLength = sizeof(gCbLog);
1205  CHAR *pCbLine = NULL;
1206  BOOLEAN loggedRip = FALSE;
1207 
1208  if (Count == 0)
1209  {
1210  return;
1211  }
1212 
1213  //
1214  // Compute a hash on the extracted codeblocks
1215  //
1216  previousOffset = CodeBlock[0].OffsetStart;
1217  for (DWORD i = 0; i < Count; i++)
1218  {
1219  if (i % ElemLine == 0)
1220  {
1221  if (i > 0)
1222  {
1223  // Log formatted codeblocks line
1224  // Remove comma
1225  *(pCbLine - 2) = ' ';
1226  LOG("%s\n", gCbLog);
1227  }
1228 
1229  // Start formatting a new codeblocks line
1230  pCbLine = gCbLog;
1231  maxLength = sizeof(gCbLog);
1232 
1233  ret = snprintf(pCbLine, maxLength, "[CODEBLOCKS] ");
1234  if (ret < 0 || ret >= maxLength)
1235  {
1236  ERROR("[ERROR] snprintf error: %d, size %d\n", ret, maxLength);
1237  return;
1238  }
1239  else
1240  {
1241  pCbLine += ret;
1242  maxLength -= ret;
1243  }
1244  }
1245 
1246  CodeBlock[i].Hash = Crc32Compute(CodeBlock[i].Chunks, CODE_BLOCK_CHUNKS_COUNT, INITIAL_CRC_VALUE);
1247 
1248  if (!loggedRip && ((previousOffset <= RipOffset && RipOffset <= CodeBlock[i].OffsetStart) ||
1249  (i == 0 && CodeBlock[i].OffsetStart >= RipOffset) ||
1250  (i == Count - 1)))
1251  {
1252 
1253  ret = snprintf(pCbLine, maxLength, "(%7s->0x%016llx), ", ReturnRip ? "Ret RIP" : "RIP", Rip);
1254  if (ret < 0 || ret >= maxLength)
1255  {
1256  ERROR("[ERROR] snprintf error: %d, size %d\n", ret, maxLength);
1257  return;
1258  }
1259  else
1260  {
1261  pCbLine += ret;
1262  maxLength -= ret;
1263  }
1264 
1265  loggedRip = TRUE;
1266  }
1267 
1268  ret = snprintf(pCbLine, maxLength, "0x%08x (0x%03x, %9s), ", CodeBlock[i].Hash, CodeBlock[i].OffsetStart,
1269  (CodeBlock[i].PivotInstruction == codeInsCall) ? "CALL" :
1270  (CodeBlock[i].PivotInstruction == codeInsJmp) ? "JMP" :
1271  (CodeBlock[i].PivotInstruction == codeInsMovMem) ? "MOV MEM" :
1272  (CodeBlock[i].PivotInstruction == codeInsMovFsGs) ? "MOV FS/GS" : "INVALID");
1273  if (ret < 0 || ret >= maxLength)
1274  {
1275  ERROR("[ERROR] snprintf error: %d, size %d\n", ret, maxLength);
1276  return;
1277  }
1278  else
1279  {
1280  pCbLine += ret;
1281  maxLength -= ret;
1282  }
1283 
1284  previousOffset = CodeBlock[i].OffsetStart;
1285  }
1286 
1287  *(pCbLine - 2) = ' ';
1288  LOG("%s\n", gCbLog);
1289 }
1290 
1291 
///
/// @brief  Dumps code-blocks that can then be used to generate an exception signature.
///
/// Extracts the instruction pattern of the buffer with IntFragExtractCodePattern, builds a code-block for every
/// pivot instruction found in it and logs the result through IntFragLogCodeBlocks, 8 code-blocks per line.
///
/// NOTE(review): this listing omits source line 1293 (the function name, IntFragDumpBlocks according to the
/// symbol index), the statements inside the parameter guards, the allocation of pCdBlk (line 1341), the
/// failure handling of both allocations, the status assignment at line 1380 and the statement at line 1422
/// (presumably the release of pCdBlk) - confirm against the original file.
///
/// @param[in]  Buffer          The code to be parsed.
/// @param[in]  StartAddress    The address of Buffer; only its page offset is used.
/// @param[in]  MaxBufferSize   The size of Buffer.
/// @param[in]  CsType          The code segment type used for decoding.
/// @param[in]  ExtractLevel    Selects which instructions are accepted as pivots.
/// @param[in]  Rip             The RIP to be marked in the log; only its page offset is used for positioning.
/// @param[in]  ReturnRip       Forwarded to IntFragLogCodeBlocks to select the label of the RIP marker.
///
/// @returns    The status of IntFragExtractCodePattern, unless changed by one of the omitted statements.
///
1292 INTSTATUS
1294  _In_ PBYTE Buffer,
1295  _In_ QWORD StartAddress,
1296  _In_ DWORD MaxBufferSize,
1297  _In_ IG_CS_TYPE CsType,
1298  _In_ CB_EXTRACT_LEVEL ExtractLevel,
1299  _In_ QWORD Rip,
1300  _In_ BOOLEAN ReturnRip
1301  )
1318 {
1319  PCODE_BLOCK_PATTERN pattern;
1320  PCODE_BLOCK pCdBlk;
1321  DWORD i, j, patternSize, currentCb, previousOffset;
1322  DWORD cbCount, ripOffset;
1323  INTSTATUS status;
1324 
 // Parameter validation (the guard bodies are not present in this listing)
1325  if (NULL == Buffer)
1326  {
1328  }
1329 
1330  if (0 == MaxBufferSize)
1331  {
1333  }
1334 
1335  pCdBlk = NULL;
1336  pattern = NULL;
1337  patternSize = currentCb = previousOffset = 0;
 // cbCount is the maximum number of code-blocks that will be built: as many as fit in one page
1338  cbCount = PAGE_SIZE / sizeof(CODE_BLOCK);
1339  ripOffset = Rip & PAGE_OFFSET;
1340 
 // NOTE(review): the allocation of pCdBlk is missing from this listing. The loop below increments
 // pCdBlk[].Size without initializing it, so the buffer is presumably zero-filled on allocation - confirm
1342  if (NULL == pCdBlk)
1343  {
1345  }
1346 
1347  pattern = HpAllocWithTag(PAGE_SIZE, IC_TAG_CDBK);
1348  if (NULL == pattern)
1349  {
1351 
1353  }
1354 
1355  status = IntFragExtractCodePattern(Buffer,
1356  StartAddress & PAGE_OFFSET,
1357  MaxBufferSize,
1358  CsType,
1359  ExtractLevel,
1360  PAGE_SIZE / sizeof(CODE_BLOCK_PATTERN),
1361  pattern,
1362  &patternSize);
1363  if (!INT_SUCCESS(status))
1364  {
1365  if (status == INT_STATUS_DATA_BUFFER_TOO_SMALL)
1366  {
1367  WARNING("[WARNNING] Buffer too small to extract codeblocks (size %d): 0x%08x\n", MaxBufferSize, status);
1368  }
1369  else
1370  {
1371  ERROR("[ERROR] IntFragExtractCodePattern: 0x%08x\n", status);
1372  }
1373 
1374  goto leave;
1375  }
1376 
 // At least CODE_BLOCK_CHUNKS_COUNT pattern entries are needed; this also keeps the unsigned
 // subtraction in the loop condition below from wrapping around
1377  if (patternSize < CODE_BLOCK_CHUNKS_COUNT)
1378  {
1379  WARNING("[WARNING] Could not extract enough code-blocks: %d\n", patternSize);
1381  goto leave;
1382  }
1383 
1384  for (i = 0; i < patternSize - CODE_BLOCK_CHUNKS_COUNT; i++)
1385  {
 // Skip everything that is not a pivot for the requested level:
 // NORMAL - a call or a jmp; MEDIUM - previous + a mov involving memory or the FS/GS segments
1386  if (cbLevelNormal == ExtractLevel &&
1387  (codeInsCall != pattern[i].Value &&
1388  codeInsJmp != pattern[i].Value))
1389  {
1390  continue;
1391  }
1392 
1393  if (cbLevelMedium == ExtractLevel &&
1394  (codeInsCall != pattern[i].Value &&
1395  codeInsJmp != pattern[i].Value &&
1396  codeInsMovMem != pattern[i].Value &&
1397  codeInsMovFsGs != pattern[i].Value))
1398  {
1399  continue;
1400  }
1401 
1402  pCdBlk[currentCb].PivotInstruction = pattern[i].Value;
1403  pCdBlk[currentCb].OffsetStart = pattern[i].Offset;
1404 
1405  // Extract from offset, CODE_BLOCK_CHUNKS_COUNT forward
1406  for (j = 0; j < CODE_BLOCK_CHUNKS_COUNT; j++)
1407  {
1408  pCdBlk[currentCb].Chunks[j] = pattern[i + j].Value;
1409  pCdBlk[currentCb].Size++;
1410  }
1411 
1412  // Exit if we reached our target
1413  if (++currentCb >= cbCount)
1414  {
1415  break;
1416  }
1417  }
1418 
 // Hash and log the code-blocks, 8 per line
1419  IntFragLogCodeBlocks(pCdBlk, currentCb, Rip, ripOffset, ReturnRip, 8);
1420 
1421 leave:
1423  HpFreeAndNullWithTag(&pattern, IC_TAG_CDBK);
1424 
1425  return status;
1426 }
#define __unlikely(x)
Definition: common.h:64
INTSTATUS IntFragExtractCodePattern(PBYTE Buffer, DWORD StartOffset, DWORD MaxBufferSize, IG_CS_TYPE CsType, CB_EXTRACT_LEVEL ExtractLevel, DWORD PatternSize, CODE_BLOCK_PATTERN *Pattern, DWORD *TotalExtracted)
Extract a pattern of code-blocks from the given code buffer.
Definition: codeblocks.c:990
_Bool BOOLEAN
Definition: intro_types.h:58
#define _Out_
Definition: intro_sal.h:22
DWORD IntFragHandleCommon(const BYTE *Buffer, size_t BufSize, IG_CS_TYPE CsType, CB_EXTRACT_LEVEL ExtractLevel, DWORD *Pattern)
Extract a pattern of instructions without using the disassembler.
Definition: codeblocks.c:37
A mov using a segment:offset.
Definition: codeblocks.h:37
uint8_t BYTE
Definition: intro_types.h:47
DWORD Crc32Compute(const void *Buffer, size_t Size, DWORD InitialCrc)
Computes the CRC for a byte array.
Definition: crc32.c:103
INTSTATUS IntFragExtractPattern(BYTE *Buffer, DWORD MaxBufferSize, IG_CS_TYPE CsType, CB_EXTRACT_LEVEL ExtractLevel, DWORD PatternSize, BYTE *Pattern, DWORD *TotalExtracted, DWORD *TotalParsed)
Extract a pattern of code-blocks from the given code buffer.
Definition: codeblocks.c:502
#define _In_
Definition: intro_sal.h:21
BYTE PivotInstruction
Definition: codeblocks.h:56
#define INT_STATUS_SUCCESS
Definition: introstatus.h:54
#define BIT(x)
Definition: common.h:68
Non-conditional jump, of any kind.
Definition: codeblocks.h:28
#define IC_TAG_CDBK
Code blocks.
Definition: memtags.h:31
DWORD OffsetStart
The start of the extracted codeblock (not actually relevant)
Definition: codeblocks.h:53
#define INT_SUCCESS(Status)
Definition: introstatus.h:42
WORD Size
Code block size, in patterns.
Definition: codeblocks.h:55
#define PAGE_OFFSET
Definition: pgtable.h:32
#define INT_STATUS_SIGNATURE_MATCHED
Definition: introstatus.h:401
#define _In_reads_(expr)
Definition: intro_sal.h:27
#define __pure
Definition: introtypes.h:46
Ret, of any kind.
Definition: codeblocks.h:30
DWORD Offset
The offset of the instruction in the page.
Definition: codeblocks.h:70
#define ERROR(fmt,...)
Definition: glue.h:62
#define CODE_BLOCK_CHUNKS_COUNT
Number of chunks (CODE_INS) per codeblock.
Definition: codeblocks.h:43
A mov using immediate value.
Definition: codeblocks.h:36
#define HpAllocWithTag(Len, Tag)
Definition: glue.h:516
int INTSTATUS
The status data type.
Definition: introstatus.h:24
#define INT_STATUS_NOT_FOUND
Definition: introstatus.h:284
CB_EXTRACT_LEVEL
Definition: codeblocks.h:14
#define _Out_writes_(expr)
Definition: intro_sal.h:28
Exchange instruction, including xchg, xadd, cmpxchg, cmpxchg8b/16b.
Definition: codeblocks.h:32
Bit manipulation instruction - bt, bts, btr, btc.
Definition: codeblocks.h:33
Push/Pop flags.
Definition: codeblocks.h:38
#define LOG(fmt,...)
Definition: glue.h:61
32-bit selector.
Definition: glueiface.h:187
BYTE Chunks[CODE_BLOCK_CHUNKS_COUNT]
The actual CODE_INS values representing the instruction pattern.
Definition: codeblocks.h:58
IG_CS_TYPE
The type of the code segment.
Definition: glueiface.h:183
static CHAR gCbLog[512]
Used to format log lines containing code-blocks.
Definition: codeblocks.c:33
#define IS_REX_PREFIX(b)
INTSTATUS IntFragDumpBlocks(PBYTE Buffer, QWORD StartAddress, DWORD MaxBufferSize, IG_CS_TYPE CsType, CB_EXTRACT_LEVEL ExtractLevel, QWORD Rip, BOOLEAN ReturnRip)
Dumps code-blocks that can then be used to generate an exception signature.
Definition: codeblocks.c:1293
#define _Inout_
Definition: intro_sal.h:20
TIMER_FRIENDLY void IntDumpInstruction(INSTRUX *Instruction, QWORD Rip)
This function dumps a given instruction (textual disassembly).
Definition: dumper.c:583
#define INITIAL_CRC_VALUE
Definition: introdefs.h:221
uint8_t * PBYTE
Definition: intro_types.h:47
Conditional jump, of any kind, including loop.
Definition: codeblocks.h:27
#define memzero(a, s)
Definition: introcrt.h:35
unsigned long long QWORD
Definition: intro_types.h:53
INTSTATUS IntFragExtractCodeBlocks(BYTE *Buffer, DWORD MaxBufferSize, IG_CS_TYPE CsType, CB_EXTRACT_LEVEL ExtractLevel, DWORD *HashesCount, DWORD *Hashes)
Extract a block of code-block hashes from the given code buffer.
Definition: codeblocks.c:746
#define TRUE
Definition: intro_types.h:30
INTSTATUS IntDecDecodeInstructionFromBuffer(PBYTE Buffer, size_t BufferSize, IG_CS_TYPE CsType, void *Instrux)
Decode an instruction from the provided buffer.
Definition: decoder.c:308
CODE_INS
Definition: codeblocks.h:24
#define HpFreeAndNullWithTag(Add, Tag)
Definition: glue.h:517
This includes instructions up to codeInsBt.
Definition: codeblocks.h:16
Not really used, only to signal an error.
Definition: codeblocks.h:26
#define INT_STATUS_INVALID_PARAMETER_5
Definition: introstatus.h:74
#define WARNING(fmt,...)
Definition: glue.h:60
BYTE Value
The CODE_INS value describing the instruction type.
Definition: codeblocks.h:71
static void IntFragLogCodeBlocks(CODE_BLOCK *CodeBlock, DWORD Count, QWORD Rip, DWORD RipOffset, BOOLEAN ReturnRip, DWORD ElemLine)
Log a block of code-blocks.
Definition: codeblocks.c:1183
#define PAGE_SIZE
Definition: common.h:70
This includes instructions up to codeInsFlags.
Definition: codeblocks.h:17
#define INT_STATUS_DATA_BUFFER_TOO_SMALL
Definition: introstatus.h:194
uint32_t DWORD
Definition: intro_types.h:49
DWORD Hashes[]
The list of hashes.
Definition: exceptions.h:369
#define INT_STATUS_INVALID_PARAMETER_6
Definition: introstatus.h:77
Some sort of string instruction - lods, stos, scas, movs.
Definition: codeblocks.h:31
BYTE Count
The number of hashes from the list.
Definition: exceptions.h:368
Call, of any kind.
Definition: codeblocks.h:29
__pure INTSTATUS IntFragMatchSignature(const DWORD *Hashes, DWORD CodeBlocksCount, const SIG_CODEBLOCKS *ExceptionSignature)
Match a block of code-block hashes against a list of code-block exception signatures.
Definition: codeblocks.c:912
struct _CODE_BLOCK CODE_BLOCK
#define INT_STATUS_INVALID_PARAMETER_8
Definition: introstatus.h:83
#define INT_STATUS_INVALID_PARAMETER_1
Definition: introstatus.h:62
64-bit selector.
Definition: glueiface.h:188
A mov involving only registers.
Definition: codeblocks.h:34
#define _Out_writes_to_(expr, expr2)
Definition: intro_sal.h:29
A mov involving memory (either as the destination or as the source).
Definition: codeblocks.h:35
char CHAR
Definition: intro_types.h:56
Describes a codeblocks signature.
Definition: exceptions.h:397
#define INT_STATUS_INVALID_PARAMETER_2
Definition: introstatus.h:65
Describes a codeblocks signature hash.
Definition: exceptions.h:366
void UtilQuickSort(void *Array, const DWORD NumberOfElements, const BYTE ElementSize)
Definition: utils.c:267
#define INT_STATUS_SIGNATURE_NOT_FOUND
Definition: introstatus.h:416
16-bit selector.
Definition: glueiface.h:186
#define INT_STATUS_INVALID_PARAMETER_7
Definition: introstatus.h:80
#define FALSE
Definition: intro_types.h:34
#define INT_STATUS_INSUFFICIENT_RESOURCES
Definition: introstatus.h:281
#define INT_STATUS_INVALID_PARAMETER_3
Definition: introstatus.h:68