Bitdefender Hypervisor Memory Introspection
hook_ptwh.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Bitdefender
3  * SPDX-License-Identifier: Apache-2.0
4  */
5 #include "hook_ptwh.h"
6 #include "decoder.h"
7 #include "gpacache.h"
8 #include "hook.h"
9 
10 
// IntHookPtwEmulateWrite - emulate a write that took place on the page table entry at Address.
//
// The old entry value is fetched through the GPA cache, the new value is obtained by emulating (or, as a
// fallback, only decoding) the current instruction, and both are stored in gVcpu->PtEmuBuffer.
//
// Address: the written address; it may point anywhere inside a page table entry.
//
// Status values visible in this listing:
//   INT_STATUS_SUCCESS       - the old and new values have been stored in gVcpu->PtEmuBuffer.
//   INT_STATUS_NOT_SUPPORTED - the access is larger than 8 bytes, or it spills into the next entry.
//   other failure statuses   - propagated from IntGpaCacheFetchAndAdd or IntDecEmulatePTWrite.
//
// NOTE(review): several lines of this function are missing from this listing (the signature line, the body
// of the first branch, some statements around the fallback path and the cleanup label); the comments below
// describe only what is visible.
13  _In_ QWORD Address
14  )
28 {
29  INTSTATUS status;
30  QWORD entryAddr, entrySize, byteOffs, oldValue, newValue;
31  OPERAND_VALUE writtenValue = { 0 };
32  DWORD writeSize;
33  PIG_ARCH_REGS regs;
34  PINSTRUX instrux;
35 
 // NOTE(review): the statement inside this branch is not visible here; presumably the function bails out
 // early when the emulation buffer already holds a valid write - confirm against the original file.
36  if (gVcpu->PtEmuBuffer.Valid)
37  {
39  }
40 
42 
 // A page table entry is 8 bytes wide for 64-bit guests and for PAE guests, 4 bytes wide otherwise.
43  entrySize = (gGuest.Guest64 ? 8 : gGuest.PaeEnabled ? 8 : 4);
44 
 // Align the written address down to the start of the entry.
45  entryAddr = Address & ~(entrySize - 1);
46 
 // Offset of the written address inside the entry.
47  byteOffs = Address & (entrySize - 1);
48 
 // Use the instruction and the register state of the current VCPU.
49  instrux = &gVcpu->Instruction;
50  regs = &gVcpu->Regs;
51 
52  // Accessed size.
53  if (instrux->Operands[0].Size > 8)
54  {
55  ERROR("[ERROR] Unsupported access size: %d at RIP %llx, instruction '%s'!\n",
56  instrux->Operands[0].Size, regs->Rip, instrux->Mnemonic);
57 
58  status = INT_STATUS_NOT_SUPPORTED;
59  goto cleanup_and_exit;
60  }
61 
62  writeSize = instrux->Operands[0].Size;
63 
64  // Check for access that spills in the next entry.
65  if (byteOffs + writeSize > entrySize)
66  {
67  ERROR("[ERROR] Access at %llx spills in the next entry, size %d, instruction '%s'\n",
68  Address, writeSize, instrux->Mnemonic);
69 
70  status = INT_STATUS_NOT_SUPPORTED;
71  goto cleanup_and_exit;
72  }
73 
74  // Fetch the old PT value.
75  status = IntGpaCacheFetchAndAdd(gGuest.GpaCache, entryAddr, (DWORD)entrySize, (PBYTE)&oldValue);
76  if (!INT_SUCCESS(status))
77  {
78  ERROR("[ERROR] IntGpaCacheFetchAndAdd failed at GPA 0x%016llx: 0x%08x\n", entryAddr, status);
79  goto cleanup_and_exit;
80  }
81 
82  // Decode & emulate the PT write at the same time. This is very important:
83  // 1. A CMPXCHG instruction may have different behavior when we decode it versus when it will be emulated.
84  // For example, the MiStealPage tries to modify a PTE using a CMPXCHG instruction (with the PTE NOT having
85  // the A bit set). If the A bit is NOT set when we decode it, we will think the exchange will be made.
86  // However, if the A bit will be set later by another CPU, after we handle the write but before the
87  // instruction is emulated, we would end up protecting the wrong page.
88  // 2. If a swap out operation takes place and another CPU is emulating a write inside the page that is being
89  // swapped out, we may end up with integrity violations, because we will compute the hash on the old page,
90  // and in the time frame until we emulate the PT write, someone may modify the original page content.
91  status = IntDecEmulatePTWrite(&newValue);
92  if (!INT_SUCCESS(status))
93  {
94  // Fallback - use the bigger, slower decoder, which will not emulate the access.
 // p points to the byte inside the fetched old value that corresponds to the written address.
95  PBYTE p = (PBYTE)&oldValue + byteOffs;
96  INTSTATUS status2;
97 
98  ERROR("[ERROR] IntDecEmulatePTWrite failed: 0x%08x\n", status);
99 
100  // Check for MOVNTI [rcx], *.
101  if (instrux->Instruction == ND_INS_MOVNTI && instrux->Operands[0].Info.Memory.Base == NDR_RCX)
102  {
104 
105  LOG("Dumping memory pointed by RCX:\n");
106  IntDumpGva(gVcpu->Regs.Rcx & PAGE_MASK, 0x1000, gVcpu->Regs.Cr3);
107 
108  LOG("Dumping memory pointed by RDX:\n");
109  IntDumpGva(gVcpu->Regs.Rdx & PAGE_MASK, 0x1000, gVcpu->Regs.Cr3);
110 
111  // Dump the PTS hooks state.
112  IntHookPtsDump();
113  }
114 
116 
 // Decode the written value from the instruction, the registers and the old memory content.
117  status2 = IntDecGetWrittenValueFromInstruction(instrux, regs, p, &writtenValue);
118  if (!INT_SUCCESS(status2))
119  {
120  ERROR("[ERROR] IntDecEmulatePTWrite failed with 0x%08x, "
121  "IntDecGetWrittenValueFromInstruction failed with 0x%08x\n", status, status2);
 // status still holds the IntDecEmulatePTWrite failure here; status2 is only logged.
122  goto cleanup_and_exit;
123  }
124 
 // Take the new value and the write size from the decoded operand.
125  newValue = writtenValue.Value.QwordValues[0];
126  writeSize = writtenValue.Size;
127  }
128  else
129  {
 // NOTE(review): the statement of this branch is not visible in this listing.
131  }
132 
 // The write is partial when it covers fewer bytes than a whole entry.
134  gVcpu->PtEmuBuffer.Partial = entrySize > writeSize;
135  gVcpu->PtEmuBuffer.Old = oldValue;
136  gVcpu->PtEmuBuffer.New = newValue;
137 
138  status = INT_STATUS_SUCCESS;
139 
140 cleanup_and_exit:
142 
143  return status;
144 }
145 
146 
// IntHookPtwProcessWrite - process a page-table write, returning the old and the new page-table entry value.
//
// Consecutive partial writes to the same entry are accumulated in WriteState; the old and new values are
// handed to the caller only once every byte of the entry has been written.
//
// WriteState: per-entry write state (CurEntry, IntEntry, WrittenMask, LastWriteRip, LastWriteSize are used).
// Address:    the written address; it may point anywhere inside the entry.
// EntrySize:  the size, in bytes, of one page table entry.
// OldValue:   receives the entry value from before the write; set only when the entire entry has been written.
// NewValue:   receives the newly written entry value; set only when the entire entry has been written.
//
// Status values visible in this listing:
//   INT_STATUS_SUCCESS       - the entire entry has been written; *OldValue and *NewValue are valid.
//   INT_STATUS_PARTIAL_WRITE - only a part of the entry has been written so far.
//
// NOTE(review): several lines of this function are missing from this listing (the signature line, the bodies
// of the validation branches and of the write-cache branch, and the status assignments on some error paths);
// the comments below describe only what is visible.
147 _Success_(return == INT_STATUS_SUCCESS)
148 INTSTATUS
150  _Inout_ PHOOK_PTEWS WriteState,
151  _In_ QWORD Address,
152  _In_ BYTE EntrySize,
153  _Out_ QWORD *OldValue,
154  _Out_ QWORD *NewValue
155  )
183 {
184  INTSTATUS status;
185  QWORD newValue, oldValue, byteOffs, bitMask, pteAddress;
186  BYTE size, byteMask;
187 
 // NOTE(review): the bodies of the three NULL checks below are not visible in this listing; presumably each
 // one returns an invalid-parameter status - confirm against the original file.
188  if (NULL == WriteState)
189  {
191  }
192 
193  if (NULL == OldValue)
194  {
196  }
197 
198  if (NULL == NewValue)
199  {
201  }
202 
 // Address of the entry that contains the written address (aligned down to EntrySize).
203  pteAddress = Address & ~((QWORD)EntrySize - 1);
204 
205  // This exact PT entry write has already been emulated, and the new value is gVcpu->PtWriteCache.Value. In this
206  // case, we want to see if the new calculated value is the same as the current, known, entry inside this write state.
207  // If they are the same, it means we are dealing with a PTE hook placed on an entry that was just written and
208  // emulated, so the hook was placed with the correct, updated memory value - there is no need to call the swap
209  // callbacks, as nothing actually changed.
210  if (gVcpu->PtWriteCache.Valid &&
211  gVcpu->PtWriteCache.PteAddress == pteAddress &&
212  gVcpu->PtWriteCache.Value == WriteState->CurEntry)
213  {
 // NOTE(review): the statement of this branch is not visible in this listing.
215  }
216 
218 
219  // We need to fetch the current value from the PT. We CAN'T use the intermediate value, because the A or D bits
220  // may have been set (and we don't get notifications on A/D bits modifications made by the CPU, because we would
221  // induce a high impact). Therefore, if the instruction that modifies the entry is a CMPXCHG, we would use a
222  // wrong comparand (without the A/D bits set) and we would think that the source operand is not written into the
223  // PTE, and therefore we would ignore the write. We could do a dirty hack and always assume that CMPXCHG writes
224  // the value in memory, but this is equally risky because the PTE may have changed in the meantime and the value
225  // may not be written, and we would basically have the opposite problem - we would think a write is being made,
226  // when in fact it wouldn't be.
227 
228  // Align the accessed address to the size of one entry & get the offset inside the entry.
229  byteOffs = Address & (EntrySize - 1);
230 
 // The old and new values are taken from the PT emulation buffer, so the buffer must be valid.
 // NOTE(review): the status assignment on this error path is not visible in this listing.
231  if (!gVcpu->PtEmuBuffer.Valid)
232  {
233  ERROR("[ERROR] Unhandled PT write!\n");
236  goto cleanup_and_exit;
237  }
238 
239  oldValue = gVcpu->PtEmuBuffer.Old;
240  newValue = gVcpu->PtEmuBuffer.New;
241 
242  // Accessed size.
243  size = (BYTE)gVcpu->AccessSize; // safe typecast: we know the size is <= 8.
244 
245  // XEN WORKAROUND: Check for duplicate writes.
246  if (!gGuest.Guest64)
247  {
248  PIG_ARCH_REGS regs = &gVcpu->Regs;
249  INSTRUX *instrux = &gVcpu->Instruction;
250 
251  // We don't allow 2 consecutive writes from the same RIP - these would indicate duplicate writes, which
252  // break our internal state. We do allow consecutive writes from the same RIP if the size is 8 bytes, though.
253  if (((regs->Rip & LAST_WRITE_RIP_MASK) == WriteState->LastWriteRip) && (0 == WriteState->LastWriteSize))
254  {
255  CHAR nd[ND_MIN_BUF_SIZE] = {0};
256  NdToText(instrux, regs->Rip, sizeof(nd), nd);
257 
258  LOG("[PTWH] Possible duplicate write from RIP %llx (last RIP: %x), size %d, (last size: %d), "
259  "entry %llx, cur %llx, int %llx, mask %x, instr: %s\n",
260  regs->Rip, WriteState->LastWriteRip, size, WriteState->LastWriteSize, Address,
261  WriteState->CurEntry, WriteState->IntEntry, WriteState->WrittenMask, nd);
262 
 // NOTE(review): the status assignment on this path is not visible in this listing.
264  goto cleanup_and_exit;
265  }
266 
 // Remember the (masked) RIP of this write; LastWriteSize is a flag: 1 for an 8-byte write, 0 otherwise.
267  WriteState->LastWriteRip = (DWORD)(regs->Rip & LAST_WRITE_RIP_MASK);
268  WriteState->LastWriteSize = (size == 8) ? 1 : 0;
269  }
270 
 // One bit for each written byte, positioned at the offset of the write inside the entry.
 // NOTE(review): the result is truncated to BYTE and there is no check here that byteOffs + size fits inside
 // the entry - confirm that this is guaranteed before this function is reached.
271  byteMask = ((1UL << size) - 1) << byteOffs;
272 
 // Expand the byte mask into a mask that has all the bits of the written bytes set.
273  bitMask = gByteMaskToBitMask[byteMask];
274 
275  // Update the old, original value, if this is the first chunk written.
276  if (0 == WriteState->WrittenMask)
277  {
278  WriteState->CurEntry = oldValue;
279  }
280 
281  // Update the written mask.
282  WriteState->WrittenMask |= byteMask;
283 
284  // Update our internal state.
 // Only the bits of the written bytes are replaced; newValue is first shifted to the offset of the write.
285  WriteState->IntEntry = (WriteState->IntEntry & ~bitMask) | ((newValue << (byteOffs * 8)) & bitMask);
286 
 // The write is complete only when all the EntrySize bytes of the entry have been written.
287  if (WriteState->WrittenMask != ((1UL << EntrySize) - 1))
288  {
289  status = INT_STATUS_PARTIAL_WRITE;
290  goto cleanup_and_exit;
291  }
292 
293  // The entire entry has been written, flag this appropriately.
295 
296  *NewValue = WriteState->IntEntry;
297  *OldValue = WriteState->CurEntry;
298 
 // The newly written value becomes the current value, and the accumulation state is reset.
299  WriteState->CurEntry = WriteState->IntEntry;
300  WriteState->IntEntry = 0;
301 
302  WriteState->WrittenMask = 0;
303 
304  // Fill in the PT entry write cache. If a new hook is placed on this exact same PT entry, on this exact same exit,
305  // we will know not to call the swap in callback, as the memory value used when placing the hook will be the same
306  // as this new, written value.
308  gVcpu->PtWriteCache.PteAddress = pteAddress;
309  gVcpu->PtWriteCache.Value = *NewValue;
310 
311  status = INT_STATUS_SUCCESS;
312 
313 cleanup_and_exit:
314 
316 
317  return status;
318 }
TIMER_FRIENDLY void IntDumpArchRegs(IG_ARCH_REGS const *Registers)
This function dumps the register values in a user friendly format.
Definition: dumper.c:20
#define _Out_
Definition: intro_sal.h:22
INTSTATUS IntHookPtwProcessWrite(PHOOK_PTEWS WriteState, QWORD Address, BYTE EntrySize, QWORD *OldValue, QWORD *NewValue)
Processes a page-table write, returning the old and the new page-table entry value.
Definition: hook_ptwh.c:149
uint8_t BYTE
Definition: intro_types.h:47
IG_ARCH_REGS Regs
The current state of the guest registers.
Definition: guests.h:95
#define _In_
Definition: intro_sal.h:21
Measures page table writes emulation.
Definition: stats.h:45
#define INT_STATUS_SUCCESS
Definition: introstatus.h:54
Measures page table writes.
Definition: stats.h:44
#define STATS_EXIT(id)
Definition: stats.h:148
#define IntEnterDebugger()
Definition: introcore.h:373
#define _Success_(expr)
Definition: intro_sal.h:47
#define INT_SUCCESS(Status)
Definition: introstatus.h:42
#define INT_STATUS_NOT_NEEDED_HINT
Definition: introstatus.h:317
#define ERROR(fmt,...)
Definition: glue.h:62
int INTSTATUS
The status data type.
Definition: introstatus.h:24
BOOLEAN Partial
True if the write is partial and not the entire page table entry is modified.
Definition: guests.h:55
BOOLEAN Valid
Definition: guests.h:68
INSTRUX Instruction
The current instruction, pointed by the guest RIP.
Definition: guests.h:88
BOOLEAN Emulated
True if the access was already emulated; False if it was not emulated.
Definition: guests.h:54
INTSTATUS IntHookPtwEmulateWrite(QWORD Address)
Emulate a write that took place on page table entry at Address.
Definition: hook_ptwh.c:12
#define LOG(fmt,...)
Definition: glue.h:61
DWORD AccessSize
The size of the memory access. Valid only for EPT exits.
Definition: guests.h:103
#define _Inout_
Definition: intro_sal.h:20
Describes an operand value.
Definition: decoder.h:50
#define INT_STATUS_NOT_INITIALIZED
Definition: introstatus.h:266
QWORD QwordValues[ND_MAX_REGISTER_SIZE/8]
Definition: decoder.h:57
#define STATS_ENTER(id)
Definition: stats.h:141
uint8_t * PBYTE
Definition: intro_types.h:47
BOOLEAN PaeEnabled
True if Physical Address Extension is enabled.
Definition: guests.h:291
QWORD New
The new, to be written, value of the page table entry.
Definition: guests.h:52
INTSTATUS IntGpaCacheFetchAndAdd(PGPA_CACHE Cache, QWORD Gpa, DWORD Size, PBYTE Buffer)
Fetch data from a cached entry, or add it to the cache, if not already present.
Definition: gpacache.c:508
BOOLEAN Guest64
True if this is a 64-bit guest, False if it is a 32-bit guest.
Definition: guests.h:286
unsigned long long QWORD
Definition: intro_types.h:53
QWORD Old
The old, original, value of the written page table entry.
Definition: guests.h:51
PTWRITE_CACHE PtWriteCache
The last written PT entry.
Definition: guests.h:170
const QWORD gByteMaskToBitMask[256]
Converts a byte number to a mask having the bits in those bytes set.
Definition: introcore.c:73
void * GpaCache
The currently used GPA cache.
Definition: guests.h:399
#define TRUE
Definition: intro_types.h:30
#define INT_STATUS_INVALID_PARAMETER_4
Definition: introstatus.h:71
union _OPERAND_VALUE::@22 Value
The actual operand value.
#define INT_STATUS_INVALID_PARAMETER_5
Definition: introstatus.h:74
#define LAST_WRITE_RIP_MASK
We keep only the low 32 bits from the RIP.
Definition: hook_ptwh.h:11
#define INT_STATUS_ALREADY_INITIALIZED_HINT
Definition: introstatus.h:323
DWORD Size
The operand size.
Definition: decoder.h:60
INTSTATUS IntDecEmulatePTWrite(QWORD *NewValue)
Emulate a page-table write.
Definition: decoder.c:2720
QWORD Value
Definition: guests.h:67
uint32_t DWORD
Definition: intro_types.h:49
GUEST_STATE gGuest
The current guest state.
Definition: guests.c:48
QWORD PteAddress
Definition: guests.h:66
TIMER_FRIENDLY void IntDumpGva(QWORD Gva, DWORD Length, QWORD Cr3)
This function is a wrapper over IntDumpGvaEx (it uses RowLength = 16, ElementLength = 1...
Definition: dumper.c:249
#define INT_STATUS_PARTIAL_WRITE
Definition: introstatus.h:362
#define INT_STATUS_INVALID_PARAMETER_1
Definition: introstatus.h:62
#define INT_STATUS_NOT_SUPPORTED
Definition: introstatus.h:287
VCPU_STATE * gVcpu
The state of the current VCPU.
Definition: guests.c:57
INTSTATUS IntDecGetWrittenValueFromInstruction(PINSTRUX Instrux, PIG_ARCH_REGS Registers, PBYTE MemoryValue, OPERAND_VALUE *WrittenValue)
Decode a written value from a memory write instruction.
Definition: decoder.c:1861
Holds register state.
Definition: glueiface.h:30
BOOLEAN Valid
True if the information in this structure is valid; False if it is not.
Definition: guests.h:53
char CHAR
Definition: intro_types.h:56
PTEMU_BUFFER PtEmuBuffer
The page table write emulator buffer.
Definition: guests.h:169
#define PAGE_MASK
Definition: pgtable.h:35
void IntHookPtsDump(void)
Prints all the page table hooks.
Definition: hook_pts.c:2452
#define FALSE
Definition: intro_types.h:34