1 #include <darwintest.h>
2 #include <pthread.h>
3 #include <stdatomic.h>
4
5 #include <mach/mach.h>
6 #include <mach/vm_map.h>
7 #include <mach/vm_page_size.h>
8
9 #include <sys/sysctl.h>
10
11 #include "hvtest_x86_guest.h"
12
13 #include <Foundation/Foundation.h>
14 #include <Hypervisor/hv.h>
15 #include <Hypervisor/hv_vmx.h>
16
17 T_GLOBAL_META(
18 T_META_NAMESPACE("xnu.intel.hv"),
19 T_META_RUN_CONCURRENTLY(true),
20 T_META_REQUIRES_SYSCTL_NE("hw.optional.arm64", 1) // Don't run translated.
21 );
22
23 static bool
24 hv_support()
25 {
26 int hv_support;
27 size_t hv_support_size = sizeof(hv_support);
28
29 int err = sysctlbyname("kern.hv_support", &hv_support, &hv_support_size, NULL, 0);
30 if (err) {
31 return false;
32 } else {
33 return hv_support != 0;
34 }
35 }
36
37 static uint64_t get_reg(hv_vcpuid_t vcpu, hv_x86_reg_t reg)
38 {
39 uint64_t val;
40 T_QUIET; T_EXPECT_EQ(hv_vcpu_read_register(vcpu, reg, &val), HV_SUCCESS,
41 "get register");
42 return val;
43 }
44
45 static void set_reg(hv_vcpuid_t vcpu, hv_x86_reg_t reg, uint64_t value)
46 {
47 T_QUIET; T_EXPECT_EQ(hv_vcpu_write_register(vcpu, reg, value), HV_SUCCESS,
48 "set register");
49 }
50
51 static uint64_t get_vmcs(hv_vcpuid_t vcpu, uint32_t field)
52 {
53 uint64_t val;
54 T_QUIET; T_EXPECT_EQ(hv_vmx_vcpu_read_vmcs(vcpu, field, &val), HV_SUCCESS,
55 "get vmcs");
56 return val;
57 }
58
59 static void set_vmcs(hv_vcpuid_t vcpu, uint32_t field, uint64_t value)
60 {
61 T_QUIET; T_EXPECT_EQ(hv_vmx_vcpu_write_vmcs(vcpu, field, value), HV_SUCCESS,
62 "set vmcs");
63 }
64
65 static uint64_t get_cap(uint32_t field)
66 {
67 uint64_t val;
68 T_QUIET; T_ASSERT_EQ(hv_vmx_read_capability(field, &val), HV_SUCCESS,
69 "get capability");
70 return val;
71 }
72
73
74
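/*
 * Guest physical memory bookkeeping for the test harness:
 *   page_cache            maps guest-physical addresses to the host user
 *                         virtual addresses backing them,
 *   allocated_phys_pages  tracks pages valloc()ed by the harness so that
 *                         free_page_cache() can release them,
 *   next_phys             is the next unused guest-physical address handed
 *                         out by map_guest_phys().
 */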
75 static NSMutableDictionary *page_cache;
76 static NSMutableSet *allocated_phys_pages;
77 static pthread_mutex_t page_cache_lock = PTHREAD_MUTEX_INITIALIZER;
78
79 static uint64_t next_phys = 0x4000000;
80
81 /*
82  * Map a page into the guest's physical address space and return the gpa
83  * of the page. If *host_uva is NULL, a new host user page is allocated.
84  */
85 static hv_gpaddr_t
86 map_guest_phys(void **host_uva)
87 {
88 T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&page_cache_lock),
89 "acquire page lock");
90
91 hv_gpaddr_t gpa = next_phys;
92 next_phys += vm_page_size;
93
94 if (*host_uva == NULL) {
95 *host_uva = valloc(vm_page_size);
96 memset(*host_uva, 0, vm_page_size);
97 [allocated_phys_pages addObject:@((uintptr_t)*host_uva)];
98 }
99
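/*
 * Map read-only initially; when on-demand paging is in use,
 * run_to_next_vm_fault() upgrades the mapping to writable or executable
 * on the corresponding EPT violations.
 */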
100 T_QUIET; T_ASSERT_EQ(hv_vm_map(*host_uva, gpa, vm_page_size, HV_MEMORY_READ), HV_SUCCESS, "enter hv mapping");
101
102 [page_cache setObject:@((uintptr_t)*host_uva) forKey:@(gpa)];
103
104
105 T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&page_cache_lock),
106 "release page lock");
107
108 return gpa;
109 }
110
111 static uint64_t *pml4;
112 static hv_gpaddr_t pml4_gpa;
113
114 /* Stolen from kern/bits.h, which cannot be included outside the kernel. */
115 #define BIT(b) (1ULL << (b))
116
117 #define mask(width) (width >= 64 ? (unsigned long long)-1 : (BIT(width) - 1))
118 #define extract(x, shift, width) ((((uint64_t)(x)) >> (shift)) & mask(width))
119 #define bits(x, hi, lo) extract((x), (lo), (hi) - (lo) + 1)
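/*
 * Example: for a guest virtual address va, bits(va, 47, 39) yields the
 * 9-bit PML4 index, bits(va, 38, 30) the PDPT index, bits(va, 29, 21)
 * the PD index and bits(va, 20, 12) the PT index, matching how
 * map_page() walks the four levels below.
 */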
120
121
122 /*
123  * Enter a page at one level of long mode's PML4 paging structures.
124  * Helper for map_page()/fault_in_page().
125  */
126 static void *
127 enter_level(uint64_t *table, void *host_va, void *va, int hi, int lo) {
128 uint64_t * const te = &table[bits(va, hi, lo)];
129
130 const uint64_t present = 1;
131 const uint64_t rw = 2;
132
133 const uint64_t addr_mask = mask(47-12) << 12;
134
135 if (!(*te & present)) {
136 hv_gpaddr_t gpa = map_guest_phys(&host_va);
137 *te = (gpa & addr_mask) | rw | present;
138 } else {
139 NSNumber *num = [page_cache objectForKey:@(*te & addr_mask)];
140 T_QUIET; T_ASSERT_NOTNULL(num, "existing page is backed");
141 void *backing = (void*)[num unsignedLongValue];
142 if (host_va != 0) {
143 T_QUIET; T_ASSERT_EQ(va, backing, "backing page matches");
144 } else {
145 host_va = backing;
146 }
147 }
148
149 return host_va;
150 }
151
152 /*
153  * Enters a page both into the guest paging structures and the EPT.
154  * (Long mode PML4 only; real mode and protected mode support running
155  * without paging, and that's what they use instead.)
156  */
157 static void *
158 map_page(void *host_va, void *va) {
159 uint64_t *pdpt = enter_level(pml4, NULL, va, 47, 39);
160 uint64_t *pd = enter_level(pdpt, NULL, va, 38, 30);
161 uint64_t *pt = enter_level(pd, NULL, va, 29, 21);
162 return enter_level(pt, host_va, va, 20, 12);
163 }
164
165 static void
166 fault_in_page(void *va) {
167 map_page(va, va);
168 }
169
170 static void free_page_cache(void)
171 {
172 T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&page_cache_lock),
173 "acquire page lock");
174
175 for (NSNumber *uvaNumber in allocated_phys_pages) {
176 uintptr_t va = [uvaNumber unsignedLongValue];
177 free((void *)va);
178 }
179 [page_cache release];
180 [allocated_phys_pages release];
181
182 T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&page_cache_lock),
183 "release page lock");
184 }
185
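/*
 * Run the VCPU until it takes an exit other than an external interrupt.
 * With on_demand_paging set, guest page faults are satisfied by mapping
 * the faulting page on the fly (identity-mapped via fault_in_page()),
 * and EPT write/exec violations are satisfied by upgrading the EPT
 * permissions; the VCPU is then resumed. Returns the exit reason of the
 * first exit that is not handled this way.
 */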
186 static uint64_t
187 run_to_next_vm_fault(hv_vcpuid_t vcpu, bool on_demand_paging)
188 {
189 bool retry;
190 uint64_t exit_reason, qual, gpa, gla, info, vector_info, error_code;
191 do {
192 retry = false;
193 do {
194 T_QUIET; T_ASSERT_EQ(hv_vcpu_run_until(vcpu, ~(uint64_t)0), HV_SUCCESS, "run VCPU");
195 exit_reason = get_vmcs(vcpu, VMCS_RO_EXIT_REASON);
196
197 } while (exit_reason == VMX_REASON_IRQ);
198
199 qual = get_vmcs(vcpu, VMCS_RO_EXIT_QUALIFIC);
200 gpa = get_vmcs(vcpu, VMCS_GUEST_PHYSICAL_ADDRESS);
201 gla = get_vmcs(vcpu, VMCS_RO_GUEST_LIN_ADDR);
202 info = get_vmcs(vcpu, VMCS_RO_VMEXIT_IRQ_INFO);
203 vector_info = get_vmcs(vcpu, VMCS_RO_IDT_VECTOR_INFO);
204 error_code = get_vmcs(vcpu, VMCS_RO_VMEXIT_IRQ_ERROR);
205
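/*
 * Exit decoding below (Intel SDM VM-exit information field encodings):
 *  - (info & 0x800003ff) == 0x8000030e matches a valid hardware exception
 *    with vector 14, i.e. a guest #PF; error_code bit 0 clear means the
 *    page was not present, which is the only fault the harness fixes up.
 *  - In the EPT violation qualification, bit 1 is a write access, bit 2
 *    an instruction fetch and bit 7 "guest linear address valid", so
 *    (qual & 0x86) == 0x82 selects write faults and 0x84 exec faults.
 */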
206 if (on_demand_paging) {
207 if (exit_reason == VMX_REASON_EXC_NMI &&
208 (info & 0x800003ff) == 0x8000030e &&
209 (error_code & 0x1) == 0) {
210 // guest paging fault
211 fault_in_page((void*)qual);
212 retry = true;
213 }
214 else if (exit_reason == VMX_REASON_EPT_VIOLATION) {
215 if ((qual & 0x86) == 0x82) {
216 // EPT write fault
217 T_QUIET; T_ASSERT_EQ(hv_vm_protect(gpa & ~(hv_gpaddr_t)PAGE_MASK, vm_page_size,
218 HV_MEMORY_READ | HV_MEMORY_WRITE),
219 HV_SUCCESS, "make page writable");
220 retry = true;
221 }
222 else if ((qual & 0x86) == 0x84) {
223 // EPT exec fault
224 T_QUIET; T_ASSERT_EQ(hv_vm_protect(gpa & ~(hv_gpaddr_t)PAGE_MASK, vm_page_size,
225 HV_MEMORY_READ | HV_MEMORY_EXEC),
226 HV_SUCCESS, "make page executable");
227 retry = true;
228 }
229 }
230 }
231 } while (retry);
232
233 // printf("reason: %lld, qualification: %llx\n", exit_reason, qual);
234 // printf("gpa: %llx, gla: %llx\n", gpa, gla);
235 // printf("RIP: %llx\n", get_reg(vcpu, HV_X86_RIP));
236 // printf("CR3: %llx\n", get_reg(vcpu, HV_X86_CR3));
237 // printf("info: %llx\n", info);
238 // printf("vector_info: %llx\n", vector_info);
239 // printf("error_code: %llx\n", error_code);
240
241 return exit_reason;
242 }
243
244 static uint64_t
245 expect_vmcall(hv_vcpuid_t vcpu, bool on_demand_paging)
246 {
247 uint64_t reason = run_to_next_vm_fault(vcpu, on_demand_paging);
248 T_ASSERT_EQ(reason, (uint64_t)VMX_REASON_VMCALL, "expect vmcall exit");
249
250 // advance RIP to after VMCALL
251 set_vmcs(vcpu, VMCS_GUEST_RIP, get_reg(vcpu, HV_X86_RIP)+get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN));
252
253 return get_reg(vcpu, HV_X86_RAX);
254 }
255
256 static uint64_t
257 expect_vmcall_with_value(hv_vcpuid_t vcpu, uint64_t rax, bool on_demand_paging)
258 {
259 uint64_t reason = run_to_next_vm_fault(vcpu, on_demand_paging);
260 T_QUIET; T_ASSERT_EQ(reason, (uint64_t)VMX_REASON_VMCALL, "check for vmcall exit");
261 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RAX), rax, "vmcall exit with expected RAX value %llx", rax);
262
263 // advance RIP to after VMCALL
264 set_vmcs(vcpu, VMCS_GUEST_RIP, get_reg(vcpu, HV_X86_RIP)+get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN));
265
266 return reason;
267 }
268
269 typedef void (*vcpu_entry_function)(uint64_t);
270 typedef void *(*vcpu_monitor_function)(void *, hv_vcpuid_t);
271
272 struct test_vcpu {
273 hv_vcpuid_t vcpu;
274 vcpu_entry_function guest_func;
275 uint64_t guest_param;
276 vcpu_monitor_function monitor_func;
277 void *monitor_param;
278 };
279
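/*
 * hv_vmx_read_capability() returns the allowed settings of a VMX control
 * in the layout of the corresponding IA32_VMX_* capability MSR: the low
 * 32 bits are the bits that must be 1, the high 32 bits the bits that may
 * be 1. canonicalize() forces the mandatory bits on and strips anything
 * the CPU does not allow from the requested control value.
 */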
280 static uint64_t
281 canonicalize(uint64_t ctrl, uint64_t mask)
282 {
283 return (ctrl | (mask & 0xffffffff)) & (mask >> 32);
284 }
285
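/*
 * Minimal VMCS setup for a real mode guest. Notable constants, per the
 * SDM encodings: CR0 = 0x20 keeps PE clear (real mode) with NE set,
 * CR4 = 0x2000 sets only VMXE, segment access rights 0x93/0x9b are
 * present ring-0 data/code segments, TR_AR 0x83 is a busy 16-bit TSS and
 * LDTR_AR 0x10000 marks LDTR unusable. The all-ones exception bitmap
 * makes every guest exception exit to the monitor.
 */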
286 static void
287 setup_real_mode(hv_vcpuid_t vcpu)
288 {
289 uint64_t pin_cap, proc_cap, proc2_cap, entry_cap, exit_cap;
290
291 pin_cap = get_cap(HV_VMX_CAP_PINBASED);
292 proc_cap = get_cap(HV_VMX_CAP_PROCBASED);
293 proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
294 entry_cap = get_cap(HV_VMX_CAP_ENTRY);
295 exit_cap = get_cap(HV_VMX_CAP_EXIT);
296
297 set_vmcs(vcpu, VMCS_CTRL_PIN_BASED, canonicalize(0, pin_cap));
298 set_vmcs(vcpu, VMCS_CTRL_CPU_BASED,
299 canonicalize(CPU_BASED_HLT | CPU_BASED_CR8_LOAD | CPU_BASED_CR8_STORE, proc_cap));
300 set_vmcs(vcpu, VMCS_CTRL_CPU_BASED2, canonicalize(0, proc2_cap));
301 set_vmcs(vcpu, VMCS_CTRL_VMENTRY_CONTROLS, canonicalize(0, entry_cap));
302 set_vmcs(vcpu, VMCS_CTRL_VMEXIT_CONTROLS, canonicalize(0, exit_cap));
303
304 set_vmcs(vcpu, VMCS_GUEST_CR0, 0x20);
305 set_vmcs(vcpu, VMCS_CTRL_CR0_MASK, ~0u);
306 set_vmcs(vcpu, VMCS_CTRL_CR0_SHADOW, 0x20);
307 set_vmcs(vcpu, VMCS_GUEST_CR4, 0x2000);
308 set_vmcs(vcpu, VMCS_CTRL_CR4_MASK, ~0u);
309 set_vmcs(vcpu, VMCS_CTRL_CR4_SHADOW, 0x0000);
310 set_vmcs(vcpu, VMCS_GUEST_TR_AR, 0x83);
311 set_vmcs(vcpu, VMCS_GUEST_LDTR_AR, 0x10000);
312 set_vmcs(vcpu, VMCS_GUEST_SS, 0);
313 set_vmcs(vcpu, VMCS_GUEST_SS_BASE, 0);
314 set_vmcs(vcpu, VMCS_GUEST_SS_LIMIT, 0xffff);
315 set_vmcs(vcpu, VMCS_GUEST_SS_AR, 0x93);
316 set_vmcs(vcpu, VMCS_GUEST_CS, 0);
317 set_vmcs(vcpu, VMCS_GUEST_CS_BASE, 0);
318 set_vmcs(vcpu, VMCS_GUEST_CS_LIMIT, 0xffff);
319 set_vmcs(vcpu, VMCS_GUEST_CS_AR, 0x9b);
320 set_vmcs(vcpu, VMCS_GUEST_DS, 0);
321 set_vmcs(vcpu, VMCS_GUEST_DS_BASE, 0);
322 set_vmcs(vcpu, VMCS_GUEST_DS_LIMIT, 0xffff);
323 set_vmcs(vcpu, VMCS_GUEST_DS_AR, 0x93);
324 set_vmcs(vcpu, VMCS_GUEST_ES, 0);
325 set_vmcs(vcpu, VMCS_GUEST_ES_BASE, 0);
326 set_vmcs(vcpu, VMCS_GUEST_ES_LIMIT, 0xffff);
327 set_vmcs(vcpu, VMCS_GUEST_ES_AR, 0x93);
328 set_vmcs(vcpu, VMCS_GUEST_FS, 0);
329 set_vmcs(vcpu, VMCS_GUEST_FS_BASE, 0);
330 set_vmcs(vcpu, VMCS_GUEST_FS_LIMIT, 0xffff);
331 set_vmcs(vcpu, VMCS_GUEST_FS_AR, 0x93);
332 set_vmcs(vcpu, VMCS_GUEST_GS, 0);
333 set_vmcs(vcpu, VMCS_GUEST_GS_BASE, 0);
334 set_vmcs(vcpu, VMCS_GUEST_GS_LIMIT, 0xffff);
335 set_vmcs(vcpu, VMCS_GUEST_GS_AR, 0x93);
336
337 set_vmcs(vcpu, VMCS_GUEST_GDTR_BASE, 0);
338 set_vmcs(vcpu, VMCS_GUEST_GDTR_LIMIT, 0);
339 set_vmcs(vcpu, VMCS_GUEST_IDTR_BASE, 0);
340 set_vmcs(vcpu, VMCS_GUEST_IDTR_LIMIT, 0);
341
342 set_vmcs(vcpu, VMCS_GUEST_RFLAGS, 0x2);
343
344 set_vmcs(vcpu, VMCS_CTRL_EXC_BITMAP, 0xffffffff);
345 }
346
347 static void
348 setup_protected_mode(hv_vcpuid_t vcpu)
349 {
350 uint64_t pin_cap, proc_cap, proc2_cap, entry_cap, exit_cap;
351
352 pin_cap = get_cap(HV_VMX_CAP_PINBASED);
353 proc_cap = get_cap(HV_VMX_CAP_PROCBASED);
354 proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
355 entry_cap = get_cap(HV_VMX_CAP_ENTRY);
356 exit_cap = get_cap(HV_VMX_CAP_EXIT);
357
358 set_vmcs(vcpu, VMCS_CTRL_PIN_BASED, canonicalize(0, pin_cap));
359 set_vmcs(vcpu, VMCS_CTRL_CPU_BASED,
360 canonicalize(CPU_BASED_HLT | CPU_BASED_CR8_LOAD | CPU_BASED_CR8_STORE, proc_cap));
361 set_vmcs(vcpu, VMCS_CTRL_CPU_BASED2, canonicalize(0, proc2_cap));
362 set_vmcs(vcpu, VMCS_CTRL_VMENTRY_CONTROLS, canonicalize(0, entry_cap));
363 set_vmcs(vcpu, VMCS_CTRL_VMEXIT_CONTROLS, canonicalize(0, exit_cap));
364
365 set_vmcs(vcpu, VMCS_GUEST_CR0, 0x21);
366 set_vmcs(vcpu, VMCS_CTRL_CR0_MASK, ~0u);
367 set_vmcs(vcpu, VMCS_CTRL_CR0_SHADOW, 0x21);
368 set_vmcs(vcpu, VMCS_GUEST_CR3, 0);
369 set_vmcs(vcpu, VMCS_GUEST_CR4, 0x2000);
370 set_vmcs(vcpu, VMCS_CTRL_CR4_MASK, ~0u);
371 set_vmcs(vcpu, VMCS_CTRL_CR4_SHADOW, 0x0000);
372
373 set_vmcs(vcpu, VMCS_GUEST_TR, 0);
374 set_vmcs(vcpu, VMCS_GUEST_TR_AR, 0x8b);
375
376 set_vmcs(vcpu, VMCS_GUEST_LDTR, 0x0);
377 set_vmcs(vcpu, VMCS_GUEST_LDTR_AR, 0x10000);
378
379 set_vmcs(vcpu, VMCS_GUEST_SS, 0x8);
380 set_vmcs(vcpu, VMCS_GUEST_SS_BASE, 0);
381 set_vmcs(vcpu, VMCS_GUEST_SS_LIMIT, 0xffffffff);
382 set_vmcs(vcpu, VMCS_GUEST_SS_AR, 0xc093);
383
384 set_vmcs(vcpu, VMCS_GUEST_CS, 0x10);
385 set_vmcs(vcpu, VMCS_GUEST_CS_BASE, 0);
386 set_vmcs(vcpu, VMCS_GUEST_CS_LIMIT, 0xffffffff);
387 set_vmcs(vcpu, VMCS_GUEST_CS_AR, 0xc09b);
388
389 set_vmcs(vcpu, VMCS_GUEST_DS, 0x8);
390 set_vmcs(vcpu, VMCS_GUEST_DS_BASE, 0);
391 set_vmcs(vcpu, VMCS_GUEST_DS_LIMIT, 0xffffffff);
392 set_vmcs(vcpu, VMCS_GUEST_DS_AR, 0xc093);
393
394 set_vmcs(vcpu, VMCS_GUEST_ES, 0x8);
395 set_vmcs(vcpu, VMCS_GUEST_ES_BASE, 0);
396 set_vmcs(vcpu, VMCS_GUEST_ES_LIMIT, 0xffffffff);
397 set_vmcs(vcpu, VMCS_GUEST_ES_AR, 0xc093);
398
399 set_vmcs(vcpu, VMCS_GUEST_FS, 0x8);
400 set_vmcs(vcpu, VMCS_GUEST_FS_BASE, 0);
401 set_vmcs(vcpu, VMCS_GUEST_FS_LIMIT, 0xffffffff);
402 set_vmcs(vcpu, VMCS_GUEST_FS_AR, 0xc093);
403
404 set_vmcs(vcpu, VMCS_GUEST_GS, 0x8);
405 set_vmcs(vcpu, VMCS_GUEST_GS_BASE, 0);
406 set_vmcs(vcpu, VMCS_GUEST_GS_LIMIT, 0xffffffff);
407 set_vmcs(vcpu, VMCS_GUEST_GS_AR, 0xc093);
408
409 set_vmcs(vcpu, VMCS_GUEST_GDTR_BASE, 0);
410 set_vmcs(vcpu, VMCS_GUEST_GDTR_LIMIT, 0);
411
412 set_vmcs(vcpu, VMCS_GUEST_IDTR_BASE, 0);
413 set_vmcs(vcpu, VMCS_GUEST_IDTR_LIMIT, 0);
414
415 set_vmcs(vcpu, VMCS_GUEST_RFLAGS, 0x2);
416
417 set_vmcs(vcpu, VMCS_CTRL_EXC_BITMAP, 0xffffffff);
418 }
419
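/*
 * Long mode variant of the setup above: VM entry is performed with the
 * IA-32e guest control set, EFER = 0x500 (LME | LMA), CR0 = 0x80000021
 * (PG | NE | PE), CR4 = 0x2020 (PAE | VMXE), and 64-bit code/data access
 * rights (0xa09b/0xa093, i.e. the L bit instead of D/B). CR3 points at
 * the harness-managed PML4, so guest virtual addresses are resolved
 * through the page tables built by map_page()/fault_in_page().
 */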
420 static void
421 setup_long_mode(hv_vcpuid_t vcpu)
422 {
423 uint64_t pin_cap, proc_cap, proc2_cap, entry_cap, exit_cap;
424
425 pin_cap = get_cap(HV_VMX_CAP_PINBASED);
426 proc_cap = get_cap(HV_VMX_CAP_PROCBASED);
427 proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
428 entry_cap = get_cap(HV_VMX_CAP_ENTRY);
429 exit_cap = get_cap(HV_VMX_CAP_EXIT);
430
431 set_vmcs(vcpu, VMCS_CTRL_PIN_BASED, canonicalize(0, pin_cap));
432 set_vmcs(vcpu, VMCS_CTRL_CPU_BASED,
433 canonicalize(CPU_BASED_HLT | CPU_BASED_CR8_LOAD | CPU_BASED_CR8_STORE, proc_cap));
434 set_vmcs(vcpu, VMCS_CTRL_CPU_BASED2, canonicalize(0, proc2_cap));
435 set_vmcs(vcpu, VMCS_CTRL_VMENTRY_CONTROLS, canonicalize(VMENTRY_GUEST_IA32E, entry_cap));
436 set_vmcs(vcpu, VMCS_CTRL_VMEXIT_CONTROLS, canonicalize(0, exit_cap));
437
438 set_vmcs(vcpu, VMCS_GUEST_CR0, 0x80000021L);
439 set_vmcs(vcpu, VMCS_CTRL_CR0_MASK, ~0u);
440 set_vmcs(vcpu, VMCS_CTRL_CR0_SHADOW, 0x80000021L);
441 set_vmcs(vcpu, VMCS_GUEST_CR4, 0x2020);
442 set_vmcs(vcpu, VMCS_CTRL_CR4_MASK, ~0u);
443 set_vmcs(vcpu, VMCS_CTRL_CR4_SHADOW, 0x2020);
444
445 set_vmcs(vcpu, VMCS_GUEST_IA32_EFER, 0x500);
446
447 T_QUIET; T_ASSERT_EQ(hv_vcpu_enable_native_msr(vcpu, MSR_IA32_KERNEL_GS_BASE, true), HV_SUCCESS, "enable native GS_BASE");
448
449 set_vmcs(vcpu, VMCS_GUEST_TR, 0);
450 set_vmcs(vcpu, VMCS_GUEST_TR_AR, 0x8b);
451
452 set_vmcs(vcpu, VMCS_GUEST_LDTR, 0x0);
453 set_vmcs(vcpu, VMCS_GUEST_LDTR_AR, 0x10000);
454
455 set_vmcs(vcpu, VMCS_GUEST_SS, 0x8);
456 set_vmcs(vcpu, VMCS_GUEST_SS_BASE, 0);
457 set_vmcs(vcpu, VMCS_GUEST_SS_LIMIT, 0xffffffff);
458 set_vmcs(vcpu, VMCS_GUEST_SS_AR, 0xa093);
459
460 set_vmcs(vcpu, VMCS_GUEST_CS, 0x10);
461 set_vmcs(vcpu, VMCS_GUEST_CS_BASE, 0);
462 set_vmcs(vcpu, VMCS_GUEST_CS_LIMIT, 0xffffffff);
463 set_vmcs(vcpu, VMCS_GUEST_CS_AR, 0xa09b);
464
465 set_vmcs(vcpu, VMCS_GUEST_DS, 0x8);
466 set_vmcs(vcpu, VMCS_GUEST_DS_BASE, 0);
467 set_vmcs(vcpu, VMCS_GUEST_DS_LIMIT, 0xffffffff);
468 set_vmcs(vcpu, VMCS_GUEST_DS_AR, 0xa093);
469
470 set_vmcs(vcpu, VMCS_GUEST_ES, 0x8);
471 set_vmcs(vcpu, VMCS_GUEST_ES_BASE, 0);
472 set_vmcs(vcpu, VMCS_GUEST_ES_LIMIT, 0xffffffff);
473 set_vmcs(vcpu, VMCS_GUEST_ES_AR, 0xa093);
474
475 set_vmcs(vcpu, VMCS_GUEST_FS, 0x8);
476 set_vmcs(vcpu, VMCS_GUEST_FS_BASE, 0);
477 set_vmcs(vcpu, VMCS_GUEST_FS_LIMIT, 0xffffffff);
478 set_vmcs(vcpu, VMCS_GUEST_FS_AR, 0xa093);
479
480 set_vmcs(vcpu, VMCS_GUEST_GS, 0x8);
481 set_vmcs(vcpu, VMCS_GUEST_GS_BASE, 0);
482 set_vmcs(vcpu, VMCS_GUEST_GS_LIMIT, 0xffffffff);
483 set_vmcs(vcpu, VMCS_GUEST_GS_AR, 0xa093);
484
485 set_vmcs(vcpu, VMCS_GUEST_RFLAGS, 0x2);
486
487 set_vmcs(vcpu, VMCS_CTRL_EXC_BITMAP, 0xffffffff);
488
489 set_vmcs(vcpu, VMCS_GUEST_CR3, pml4_gpa);
490
491 set_vmcs(vcpu, VMCS_GUEST_GDTR_BASE, 0);
492 set_vmcs(vcpu, VMCS_GUEST_GDTR_LIMIT, 0);
493
494 set_vmcs(vcpu, VMCS_GUEST_IDTR_BASE, 0);
495 set_vmcs(vcpu, VMCS_GUEST_IDTR_LIMIT, 0);
496 }
497
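/*
 * Thread body for a VCPU/monitor pair. The VCPU is created and destroyed
 * on this thread because Hypervisor.framework binds a vcpu to the thread
 * that creates it; the guest entry point, stack pointer and parameter are
 * handed over via RIP, RSP and RDI before the monitor drives the guest.
 */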
498 static void *
499 wrap_monitor(void *param)
500 {
501 struct test_vcpu *test = (struct test_vcpu *)param;
502
503 T_QUIET; T_ASSERT_EQ(hv_vcpu_create(&test->vcpu, HV_VCPU_DEFAULT), HV_SUCCESS,
504 "created vcpu");
505
506 const size_t stack_size = 0x4000;
507 void *stack_bottom = valloc(stack_size);
508 T_QUIET; T_ASSERT_NOTNULL(stack_bottom, "allocate VCPU stack");
509 vcpu_entry_function entry = test->guest_func;
510
511 set_vmcs(test->vcpu, VMCS_GUEST_RIP, (uintptr_t)entry);
512 set_vmcs(test->vcpu, VMCS_GUEST_RSP, (uintptr_t)stack_bottom + stack_size);
513 set_reg(test->vcpu, HV_X86_RDI, test->guest_param);
514
515 void *result = test->monitor_func(test->monitor_param, test->vcpu);
516
517 T_QUIET; T_ASSERT_EQ(hv_vcpu_destroy(test->vcpu), HV_SUCCESS, "Destroyed vcpu");
518 free(stack_bottom);
519 free(test);
520 return result;
521 }
522
523 static pthread_t
524 create_vcpu_thread(
525 vcpu_entry_function guest_function, uint64_t guest_param,
526 vcpu_monitor_function monitor_func, void *monitor_param)
527 {
528
529 pthread_t thread;
530 struct test_vcpu *test = malloc(sizeof(*test));
531 T_QUIET; T_ASSERT_NOTNULL(test, "malloc test params");
532 test->guest_func = guest_function;
533 test->guest_param = guest_param;
534 test->monitor_func = monitor_func;
535 test->monitor_param = monitor_param;
536 T_ASSERT_POSIX_SUCCESS(pthread_create(&thread, NULL, wrap_monitor, test),
537 "create vcpu pthread");
538 // ownership of test struct moves to the thread
539 test = NULL;
540
541 return thread;
542 }
543
544 static void
545 vm_setup()
546 {
547 T_SETUPBEGIN;
548
549 if (hv_support() < 1) {
550 T_SKIP("Running on non-HV target, skipping...");
551 return;
552 }
553
554 page_cache = [[NSMutableDictionary alloc] init];
555 allocated_phys_pages = [[NSMutableSet alloc] init];
556
557 T_ASSERT_EQ(hv_vm_create(HV_VM_DEFAULT), HV_SUCCESS, "Created vm");
558
559
560 // Set up root paging structures for long mode,
561 // where paging is mandatory.
562
563 pml4_gpa = map_guest_phys((void**)&pml4);
564 memset(pml4, 0, vm_page_size);
565
566 T_SETUPEND;
567 }
568
569 static void
570 vm_cleanup()
571 {
572 T_ASSERT_EQ(hv_vm_destroy(), HV_SUCCESS, "Destroyed vm");
573 free_page_cache();
574 }
575
576 static pthread_cond_t ready_cond = PTHREAD_COND_INITIALIZER;
577 static pthread_mutex_t vcpus_ready_lock = PTHREAD_MUTEX_INITIALIZER;
578 static uint32_t vcpus_initializing;
579 static pthread_mutex_t vcpus_hang_lock = PTHREAD_MUTEX_INITIALIZER;
580
581 static void *
582 multikill_vcpu_thread_function(void *arg)
583 {
584 hv_vcpuid_t *vcpu = (hv_vcpuid_t*)arg;
585
586 T_QUIET; T_ASSERT_EQ(hv_vcpu_create(vcpu, HV_VCPU_DEFAULT), HV_SUCCESS,
587 "created vcpu");
588
589 T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&vcpus_ready_lock),
590 "acquire vcpus_ready_lock");
591 T_QUIET; T_ASSERT_NE(vcpus_initializing, 0, "check for vcpus_ready underflow");
592 vcpus_initializing--;
593 if (vcpus_initializing == 0) {
594 T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_cond_signal(&ready_cond),
595 "signaling all VCPUs ready");
596 }
597 T_QUIET; T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&vcpus_ready_lock),
598 "release vcpus_ready_lock");
599
600 // To cause the VCPU pointer to be cleared from the wrong thread, we need
601 // to get threads onto the thread deallocate queue. One way to accomplish
602 // this is to die while waiting for a lock.
603 T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&vcpus_hang_lock),
604 "acquire vcpus_hang_lock");
605
606 // Do not allow the thread to terminate. Exactly one thread will acquire
607 // the above lock successfully.
608 while (true) {
609 pause();
610 }
611
612 return NULL;
613 }
614
615 T_DECL(regression_55524541,
616 "kill task with multiple VCPU threads waiting for lock")
617 {
618 if (!hv_support()) {
619 T_SKIP("no HV support");
620 }
621
622 int pipedesc[2];
623 T_ASSERT_POSIX_SUCCESS(pipe(pipedesc), "create pipe");
624
625 pid_t child = fork();
626 if (child == 0) {
627 const uint32_t vcpu_count = 8;
628 pthread_t vcpu_threads[8];
629 T_ASSERT_EQ(hv_vm_create(HV_VM_DEFAULT), HV_SUCCESS, "created vm");
630 vcpus_initializing = vcpu_count;
631 for (uint32_t i = 0; i < vcpu_count; i++) {
632 hv_vcpuid_t vcpu;
633
634 T_ASSERT_POSIX_SUCCESS(pthread_create(&vcpu_threads[i], NULL,
635 multikill_vcpu_thread_function, (void *)&vcpu),
636 "create vcpu_threads[%u]", i);
637 }
638
639 T_ASSERT_POSIX_SUCCESS(pthread_mutex_lock(&vcpus_ready_lock),
640 "acquire vcpus_ready_lock");
641 while (vcpus_initializing != 0) {
642 T_ASSERT_POSIX_SUCCESS(pthread_cond_wait(&ready_cond,
643 &vcpus_ready_lock), "wait for all threads ready");
644 }
645 T_ASSERT_POSIX_SUCCESS(pthread_mutex_unlock(&vcpus_ready_lock),
646 "release vcpus_ready_lock");
647
648 // Indicate readiness to die, meditate peacefully.
649 uint8_t byte = 0;
650 T_ASSERT_EQ_LONG(write(pipedesc[1], &byte, 1), 1L, "notifying on pipe");
651 while (true) {
652 pause();
653 }
654 } else {
655 T_ASSERT_GT(child, 0, "successful fork");
656 // Wait for child to prepare.
657 uint8_t byte;
658 T_ASSERT_EQ_LONG(read(pipedesc[0], &byte, 1), 1L, "waiting on pipe");
659 T_ASSERT_POSIX_SUCCESS(kill(child, SIGTERM), "kill child");
660 // Hope for no panic...
661 T_ASSERT_POSIX_SUCCESS(wait(NULL), "reap child");
662 }
663 T_ASSERT_POSIX_SUCCESS(close(pipedesc[0]), "close pipedesc[0]");
664 T_ASSERT_POSIX_SUCCESS(close(pipedesc[1]), "close pipedesc[1]");
665 }
666
667 static void *
668 simple_long_mode_monitor(void *arg __unused, hv_vcpuid_t vcpu)
669 {
670 setup_long_mode(vcpu);
671
672 expect_vmcall_with_value(vcpu, 0x33456, true);
673
674 return NULL;
675 }
676
677 T_DECL(simple_long_mode_guest, "simple long mode guest")
678 {
679 vm_setup();
680
681 pthread_t vcpu_thread = create_vcpu_thread(simple_long_mode_vcpu_entry, 0x10000, simple_long_mode_monitor, 0);
682 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
683
684 vm_cleanup();
685 }
686
687 static void *
688 smp_test_monitor(void *arg __unused, hv_vcpuid_t vcpu)
689 {
690 setup_long_mode(vcpu);
691
692 uint64_t value = expect_vmcall(vcpu, true);
693 return (void *)(uintptr_t)value;
694 }
695
696 T_DECL(smp_sanity, "Multiple VCPUs in the same VM")
697 {
698 vm_setup();
699
700 // Use this region as shared memory between the VCPUs.
701 void *shared = NULL;
702 map_guest_phys((void**)&shared);
703
704 atomic_uint *count_word = (atomic_uint *)shared;
705 atomic_init(count_word, 0);
706
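// Each VCPU runs smp_vcpu_entry with the shared count word as its
// parameter and reports a value back via VMCALL; the monitor returns
// that value through pthread_join. Exactly one of the two VCPUs should
// observe 0 and the other 1, in either order (checked below).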
707 pthread_t vcpu1_thread = create_vcpu_thread(smp_vcpu_entry,
708 (uintptr_t)count_word, smp_test_monitor, count_word);
709 pthread_t vcpu2_thread = create_vcpu_thread(smp_vcpu_entry,
710 (uintptr_t)count_word, smp_test_monitor, count_word);
711
712 void *r1, *r2;
713 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu1_thread, &r1), "join vcpu1");
714 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu2_thread, &r2), "join vcpu2");
715 uint64_t v1 = (uint64_t)r1;
716 uint64_t v2 = (uint64_t)r2;
717 if (v1 == 0) {
718 T_ASSERT_EQ_ULLONG(v2, 1ULL, "check count");
719 } else if (v1 == 1) {
720 T_ASSERT_EQ_ULLONG(v2, 0ULL, "check count");
721 } else {
722 T_FAIL("unexpected count: %llu", v1);
723 }
724
725 vm_cleanup();
726 }
727
728
729 extern void *hvtest_begin;
730 extern void *hvtest_end;
731
732 static void *
733 simple_protected_mode_test_monitor(void *arg __unused, hv_vcpuid_t vcpu)
734 {
735 setup_protected_mode(vcpu);
736
737 size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);
738
739 const size_t mem_size = 1 * 1024 * 1024;
740 uint8_t *guest_pages_shadow = valloc(mem_size);
741
742 bzero(guest_pages_shadow, mem_size);
743 memcpy(guest_pages_shadow+0x1000, &hvtest_begin, guest_pages_size);
744
745 T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, 0x40000000, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC),
746 HV_SUCCESS, "map guest memory");
747
748 expect_vmcall_with_value(vcpu, 0x23456, false);
749
750 free(guest_pages_shadow);
751
752 return NULL;
753 }
754
755 T_DECL(simple_protected_mode_guest, "simple protected mode guest")
756 {
757 vm_setup();
758
759 pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
760 (((uintptr_t)simple_protected_mode_vcpu_entry & PAGE_MASK) +
761 0x40000000 + 0x1000),
762 0, simple_protected_mode_test_monitor, 0);
763 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
764
765 vm_cleanup();
766 }
767
768 static void *
769 simple_real_mode_monitor(void *arg __unused, hv_vcpuid_t vcpu)
770 {
771 setup_real_mode(vcpu);
772
773 size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);
774
775 const size_t mem_size = 1 * 1024 * 1024;
776 uint8_t *guest_pages_shadow = valloc(mem_size);
777
778 bzero(guest_pages_shadow, mem_size);
779 memcpy(guest_pages_shadow+0x1000, &hvtest_begin, guest_pages_size);
780
781 T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, 0x0, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC), HV_SUCCESS,
782 "map guest memory");
783
784 expect_vmcall_with_value(vcpu, 0x23456, false);
785
786 free(guest_pages_shadow);
787
788 return NULL;
789 }
790
791 T_DECL(simple_real_mode_guest, "simple real mode guest")
792 {
793 vm_setup();
794
795 pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
796 (((uintptr_t)simple_real_mode_vcpu_entry & PAGE_MASK) +
797 0x1000),
798 0, simple_real_mode_monitor, 0);
799 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
800
801 vm_cleanup();
802 }
803
804 static void *
805 radar61961809_monitor(void *gpaddr, hv_vcpuid_t vcpu)
806 {
807 uint32_t const gdt_template[] = {
808 0, 0, /* Empty */
809 0x0000ffff, 0x00cf9200, /* 0x08 CPL0 4GB writable data, 32bit */
810 0x0000ffff, 0x00cf9a00, /* 0x10 CPL0 4GB readable code, 32bit */
811 0x0000ffff, 0x00af9200, /* 0x18 CPL0 4GB writable data, 64bit */
812 0x0000ffff, 0x00af9a00, /* 0x20 CPL0 4GB readable code, 64bit */
813 };
814
815 // We start the test in protected mode.
816 setup_protected_mode(vcpu);
817
818 // SAVE_EFER makes untrapped CR0.PG work.
819 uint64_t exit_cap = get_cap(HV_VMX_CAP_EXIT);
820 set_vmcs(vcpu, VMCS_CTRL_VMEXIT_CONTROLS, canonicalize(VMEXIT_SAVE_EFER, exit_cap));
821
822 // Start with CR0.PG disabled.
823 set_vmcs(vcpu, VMCS_GUEST_CR0, 0x00000021);
824 set_vmcs(vcpu, VMCS_CTRL_CR0_SHADOW, 0x00000021);
825 /*
826 * Don't trap on modifying CR0.PG to reproduce the problem.
827 * Otherwise, we'd have to handle the switch ourselves, and would
828 * just do it right.
829 */
830 set_vmcs(vcpu, VMCS_CTRL_CR0_MASK, ~0x80000000UL);
831
832 // PAE must be enabled for a switch into long mode to work.
833 set_vmcs(vcpu, VMCS_GUEST_CR4, 0x2020);
834 set_vmcs(vcpu, VMCS_CTRL_CR4_MASK, ~0u);
835 set_vmcs(vcpu, VMCS_CTRL_CR4_SHADOW, 0x2020);
836
837 // Will use the harness managed page tables in long mode.
838 set_vmcs(vcpu, VMCS_GUEST_CR3, pml4_gpa);
839
840 // Hypervisor fw wants this (for a good, but unrelated, reason).
841 T_QUIET; T_ASSERT_EQ(hv_vcpu_enable_native_msr(vcpu, MSR_IA32_KERNEL_GS_BASE, true), HV_SUCCESS, "enable native GS_BASE");
842
843 // Far pointer array for our far jumps.
844 uint32_t *far_ptr = NULL;
845 hv_gpaddr_t far_ptr_gpaddr = map_guest_phys((void**)&far_ptr);
846 map_page(far_ptr, (void*)far_ptr_gpaddr);
847
848 far_ptr[0] = (uint32_t)(((uintptr_t)&radar61961809_prepare - (uintptr_t)&hvtest_begin) + (uintptr_t)gpaddr);
849 far_ptr[1] = 0x0010; // 32bit CS
850 far_ptr[2] = (uint32_t)(((uintptr_t)&radar61961809_loop64 - (uintptr_t)&hvtest_begin) + (uintptr_t)gpaddr);
851 far_ptr[3] = 0x0020; // 64bit CS
852
853 set_reg(vcpu, HV_X86_RDI, far_ptr_gpaddr);
854
855 // Setup GDT.
856 uint32_t *gdt = valloc(vm_page_size);
857 hv_gpaddr_t gdt_gpaddr = 0x70000000;
858 map_page(gdt, (void*)gdt_gpaddr);
859 bzero(gdt, vm_page_size);
860 memcpy(gdt, gdt_template, sizeof(gdt_template));
861
862 set_vmcs(vcpu, VMCS_GUEST_GDTR_BASE, gdt_gpaddr);
863 set_vmcs(vcpu, VMCS_GUEST_GDTR_LIMIT, sizeof(gdt_template)+1);
864
865 // Map test code. (Because we start in protected mode without
866 // paging, we cannot use the harness's fault management yet.)
867 size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);
868
869 const size_t mem_size = 1 * 1024 * 1024;
870 uint8_t *guest_pages_shadow = valloc(mem_size);
871
872 bzero(guest_pages_shadow, mem_size);
873 memcpy(guest_pages_shadow, &hvtest_begin, guest_pages_size);
874
875 T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, (hv_gpaddr_t)gpaddr, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC),
876 HV_SUCCESS, "map guest memory");
877
878 // Create entries in PML4.
879 uint8_t *host_va = guest_pages_shadow;
880 uint8_t *va = (uint8_t*)gpaddr;
881 for (unsigned long i = 0; i < guest_pages_size / vm_page_size; i++, va += vm_page_size, host_va += vm_page_size) {
882 map_page(host_va, va);
883 }
884
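/*
 * The guest code reads and then writes IA32_EFER (MSR 0xc0000080) to set
 * LME before enabling paging. EFER is not passed through, so both
 * accesses exit; the monitor emulates them by returning 0 for the read
 * and mirroring the written LME bit into the VMCS guest EFER field,
 * skipping the instruction each time.
 */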
885 uint64_t reason = run_to_next_vm_fault(vcpu, false);
886 T_ASSERT_EQ(reason, (uint64_t)VMX_REASON_RDMSR, "check for rdmsr");
887 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RCX), 0xc0000080LL, "expected EFER rdmsr");
888
889 set_reg(vcpu, HV_X86_RDX, 0);
890 set_reg(vcpu, HV_X86_RAX, 0);
891 set_vmcs(vcpu, VMCS_GUEST_RIP, get_reg(vcpu, HV_X86_RIP)+get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN));
892
893 reason = run_to_next_vm_fault(vcpu, false);
894 T_ASSERT_EQ(reason, (uint64_t)VMX_REASON_WRMSR, "check for wrmsr");
895 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RCX), 0xc0000080LL, "expected EFER wrmsr");
896 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RDX), 0x0LL, "expected EFER wrmsr higher bits 0");
897 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RAX), 0x100LL, "expected EFER wrmsr lower bits LME");
898
899 set_vmcs(vcpu, VMCS_GUEST_IA32_EFER, 0x100);
900 set_vmcs(vcpu, VMCS_GUEST_RIP, get_reg(vcpu, HV_X86_RIP)+get_vmcs(vcpu, VMCS_RO_VMEXIT_INSTR_LEN));
901
902 // See assembly part of the test for checkpoints.
903 expect_vmcall_with_value(vcpu, 0x100, false /* PG disabled =>
904 * no PFs expected */);
905 expect_vmcall_with_value(vcpu, 0x1111, true /* PG now enabled */);
906 expect_vmcall_with_value(vcpu, 0x2222, true);
907
908 free(guest_pages_shadow);
909 free(gdt);
910
911 return NULL;
912 }
913
914 T_DECL(radar61961809_guest,
915 "rdar://61961809 (Unexpected guest faults with hv_vcpu_run_until, dropping out of long mode)")
916 {
917 vm_setup();
918
919 hv_gpaddr_t gpaddr = 0x80000000;
920 pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
921 (((uintptr_t)radar61961809_entry & PAGE_MASK) +
922 gpaddr),
923 0, radar61961809_monitor, (void*)gpaddr);
924 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
925
926 vm_cleanup();
927 }
928
929 static void *
930 superpage_2mb_backed_guest_monitor(void *arg __unused, hv_vcpuid_t vcpu)
931 {
932 setup_protected_mode(vcpu);
933
934 size_t guest_pages_size = round_page((uintptr_t)&hvtest_end - (uintptr_t)&hvtest_begin);
935
936 const size_t mem_size = 2 * 1024 * 1024;
937
938 uint8_t *guest_pages_shadow = mmap(NULL, mem_size,
939 PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
940 VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
941
942 if (guest_pages_shadow == MAP_FAILED) {
943 /* Getting a 2MB superpage is hard in practice, because memory gets fragmented
944 * easily.
945 * T_META_REQUIRES_REBOOT in the T_DECL helps a lot in actually getting a page,
946 * but in the case that it still fails, we don't want the test to fail through
947 * no fault of the hypervisor.
948 */
949 T_SKIP("Unable to attain a 2MB superpage. Skipping.");
950 }
951
952 bzero(guest_pages_shadow, mem_size);
953 memcpy(guest_pages_shadow+0x1000, &hvtest_begin, guest_pages_size);
954
955 T_ASSERT_EQ(hv_vm_map(guest_pages_shadow, 0x40000000, mem_size, HV_MEMORY_READ | HV_MEMORY_EXEC),
956 HV_SUCCESS, "map guest memory");
957
958 expect_vmcall_with_value(vcpu, 0x23456, false);
959
960 munmap(guest_pages_shadow, mem_size);
961
962 return NULL;
963 }
964
965 T_DECL(superpage_2mb_backed_guest, "guest backed by a 2MB superpage",
966 T_META_REQUIRES_REBOOT(true)) // Helps actually getting a superpage
967 {
968 vm_setup();
969
970 pthread_t vcpu_thread = create_vcpu_thread((vcpu_entry_function)
971 (((uintptr_t)simple_protected_mode_vcpu_entry & PAGE_MASK) +
972 0x40000000 + 0x1000),
973 0, superpage_2mb_backed_guest_monitor, 0);
974 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
975
976 vm_cleanup();
977 }
978
979 static void *
980 save_restore_regs_monitor(void *arg __unused, hv_vcpuid_t vcpu)
981 {
982
983 setup_long_mode(vcpu);
984
985 uint64_t rsp = get_reg(vcpu, HV_X86_RSP);
986
987 set_reg(vcpu, HV_X86_RAX, 0x0101010101010101);
988 set_reg(vcpu, HV_X86_RBX, 0x0202020202020202);
989 set_reg(vcpu, HV_X86_RCX, 0x0303030303030303);
990 set_reg(vcpu, HV_X86_RDX, 0x0404040404040404);
991 set_reg(vcpu, HV_X86_RSI, 0x0505050505050505);
992 set_reg(vcpu, HV_X86_RDI, 0x0606060606060606);
993
994 set_reg(vcpu, HV_X86_RBP, 0x0707070707070707);
995
996 set_reg(vcpu, HV_X86_R8, 0x0808080808080808);
997 set_reg(vcpu, HV_X86_R9, 0x0909090909090909);
998 set_reg(vcpu, HV_X86_R10, 0x0a0a0a0a0a0a0a0a);
999 set_reg(vcpu, HV_X86_R11, 0x0b0b0b0b0b0b0b0b);
1000 set_reg(vcpu, HV_X86_R12, 0x0c0c0c0c0c0c0c0c);
1001 set_reg(vcpu, HV_X86_R13, 0x0d0d0d0d0d0d0d0d);
1002 set_reg(vcpu, HV_X86_R14, 0x0e0e0e0e0e0e0e0e);
1003 set_reg(vcpu, HV_X86_R15, 0x0f0f0f0f0f0f0f0f);
1004
1005 // invalid selectors: ok as long as we don't try to use them
1006 set_reg(vcpu, HV_X86_DS, 0x1010);
1007 set_reg(vcpu, HV_X86_ES, 0x2020);
1008 set_reg(vcpu, HV_X86_FS, 0x3030);
1009 set_reg(vcpu, HV_X86_GS, 0x4040);
1010
1011 expect_vmcall_with_value(vcpu, (uint64_t)~0x0101010101010101LL, true);
1012
1013 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RSP), rsp-8, "check if push happened");
1014
1015 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RAX), (uint64_t)~0x0101010101010101LL, "check if RAX negated");
1016 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RBX), (uint64_t)~0x0202020202020202LL, "check if RBX negated");
1017 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RCX), (uint64_t)~0x0303030303030303LL, "check if RCX negated");
1018 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RDX), (uint64_t)~0x0404040404040404LL, "check if RDX negated");
1019 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RSI), (uint64_t)~0x0505050505050505LL, "check if RSI negated");
1020 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RDI), (uint64_t)~0x0606060606060606LL, "check if RDI negated");
1021
1022 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RBP), (uint64_t)~0x0707070707070707LL, "check if RBP negated");
1023
1024 T_ASSERT_EQ(get_reg(vcpu, HV_X86_R8), (uint64_t)~0x0808080808080808LL, "check if R8 negated");
1025 T_ASSERT_EQ(get_reg(vcpu, HV_X86_R9), (uint64_t)~0x0909090909090909LL, "check if R9 negated");
1026 T_ASSERT_EQ(get_reg(vcpu, HV_X86_R10), (uint64_t)~0x0a0a0a0a0a0a0a0aLL, "check if R10 negated");
1027 T_ASSERT_EQ(get_reg(vcpu, HV_X86_R11), (uint64_t)~0x0b0b0b0b0b0b0b0bLL, "check if R11 negated");
1028 T_ASSERT_EQ(get_reg(vcpu, HV_X86_R12), (uint64_t)~0x0c0c0c0c0c0c0c0cLL, "check if R12 negated");
1029 T_ASSERT_EQ(get_reg(vcpu, HV_X86_R13), (uint64_t)~0x0d0d0d0d0d0d0d0dLL, "check if R13 negated");
1030 T_ASSERT_EQ(get_reg(vcpu, HV_X86_R14), (uint64_t)~0x0e0e0e0e0e0e0e0eLL, "check if R14 negated");
1031 T_ASSERT_EQ(get_reg(vcpu, HV_X86_R15), (uint64_t)~0x0f0f0f0f0f0f0f0fLL, "check if R15 negated");
1032
1033 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RAX), (uint64_t)~0x0101010101010101LL, "check if RAX negated");
1034 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RBX), (uint64_t)~0x0202020202020202LL, "check if RBX negated");
1035 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RCX), (uint64_t)~0x0303030303030303LL, "check if RCX negated");
1036 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RDX), (uint64_t)~0x0404040404040404LL, "check if RDX negated");
1037
1038 // Cannot set selector to arbitrary value from the VM, but we have the RPL field to play with
1039 T_ASSERT_EQ(get_reg(vcpu, HV_X86_DS), 1ULL, "check if DS == 1");
1040 T_ASSERT_EQ(get_reg(vcpu, HV_X86_ES), 2ULL, "check if ES == 2");
1041 T_ASSERT_EQ(get_reg(vcpu, HV_X86_FS), 3ULL, "check if FS == 3");
1042 T_ASSERT_EQ(get_reg(vcpu, HV_X86_GS), 1ULL, "check if GS == 1");
1043
1044 expect_vmcall_with_value(vcpu, (uint64_t)~0x0101010101010101LL, true);
1045
1046 T_ASSERT_EQ(get_reg(vcpu, HV_X86_RSP), rsp-16, "check if push happened again");
1047
1048 return NULL;
1049 }
1050
1051 T_DECL(save_restore_regs, "check if general purpose and segment registers are properly saved and restored")
1052 {
1053 vm_setup();
1054
1055 pthread_t vcpu_thread = create_vcpu_thread(save_restore_regs_entry, 0x10000, save_restore_regs_monitor, 0);
1056 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1057
1058 vm_cleanup();
1059 }
1060
1061 static void *
1062 save_restore_debug_regs_monitor(void *arg __unused, hv_vcpuid_t vcpu)
1063 {
1064
1065 setup_long_mode(vcpu);
1066
1067 set_reg(vcpu, HV_X86_RAX, 0x0101010101010101);
1068
1069 set_reg(vcpu, HV_X86_DR0, 0x1111111111111111);
1070 set_reg(vcpu, HV_X86_DR1, 0x2222222222222222);
1071 set_reg(vcpu, HV_X86_DR2, 0x3333333333333333);
1072 set_reg(vcpu, HV_X86_DR3, 0x4444444444444444);
1073
1074 // debug status and control regs (some bits are reserved, one other bit would generate an exception)
1075 const uint64_t dr6_force_clear = 0xffffffff00001000ULL;
1076 const uint64_t dr6_force_set = 0xffff0ff0ULL;
1077 const uint64_t dr7_force_clear = 0xffffffff0000f000ULL;
1078 const uint64_t dr7_force_set = 0x0400ULL;
1079
1080 set_reg(vcpu, HV_X86_DR6, (0x5555555555555555ULL | dr6_force_set) & ~(dr6_force_clear));
1081 set_reg(vcpu, HV_X86_DR7, (0x5555555555555555ULL | dr7_force_set) & ~(dr7_force_clear));
1082
1083 expect_vmcall_with_value(vcpu, 0x0101010101010101LL, true);
1084
1085 T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR0), (uint64_t)~0x1111111111111111LL, "check if DR0 negated");
1086 T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR1), (uint64_t)~0x2222222222222222LL, "check if DR1 negated");
1087 T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR2), (uint64_t)~0x3333333333333333LL, "check if DR2 negated");
1088 T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR3), (uint64_t)~0x4444444444444444LL, "check if DR3 negated");
1089
1090 T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR6), (0xaaaaaaaaaaaaaaaaULL | dr6_force_set) & ~(dr6_force_clear), "check if DR6 negated");
1091 T_ASSERT_EQ(get_reg(vcpu, HV_X86_DR7), (0xaaaaaaaaaaaaaaaaULL | dr7_force_set) & ~(dr7_force_clear), "check if DR7 negated");
1092
1093 expect_vmcall_with_value(vcpu, 0x0101010101010101LL, true);
1094
1095 return NULL;
1096 }
1097
1098 T_DECL(save_restore_debug_regs, "check if debug registers are properly saved and restored",
1099 T_META_EXPECTFAIL("rdar://57433961 (SEED: Web: Writes to debug registers (DR0 etc.) are not saved)"))
1100 {
1101 vm_setup();
1102
1103 pthread_t vcpu_thread = create_vcpu_thread(save_restore_debug_regs_entry, 0x10000, save_restore_debug_regs_monitor, 0);
1104 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1105
1106 vm_cleanup();
1107 }
1108
1109 #define T_NATIVE_MSR(msr)
1110
1111 static void *
1112 native_msr_monitor(void *arg __unused, hv_vcpuid_t vcpu)
1113 {
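/*
 * MSRs passed through to the guest with hv_vcpu_enable_native_msr() so
 * that the guest can access them without exiting; the guest entry point
 * clobbers them (see native_msr_vcpu_entry in the guest code).
 */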
1114 const uint32_t msrs[] = {
1115 MSR_IA32_STAR,
1116 MSR_IA32_LSTAR,
1117 MSR_IA32_CSTAR,
1118 MSR_IA32_FMASK,
1119 MSR_IA32_KERNEL_GS_BASE,
1120 MSR_IA32_TSC,
1121 MSR_IA32_TSC_AUX,
1122
1123 MSR_IA32_SYSENTER_CS,
1124 MSR_IA32_SYSENTER_ESP,
1125 MSR_IA32_SYSENTER_EIP,
1126 MSR_IA32_FS_BASE,
1127 MSR_IA32_GS_BASE,
1128 };
1129 const int msr_count = sizeof(msrs)/sizeof(uint32_t);
1130
1131 setup_long_mode(vcpu);
1132
1133 for (int i = 0; i < msr_count; i++) {
1134 T_ASSERT_EQ(hv_vcpu_enable_native_msr(vcpu, msrs[i], true), HV_SUCCESS, "enable native MSR %x", msrs[i]);
1135 }
1136
1137 expect_vmcall_with_value(vcpu, 0x23456, true);
1138
1139 return NULL;
1140 }
1141
1142 T_DECL(native_msr_clobber, "enable and clobber native MSRs in the guest")
1143 {
1144 vm_setup();
1145
1146 pthread_t vcpu_thread = create_vcpu_thread(native_msr_vcpu_entry, 0x10000, native_msr_monitor, 0);
1147 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1148
1149 vm_cleanup();
1150 }
1151
1152 static void *
1153 radar60691363_monitor(void *arg __unused, hv_vcpuid_t vcpu)
1154 {
1155 setup_long_mode(vcpu);
1156
1157 uint64_t proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
1158 set_vmcs(vcpu, VMCS_CTRL_CPU_BASED2, canonicalize(CPU_BASED2_VMCS_SHADOW, proc2_cap));
1159
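/*
 * With CPU_BASED2_VMCS_SHADOW enabled, VMREAD/VMWRITE executed by the
 * guest access the shadow VMCS instead of exiting, but only for fields
 * allowed via hv_vmx_vcpu_set_shadow_access(). The monitor seeds three
 * fields (two of them read-only fields), lets the guest read them back
 * and overwrite them, then verifies the guest's writes below.
 */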
1160 T_ASSERT_EQ(hv_vmx_vcpu_set_shadow_access(vcpu, VMCS_GUEST_ES,
1161 HV_SHADOW_VMCS_READ | HV_SHADOW_VMCS_WRITE), HV_SUCCESS,
1162 "enable VMCS_GUEST_ES shadow access");
1163 T_ASSERT_EQ(hv_vmx_vcpu_write_shadow_vmcs(vcpu, VMCS_GUEST_ES, 0x1234), HV_SUCCESS,
1164 "set VMCS_GUEST_ES in shadow");
1165
1166 T_ASSERT_EQ(hv_vmx_vcpu_set_shadow_access(vcpu, VMCS_RO_EXIT_QUALIFIC,
1167 HV_SHADOW_VMCS_READ | HV_SHADOW_VMCS_WRITE), HV_SUCCESS,
1168 "enable VMCS_RO_EXIT_QUALIFIC shadow access");
1169 T_ASSERT_EQ(hv_vmx_vcpu_write_shadow_vmcs(vcpu, VMCS_RO_EXIT_QUALIFIC, 0x111), HV_SUCCESS,
1170 "set VMCS_RO_EXIT_QUALIFIC in shadow");
1171
1172 T_ASSERT_EQ(hv_vmx_vcpu_set_shadow_access(vcpu, VMCS_RO_IO_RCX,
1173 HV_SHADOW_VMCS_READ | HV_SHADOW_VMCS_WRITE), HV_SUCCESS,
1174 "enable VMCS_RO_IO_RCX shadow access");
1175 T_ASSERT_EQ(hv_vmx_vcpu_write_shadow_vmcs(vcpu, VMCS_RO_IO_RCX, 0x2323), HV_SUCCESS,
1176 "set VMCS_RO_IO_RCX in shadow");
1177
1178 expect_vmcall_with_value(vcpu, 0x1234, true);
1179 expect_vmcall_with_value(vcpu, 0x111, true);
1180 expect_vmcall_with_value(vcpu, 0x2323, true);
1181
1182 expect_vmcall_with_value(vcpu, 0x4567, true);
1183
1184 uint64_t value;
1185 T_ASSERT_EQ(hv_vmx_vcpu_read_shadow_vmcs(vcpu, VMCS_GUEST_ES, &value), HV_SUCCESS,
1186 "read updated VMCS_GUEST_ES in shadow");
1187 T_ASSERT_EQ(value, 0x9191LL, "VMCS_GUEST_ES value is updated");
1188 T_ASSERT_EQ(hv_vmx_vcpu_read_shadow_vmcs(vcpu, VMCS_RO_EXIT_QUALIFIC, &value), HV_SUCCESS,
1189 "read updated VMCS_RO_EXIT_QUALIFIC in shadow");
1190 T_ASSERT_EQ(value, 0x9898LL, "VMCS_RO_EXIT_QUALIFIC value is updated");
1191 T_ASSERT_EQ(hv_vmx_vcpu_read_shadow_vmcs(vcpu, VMCS_RO_IO_RCX, &value), HV_SUCCESS,
1192 "read updated VMCS_RO_IO_RCX in shadow");
1193 T_ASSERT_EQ(value, 0x7979LL, "VMCS_RO_IO_RCX value is updated");
1194
1195 // This must not work.
1196 T_ASSERT_EQ(hv_vmx_vcpu_set_shadow_access(vcpu, VMCS_CTRL_EPTP,
1197 HV_SHADOW_VMCS_READ | HV_SHADOW_VMCS_WRITE), HV_SUCCESS,
1198 "enable VMCS_CTRL_EPTP shadow access");
1199 T_ASSERT_EQ(hv_vmx_vcpu_read_vmcs(vcpu, VMCS_CTRL_EPTP, &value), HV_BAD_ARGUMENT,
1200 "accessing EPTP in ordinary VMCS fails");
1201
1202 return NULL;
1203 }
1204
1205 T_DECL(radar60691363, "rdar://60691363 (SEED: Web: Allow shadowing of read only VMCS fields)")
1206 {
1207 vm_setup();
1208
1209 uint64_t proc2_cap = get_cap(HV_VMX_CAP_PROCBASED2);
1210
1211 if (!(proc2_cap & ((uint64_t)CPU_BASED2_VMCS_SHADOW << 32))) {
1212 T_SKIP("Device does not support shadow VMCS, skipping.");
1213 }
1214
1215 pthread_t vcpu_thread = create_vcpu_thread(radar60691363_entry, 0x10000, radar60691363_monitor, 0);
1216 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1217
1218 vm_cleanup();
1219 }
1220
1221 T_DECL(radar63641279, "rdar://63641279 (Evaluate \"no SMT\" scheduling option/sidechannel security mitigation for Hypervisor.framework VMs)")
1222 {
1223 const uint64_t ALL_MITIGATIONS =
1224 HV_VM_MITIGATION_A_ENABLE |
1225 HV_VM_MITIGATION_B_ENABLE |
1226 HV_VM_MITIGATION_C_ENABLE |
1227 HV_VM_MITIGATION_D_ENABLE |
1228 HV_VM_MITIGATION_E_ENABLE; // NO_SMT
1229
1230 T_SETUPBEGIN;
1231
1232 if (hv_support() < 1) {
1233 T_SKIP("Running on non-HV target, skipping...");
1234 return;
1235 }
1236
1237 T_ASSERT_EQ(hv_vm_create( HV_VM_SPECIFY_MITIGATIONS | ALL_MITIGATIONS),
1238 HV_SUCCESS, "Created vm");
1239
1240 T_SETUPEND;
1241
1242 pthread_t vcpu_thread = create_vcpu_thread(
1243 (vcpu_entry_function) (((uintptr_t)simple_real_mode_vcpu_entry & PAGE_MASK) + 0x1000),
1244 0, simple_real_mode_monitor, 0);
1245 T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
1246
1247 vm_cleanup();
1248 }