#include "hvtest_arm64.h"
#include "hvtest_guest.h"

#include <ptrauth.h>
#include <darwintest.h>
#include <darwintest_perf.h>
#include <mach/mach.h>
#include <stdatomic.h>
#include <stdlib.h>

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.arm.hv"),
	T_META_REQUIRES_SYSCTL_EQ("kern.hv_support", 1),
	// Temporary workaround for not providing an x86_64 slice
	T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm64", 1)
	);

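// SET_PC() points the vCPU at a guest entry symbol: ptrauth_strip() removes
// the pointer-authentication signature so the raw address is written to PC.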
#define SET_PC(vcpu, symbol) \
{ \
	vcpu_entry_function entry = ptrauth_strip(&symbol, 0); \
	uint64_t entry_addr = (uintptr_t)entry; \
	(void)hv_vcpu_set_reg(vcpu, HV_REG_PC, entry_addr); \
}

// Note that expect_*(), set_reg(), and get_reg() cannot be used in benchmarks,
// as the T_ASSERT() checks they perform are severely detrimental to results.
//
// The helpers below should be used in their place.

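// Advance (or rewind) the PC by one 4-byte AArch64 instruction without the
// T_ASSERT() overhead of get_reg()/set_reg(), e.g. to step past an
// instruction that has just trapped to the VMM.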
static void
quick_bump_pc(hv_vcpu_t vcpu, const bool forward)
{
	uint64_t pc;
	(void)hv_vcpu_get_reg(vcpu, HV_REG_PC, &pc);
	pc = forward ? pc + 4 : pc - 4;
	(void)hv_vcpu_set_reg(vcpu, HV_REG_PC, pc);
}

static void
vtimer_benchmark(hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
	dt_stat_thread_cycles_t stat = dt_stat_thread_cycles_create(
	    "VTimer interruption");
	SET_PC(vcpu, spin_vcpu_entry);
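	// With CNTV_CVAL_EL0 = 0 and the ENABLE bit set in CNTV_CTL_EL0, the
	// virtual timer condition is met immediately, so every entry exits with
	// HV_EXIT_REASON_VTIMER_ACTIVATED as soon as the timer is unmasked.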
	set_sys_reg(vcpu, HV_SYS_REG_CNTV_CVAL_EL0, 0);
	set_sys_reg(vcpu, HV_SYS_REG_CNTV_CTL_EL0, 1);
	// Dry-run twice to ensure that the timer is re-armed.
	run_to_next_vm_fault(vcpu, exit);
	T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_VTIMER_ACTIVATED,
	    "check for timer");
	hv_vcpu_set_vtimer_mask(vcpu, false);
	run_to_next_vm_fault(vcpu, exit);
	T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_VTIMER_ACTIVATED,
	    "check for timer");
	hv_vcpu_set_vtimer_mask(vcpu, false);
	T_STAT_MEASURE_LOOP(stat) {
		hv_vcpu_run(vcpu);
		hv_vcpu_set_vtimer_mask(vcpu, false);
	}
	dt_stat_finalize(stat);
	// Disable the timer before running other benchmarks, otherwise they will be
	// interrupted.
	set_sys_reg(vcpu, HV_SYS_REG_CNTV_CTL_EL0, 0);
}

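// Run `batch` trapping instructions in the guest per measured sample. X0 is
// loaded with the iteration count for the guest loop, which signals
// completion with HVC #2. increment_pc is required for exits (data aborts,
// MRS traps) where the PC still points at the faulting instruction; HVC
// exits already resume past the HVC, so no bump is needed.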
static void
trap_benchmark(dt_stat_thread_cycles_t trap_stat, hv_vcpu_t vcpu,
    hv_vcpu_exit_t *exit, const uint64_t batch, const bool increment_pc)
{
	while (!dt_stat_stable(trap_stat)) {
		set_reg(vcpu, HV_REG_X0, batch);
		dt_stat_token start = dt_stat_thread_cycles_begin(trap_stat);
		for (uint32_t i = 0; i < batch; i++) {
			hv_vcpu_run(vcpu);
			if (increment_pc) {
				quick_bump_pc(vcpu, true);
			}
		}
		dt_stat_thread_cycles_end_batch(trap_stat, (int)batch, start);
		expect_hvc(vcpu, exit, 2);
	}
	dt_stat_finalize(trap_stat);
}

static void
mrs_bench_kernel(hv_vcpu_t vcpu, hv_vcpu_exit_t *exit, const char *name)
{
	const uint64_t batch = 1000;
	SET_PC(vcpu, mrs_actlr_bench_loop);
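	// With HCR_TACR clear in the VMM-visible HCR control, ACTLR_EL1 accesses
	// are not forwarded to the VMM; the trap is resolved in the hypervisor
	// kernel (hence "handled by kernel"), so the only userspace exit per
	// batch is the guest's terminating HVC.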
	set_control(vcpu, _HV_CONTROL_FIELD_HCR,
	    get_control(vcpu, _HV_CONTROL_FIELD_HCR) & ~HCR_TACR);
	dt_stat_thread_cycles_t stat = dt_stat_thread_cycles_create(name);
	while (!dt_stat_stable(stat)) {
		set_reg(vcpu, HV_REG_X0, batch);
		dt_stat_token start = dt_stat_thread_cycles_begin(stat);
		hv_vcpu_run(vcpu);
		dt_stat_thread_cycles_end_batch(stat, (int)batch, start);
		T_QUIET; T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_EXCEPTION,
		    "check for exception");
		T_QUIET; T_ASSERT_EQ(exit->exception.syndrome >> 26, 0x16,
		    "check for HVC64");
	}
	dt_stat_finalize(stat);
}

static void *
trap_bench_monitor(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
	// In all benchmark testcases using quick_run_vcpu(), dry run all guest code
	// to fault in pages so that run_to_next_vm_fault() isn't needed while
	// recording measurements.

	vtimer_benchmark(vcpu, exit);

	// dry-run hvc_bench_loop
	SET_PC(vcpu, hvc_bench_loop);
	set_reg(vcpu, HV_REG_X0, 1);
	expect_hvc(vcpu, exit, 1);
	expect_hvc(vcpu, exit, 2);

	SET_PC(vcpu, hvc_bench_loop);
	trap_benchmark(dt_stat_thread_cycles_create("HVC handled by VMM"),
	    vcpu, exit, 1000, false);

	// dry-run data_abort_bench_loop
	SET_PC(vcpu, data_abort_bench_loop);
	set_reg(vcpu, HV_REG_X0, 1);
	expect_trapped_store(vcpu, exit, get_reserved_start());
	expect_hvc(vcpu, exit, 2);

	SET_PC(vcpu, data_abort_bench_loop);
	trap_benchmark(dt_stat_thread_cycles_create("data abort handled by VMM"),
	    vcpu, exit, 1000, true);

	// dry-run mrs_actlr_bench_loop
	SET_PC(vcpu, mrs_actlr_bench_loop);
	set_reg(vcpu, HV_REG_X0, 1);
	set_control(vcpu, _HV_CONTROL_FIELD_HCR,
	    get_control(vcpu, _HV_CONTROL_FIELD_HCR) & ~HCR_TACR);
	// Confirm no visible trap from MRS
	expect_hvc(vcpu, exit, 2);

	mrs_bench_kernel(vcpu, exit, "MRS trap handled by kernel");

	SET_PC(vcpu, mrs_actlr_bench_loop);
	set_reg(vcpu, HV_REG_X0, 1);
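	// Setting HCR_TACR traps the guest's ACTLR_EL1 accesses, and with the
	// trap forwarded to the VMM each MRS in the loop now costs a full exit
	// to userspace.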
	set_control(vcpu, _HV_CONTROL_FIELD_HCR,
	    get_control(vcpu, _HV_CONTROL_FIELD_HCR) | HCR_TACR);
	// Confirm MRS trap from test loop
	expect_exception(vcpu, exit, 0x18);
	quick_bump_pc(vcpu, true);
	expect_hvc(vcpu, exit, 2);
	SET_PC(vcpu, mrs_actlr_bench_loop);
	trap_benchmark(dt_stat_thread_cycles_create("MRS trap handled by VMM"),
	    vcpu, exit, 1000, true);

	SET_PC(vcpu, activate_debug);
	expect_hvc(vcpu, exit, 0);

	SET_PC(vcpu, hvc_bench_loop);
	trap_benchmark(dt_stat_thread_cycles_create(
	    "debug-enabled HVC handled by VMM"), vcpu, exit, 1000, false);

	mrs_bench_kernel(vcpu, exit, "debug-enabled MRS trap handled by kernel");

	return NULL;
}

T_DECL(trap_benchmark, "trap-processing benchmark")
{
	vm_setup();
	pthread_t vcpu_thread = create_vcpu_thread(hvc_bench_loop, 0,
	    trap_bench_monitor, NULL);
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
	vm_cleanup();
}

static semaphore_t sem1;
static semaphore_t sem2;
static _Atomic uint32_t stage;

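// Both vCPU threads bind to CPU 0 and hand off with semaphore_wait_signal(),
// so each measured hv_vcpu_run() is preceded by a switch to the other vCPU on
// the same core, exposing vCPU state save/restore cost. The leader advances
// `stage` after finalizing each phase to tell the follower which loop to run.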
static void
switch_and_return(bool leader)
{
	// wait_semaphore, signal_semaphore
	(void)semaphore_wait_signal(leader ? sem2 : sem1, leader ? sem1 : sem2);
}

static void *
vcpu_switch_leader(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
	dt_stat_thread_cycles_t baseline = dt_stat_thread_cycles_create(
	    "baseline VCPU run, no switch");
	dt_stat_thread_cycles_t thread = dt_stat_thread_cycles_create(
	    "VCPU-thread switch");
	dt_stat_thread_cycles_t basic = dt_stat_thread_cycles_create(
	    "basic VCPU-VCPU switch");
	dt_stat_thread_cycles_t baseline_debug = dt_stat_thread_cycles_create(
	    "baseline debug-enabled VCPU run, no switch");
	dt_stat_thread_cycles_t basic_debug = dt_stat_thread_cycles_create(
	    "basic VCPU <-> debug-enabled VCPU switch");
	dt_stat_thread_cycles_t debug_debug = dt_stat_thread_cycles_create(
	    "debug-enabled VCPU <-> debug-enabled VCPU switch");

	bind_to_cpu(0);

	// Activate minimal VCPU state
	SET_PC(vcpu, hvc_loop);
	expect_hvc(vcpu, exit, 0);
	T_STAT_MEASURE_LOOP(baseline) {
		hv_vcpu_run(vcpu);
	}
	dt_stat_finalize(baseline);

	T_STAT_MEASURE_LOOP(thread) {
		hv_vcpu_run(vcpu);
		switch_and_return(true);
	}
	dt_stat_finalize(thread);
	atomic_store_explicit(&stage, 1, memory_order_relaxed);

	T_STAT_MEASURE_LOOP(basic) {
		hv_vcpu_run(vcpu);
		switch_and_return(true);
	}
	dt_stat_finalize(basic);
	atomic_store_explicit(&stage, 2, memory_order_relaxed);

	T_STAT_MEASURE_LOOP(basic_debug) {
		hv_vcpu_run(vcpu);
		switch_and_return(true);
	}
	dt_stat_finalize(basic_debug);
	atomic_store_explicit(&stage, 3, memory_order_relaxed);

	SET_PC(vcpu, activate_debug);
	expect_hvc(vcpu, exit, 0);
	SET_PC(vcpu, hvc_loop);
	T_STAT_MEASURE_LOOP(baseline_debug) {
		hv_vcpu_run(vcpu);
	}
	dt_stat_finalize(baseline_debug);

	T_STAT_MEASURE_LOOP(debug_debug) {
		hv_vcpu_run(vcpu);
		switch_and_return(true);
	}
	dt_stat_finalize(debug_debug);
	atomic_store_explicit(&stage, 4, memory_order_relaxed);

	T_ASSERT_MACH_SUCCESS(semaphore_signal(sem1), "final signal to follower");

	return NULL;
}

static void *
vcpu_switch_follower(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
	bind_to_cpu(0);

	// Don't signal until we've been signaled once.
	T_ASSERT_MACH_SUCCESS(semaphore_wait(sem1),
	    "wait for first signal from leader");

	// For a baseline, don't enter the VCPU at all. This should result in a
	// negligible VCPU switch cost.
	while (atomic_load_explicit(&stage, memory_order_relaxed) == 0) {
		switch_and_return(false);
	}

	// Enter the VCPU once to activate a minimal amount of state.
	SET_PC(vcpu, hvc_loop);
	expect_hvc(vcpu, exit, 0);

	while (atomic_load_explicit(&stage, memory_order_relaxed) == 1) {
		hv_vcpu_run(vcpu);
		switch_and_return(false);
	}

	// Use debug state
	SET_PC(vcpu, activate_debug);
	expect_hvc(vcpu, exit, 0);
	SET_PC(vcpu, hvc_loop);

	while (atomic_load_explicit(&stage, memory_order_relaxed) == 2) {
		hv_vcpu_run(vcpu);
		switch_and_return(false);
	}

	while (atomic_load_explicit(&stage, memory_order_relaxed) == 3) {
		hv_vcpu_run(vcpu);
		switch_and_return(false);
	}

	return NULL;
}

T_DECL(vcpu_switch_benchmark, "vcpu state-switching benchmarks",
    T_META_BOOTARGS_SET("enable_skstb=1"))
{
	bind_to_cpu(0);

	T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &sem1,
	    SYNC_POLICY_FIFO, 0), "semaphore_create 1");
	T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &sem2,
	    SYNC_POLICY_FIFO, 0), "semaphore_create 2");

	vm_setup();
	pthread_t vcpu1_thread = create_vcpu_thread(hvc_loop, 0,
	    vcpu_switch_leader, NULL);
	pthread_t vcpu2_thread = create_vcpu_thread(hvc_loop, 0,
	    vcpu_switch_follower, NULL);

	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu1_thread, NULL), "join vcpu1");
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu2_thread, NULL), "join vcpu2");

	vm_cleanup();
}

struct thread_params {
	uint32_t id;
	uint32_t iter;
	pthread_t thread;
};

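// Benchmark hv_vcpus_exit() itself: each monitor thread repeatedly asks its
// own (idle) vCPU to exit, so the measurement captures the bare API cost
// rather than guest exit handling; running with 1 and 4 vCPU threads checks
// how the call scales in parallel.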
static void *
run_cancel_monitor(void *arg, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit __unused)
{
	struct thread_params *param = (struct thread_params *)arg;
	dt_stat_time_t s = dt_stat_time_create("hv_vcpus_exit time vcpu%u",
	    param->id);
	while (!dt_stat_stable(s)) {
		dt_stat_token start = dt_stat_time_begin(s);
		for (uint32_t i = 0; i < param->iter; i++) {
			hv_vcpus_exit(&vcpu, 1);
		}
		dt_stat_time_end_batch(s, (int)param->iter, start);
	}
	dt_stat_finalize(s);
	return NULL;
}

static void
run_cancel_call(uint32_t vcpu_count, uint32_t iter)
{
	struct thread_params *threads = calloc(vcpu_count, sizeof(*threads));
	vm_setup();
	for (uint32_t i = 0; i < vcpu_count; i++) {
		threads[i].id = i;
		threads[i].iter = iter;
		threads[i].thread = create_vcpu_thread(hvc_loop, 0, run_cancel_monitor,
		    &threads[i]);
	}
	for (uint32_t i = 0; i < vcpu_count; i++) {
		T_ASSERT_POSIX_SUCCESS(pthread_join(threads[i].thread, NULL),
		    "join vcpu%u", i);
	}
	free(threads);
	vm_cleanup();
}

T_DECL(api_benchmarks, "API call parallel performance")
{
	run_cancel_call(1, 1000);
	run_cancel_call(4, 1000);
}