#include "hvtest_arm64.h"
#include "hvtest_guest.h"

#include <ptrauth.h>
#include <darwintest.h>
#include <darwintest_perf.h>
#include <mach/mach.h>
#include <stdatomic.h>
#include <stdlib.h>

T_GLOBAL_META(
    T_META_NAMESPACE("xnu.arm.hv"),
    T_META_REQUIRES_SYSCTL_EQ("kern.hv_support", 1),
    // Temporary workaround for not providing an x86_64 slice
    T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm64", 1)
    );

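// Point the guest PC at a guest-entry symbol. Function pointers are signed
// with pointer authentication, so strip the signature before handing the
// raw address to the guest.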
#define SET_PC(vcpu, symbol) \
{ \
    vcpu_entry_function entry = ptrauth_strip(&symbol, ptrauth_key_function_pointer); \
    uint64_t entry_addr = (uintptr_t)entry; \
    (void)hv_vcpu_set_reg(vcpu, HV_REG_PC, entry_addr); \
}

// Note that expect_*(), set_reg(), and get_reg() cannot be used in benchmarks,
// as the T_ASSERT() checks they perform are severely detrimental to results.
//
// The helpers below should be used in their place.

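// Bump the guest PC over one 4-byte instruction, with no T_ASSERT() overhead.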
static void
quick_bump_pc(hv_vcpu_t vcpu, const bool forward)
{
    uint64_t pc;
    (void)hv_vcpu_get_reg(vcpu, HV_REG_PC, &pc);
    pc = forward ? pc + 4 : pc - 4;
    (void)hv_vcpu_set_reg(vcpu, HV_REG_PC, pc);
}

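// Measure the cost of a VTimer-activated exit. The guest spins while the
// vtimer fires continuously (CVAL == 0), so every hv_vcpu_run() returns with
// a vtimer exit that only needs the mask cleared before re-entering.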
static void
vtimer_benchmark(hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
    dt_stat_thread_cycles_t stat = dt_stat_thread_cycles_create(
        "VTimer interruption");
    SET_PC(vcpu, spin_vcpu_entry);
    set_sys_reg(vcpu, HV_SYS_REG_CNTV_CVAL_EL0, 0);
    set_sys_reg(vcpu, HV_SYS_REG_CNTV_CTL_EL0, 1);
    // Dry-run twice to ensure that the timer is re-armed.
    run_to_next_vm_fault(vcpu, exit);
    T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_VTIMER_ACTIVATED,
        "check for timer");
    hv_vcpu_set_vtimer_mask(vcpu, false);
    run_to_next_vm_fault(vcpu, exit);
    T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_VTIMER_ACTIVATED,
        "check for timer");
    hv_vcpu_set_vtimer_mask(vcpu, false);
    T_STAT_MEASURE_LOOP(stat) {
        hv_vcpu_run(vcpu);
        hv_vcpu_set_vtimer_mask(vcpu, false);
    }
    dt_stat_finalize(stat);
    // Disable the timer before running other benchmarks, otherwise they
    // will be interrupted.
    set_sys_reg(vcpu, HV_SYS_REG_CNTV_CTL_EL0, 0);
}

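// Core trap-benchmark loop: the guest performs `batch` trapping operations
// per measurement (terminated by an HVC #2), and the host re-enters after
// each trap, optionally stepping the PC past the trapped instruction.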
static void
trap_benchmark(dt_stat_thread_cycles_t trap_stat, hv_vcpu_t vcpu,
    hv_vcpu_exit_t *exit, const uint64_t batch, const bool increment_pc)
{
    while (!dt_stat_stable(trap_stat)) {
        set_reg(vcpu, HV_REG_X0, batch);
        dt_stat_token start = dt_stat_thread_cycles_begin(trap_stat);
        for (uint32_t i = 0; i < batch; i++) {
            hv_vcpu_run(vcpu);
            if (increment_pc) {
                quick_bump_pc(vcpu, true);
            }
        }
        dt_stat_thread_cycles_end_batch(trap_stat, (int)batch, start);
        expect_hvc(vcpu, exit, 2);
    }
    dt_stat_finalize(trap_stat);
}

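// Benchmark MRS traps that never reach the VMM: with HCR_TACR cleared, the
// guest's ACTLR_EL1 reads are handled in the kernel, so a whole batch of
// reads completes within a single hv_vcpu_run() that returns only on the
// terminating HVC.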
static void
mrs_bench_kernel(hv_vcpu_t vcpu, hv_vcpu_exit_t *exit, const char *name)
{
    const uint64_t batch = 1000;
    SET_PC(vcpu, mrs_actlr_bench_loop);
    set_control(vcpu, _HV_CONTROL_FIELD_HCR,
        get_control(vcpu, _HV_CONTROL_FIELD_HCR) & ~HCR_TACR);
    dt_stat_thread_cycles_t stat = dt_stat_thread_cycles_create(name);
    while (!dt_stat_stable(stat)) {
        set_reg(vcpu, HV_REG_X0, batch);
        dt_stat_token start = dt_stat_thread_cycles_begin(stat);
        hv_vcpu_run(vcpu);
        dt_stat_thread_cycles_end_batch(stat, (int)batch, start);
        T_QUIET; T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_EXCEPTION,
            "check for exception");
        T_QUIET; T_ASSERT_EQ(exit->exception.syndrome >> 26, (uint64_t)0x16,
            "check for HVC64");
    }
    dt_stat_finalize(stat);
}

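// VCPU monitor for trap_benchmark: runs each trap microbenchmark in turn,
// then repeats the HVC and kernel-MRS cases with guest debug state active.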
static void *
trap_bench_monitor(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
    // In all benchmark testcases using quick_run_vcpu(), dry run all guest
    // code to fault in pages so that run_to_next_vm_fault() isn't needed
    // while recording measurements.

    vtimer_benchmark(vcpu, exit);

    // dry-run hvc_bench_loop
    SET_PC(vcpu, hvc_bench_loop);
    set_reg(vcpu, HV_REG_X0, 1);
    expect_hvc(vcpu, exit, 1);
    expect_hvc(vcpu, exit, 2);

    SET_PC(vcpu, hvc_bench_loop);
    trap_benchmark(dt_stat_thread_cycles_create("HVC handled by VMM"),
        vcpu, exit, 1000, false);

    // dry-run data_abort_bench_loop
    SET_PC(vcpu, data_abort_bench_loop);
    set_reg(vcpu, HV_REG_X0, 1);
    expect_trapped_store(vcpu, exit, get_reserved_start());
    expect_hvc(vcpu, exit, 2);

    SET_PC(vcpu, data_abort_bench_loop);
    trap_benchmark(dt_stat_thread_cycles_create("data abort handled by VMM"),
        vcpu, exit, 1000, true);

    // dry-run mrs_actlr_bench_loop
    SET_PC(vcpu, mrs_actlr_bench_loop);
    set_reg(vcpu, HV_REG_X0, 1);
    set_control(vcpu, _HV_CONTROL_FIELD_HCR,
        get_control(vcpu, _HV_CONTROL_FIELD_HCR) & ~HCR_TACR);
    // Confirm no visible trap from MRS
    expect_hvc(vcpu, exit, 2);

    mrs_bench_kernel(vcpu, exit, "MRS trap handled by kernel");

    SET_PC(vcpu, mrs_actlr_bench_loop);
    set_reg(vcpu, HV_REG_X0, 1);
    set_control(vcpu, _HV_CONTROL_FIELD_HCR,
        get_control(vcpu, _HV_CONTROL_FIELD_HCR) | HCR_TACR);
    // Confirm MRS trap from test loop
    expect_exception(vcpu, exit, 0x18);
    quick_bump_pc(vcpu, true);
    expect_hvc(vcpu, exit, 2);
    SET_PC(vcpu, mrs_actlr_bench_loop);
    trap_benchmark(dt_stat_thread_cycles_create("MRS trap handled by VMM"),
        vcpu, exit, 1000, true);

    SET_PC(vcpu, activate_debug);
    expect_hvc(vcpu, exit, 0);

    SET_PC(vcpu, hvc_bench_loop);
    trap_benchmark(dt_stat_thread_cycles_create(
        "debug-enabled HVC handled by VMM"), vcpu, exit, 1000, false);

    mrs_bench_kernel(vcpu, exit, "debug-enabled MRS trap handled by kernel");

    return NULL;
}

T_DECL(trap_benchmark, "trap-processing benchmark")
{
    vm_setup();
    pthread_t vcpu_thread = create_vcpu_thread(hvc_bench_loop, 0,
        trap_bench_monitor, NULL);
    T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
    vm_cleanup();
}

static semaphore_t sem1;
static semaphore_t sem2;
static _Atomic uint32_t stage;

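// Switch to the peer thread: atomically signal its semaphore and block on
// our own until it switches back.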
static void
switch_and_return(bool leader)
{
    // wait_semaphore, signal_semaphore
    (void)semaphore_wait_signal(leader ? sem2 : sem1, leader ? sem1 : sem2);
}

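// Benchmark leader: measures each switching scenario, advancing the shared
// `stage` counter so the follower changes behavior in lockstep.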
static void *
vcpu_switch_leader(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
    dt_stat_thread_cycles_t baseline = dt_stat_thread_cycles_create(
        "baseline VCPU run, no switch");
    dt_stat_thread_cycles_t thread = dt_stat_thread_cycles_create(
        "VCPU-thread switch");
    dt_stat_thread_cycles_t basic = dt_stat_thread_cycles_create(
        "basic VCPU-VCPU switch");
    dt_stat_thread_cycles_t baseline_debug = dt_stat_thread_cycles_create(
        "baseline debug-enabled VCPU run, no switch");
    dt_stat_thread_cycles_t basic_debug = dt_stat_thread_cycles_create(
        "basic VCPU <-> debug-enabled VCPU switch");
    dt_stat_thread_cycles_t debug_debug = dt_stat_thread_cycles_create(
        "debug-enabled VCPU <-> debug-enabled VCPU switch");

    bind_to_cpu(0);

    // Activate minimal VCPU state
    SET_PC(vcpu, hvc_loop);
    expect_hvc(vcpu, exit, 0);
    T_STAT_MEASURE_LOOP(baseline) {
        hv_vcpu_run(vcpu);
    }
    dt_stat_finalize(baseline);

    T_STAT_MEASURE_LOOP(thread) {
        hv_vcpu_run(vcpu);
        switch_and_return(true);
    }
    dt_stat_finalize(thread);
    atomic_store_explicit(&stage, 1, memory_order_relaxed);

    T_STAT_MEASURE_LOOP(basic) {
        hv_vcpu_run(vcpu);
        switch_and_return(true);
    }
    dt_stat_finalize(basic);
    atomic_store_explicit(&stage, 2, memory_order_relaxed);

    T_STAT_MEASURE_LOOP(basic_debug) {
        hv_vcpu_run(vcpu);
        switch_and_return(true);
    }
    dt_stat_finalize(basic_debug);
    atomic_store_explicit(&stage, 3, memory_order_relaxed);

    SET_PC(vcpu, activate_debug);
    expect_hvc(vcpu, exit, 0);
    SET_PC(vcpu, hvc_loop);
    T_STAT_MEASURE_LOOP(baseline_debug) {
        hv_vcpu_run(vcpu);
    }
    dt_stat_finalize(baseline_debug);

    T_STAT_MEASURE_LOOP(debug_debug) {
        hv_vcpu_run(vcpu);
        switch_and_return(true);
    }
    dt_stat_finalize(debug_debug);
    atomic_store_explicit(&stage, 4, memory_order_relaxed);

    T_ASSERT_MACH_SUCCESS(semaphore_signal(sem1), "final signal to follower");

    return NULL;
}

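// Benchmark follower: pinned to the same CPU as the leader, it idles, then
// runs a plain VCPU, then a debug-enabled VCPU as `stage` advances.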
static void *
vcpu_switch_follower(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
    bind_to_cpu(0);

    // Don't signal until we've been signaled once.
    T_ASSERT_MACH_SUCCESS(semaphore_wait(sem1),
        "wait for first signal from leader");

    // For a baseline, don't enter the VCPU at all. This should result in a
    // negligible VCPU switch cost.
    while (atomic_load_explicit(&stage, memory_order_relaxed) == 0) {
        switch_and_return(false);
    }

    // Enter the VCPU once to activate a minimal amount of state.
    SET_PC(vcpu, hvc_loop);
    expect_hvc(vcpu, exit, 0);

    while (atomic_load_explicit(&stage, memory_order_relaxed) == 1) {
        hv_vcpu_run(vcpu);
        switch_and_return(false);
    }

    // Use debug state
    SET_PC(vcpu, activate_debug);
    expect_hvc(vcpu, exit, 0);
    SET_PC(vcpu, hvc_loop);

    while (atomic_load_explicit(&stage, memory_order_relaxed) == 2) {
        hv_vcpu_run(vcpu);
        switch_and_return(false);
    }

    while (atomic_load_explicit(&stage, memory_order_relaxed) == 3) {
        hv_vcpu_run(vcpu);
        switch_and_return(false);
    }

    return NULL;
}

T_DECL(vcpu_switch_benchmark, "vcpu state-switching benchmarks",
    T_META_BOOTARGS_SET("enable_skstb=1"))
{
    bind_to_cpu(0);

    T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &sem1,
        SYNC_POLICY_FIFO, 0), "semaphore_create 1");
    T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &sem2,
        SYNC_POLICY_FIFO, 0), "semaphore_create 2");

    vm_setup();
    pthread_t vcpu1_thread = create_vcpu_thread(hvc_loop, 0,
        vcpu_switch_leader, NULL);
    pthread_t vcpu2_thread = create_vcpu_thread(hvc_loop, 0,
        vcpu_switch_follower, NULL);

    T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu1_thread, NULL), "join vcpu1");
    T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu2_thread, NULL), "join vcpu2");

    vm_cleanup();
}

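// Per-thread parameters for the hv_vcpus_exit() benchmark.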
struct thread_params {
    uint32_t id;
    uint32_t iter;
    pthread_t thread;
};

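// Time batches of hv_vcpus_exit() calls against a single vcpu until the
// measurement stabilizes.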
static void *
run_cancel_monitor(void *arg, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit __unused)
{
    struct thread_params *param = (struct thread_params *)arg;
    dt_stat_time_t s = dt_stat_time_create("hv_vcpus_exit time vcpu%u",
        param->id);
    while (!dt_stat_stable(s)) {
        dt_stat_token start = dt_stat_time_begin(s);
        for (uint32_t i = 0; i < param->iter; i++) {
            hv_vcpus_exit(&vcpu, 1);
        }
        dt_stat_time_end_batch(s, (int)param->iter, start);
    }
    dt_stat_finalize(s);
    return NULL;
}

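// Run the hv_vcpus_exit() benchmark on vcpu_count vcpu threads in parallel.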
static void
run_cancel_call(uint32_t vcpu_count, uint32_t iter)
{
    struct thread_params *threads = calloc(vcpu_count, sizeof(*threads));
    vm_setup();
    for (uint32_t i = 0; i < vcpu_count; i++) {
        threads[i].id = i;
        threads[i].iter = iter;
        threads[i].thread = create_vcpu_thread(hvc_loop, 0, run_cancel_monitor,
            &threads[i]);
    }
    for (uint32_t i = 0; i < vcpu_count; i++) {
        T_ASSERT_POSIX_SUCCESS(pthread_join(threads[i].thread, NULL),
            "join vcpu%u", i);
    }
    free(threads);
    vm_cleanup();
}

T_DECL(api_benchmarks, "API call parallel performance")
{
    run_cancel_call(1, 1000);
    run_cancel_call(4, 1000);
}