#include "hvtest_arm64.h"
#include "hvtest_guest.h"

#include <ptrauth.h>
#include <darwintest.h>
#include <darwintest_perf.h>
#include <mach/mach.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

T_GLOBAL_META(
	T_META_NAMESPACE("xnu.arm.hv"),
	T_META_REQUIRES_SYSCTL_EQ("kern.hv_support", 1),
	// Temporary workaround for not providing an x86_64 slice
	T_META_REQUIRES_SYSCTL_EQ("hw.optional.arm64", 1)
	);
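
// SET_PC() loads a guest entry point into the vCPU's PC. The entry symbol is a
// host function pointer, so on arm64e it carries a pointer-authentication
// signature; ptrauth_strip() removes that signature to recover the raw address
// the guest should branch to.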
#define SET_PC(vcpu, symbol) \
{ \
	vcpu_entry_function entry = ptrauth_strip(&symbol, 0); \
	uint64_t entry_addr = (uintptr_t)entry; \
	(void)hv_vcpu_set_reg(vcpu, HV_REG_PC, entry_addr); \
}

// Note that expect_*(), set_reg(), and get_reg() cannot be used in benchmarks,
// as the T_ASSERT() checks they perform are severely detrimental to results.
//
// The helpers below should be used in their place.

// Advance (or rewind) the guest PC by one 4-byte instruction without the
// T_ASSERT() overhead of the regular register helpers.
static void
quick_bump_pc(hv_vcpu_t vcpu, const bool forward)
{
	uint64_t pc;
	(void)hv_vcpu_get_reg(vcpu, HV_REG_PC, &pc);
	pc = forward ? pc + 4 : pc - 4;
	(void)hv_vcpu_set_reg(vcpu, HV_REG_PC, pc);
}
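
// Measures the cost of a virtual-timer interruption: the guest spins while the
// VTimer is armed with an already-satisfied compare value (CVAL=0, CTL=1), so
// every hv_vcpu_run() returns with HV_EXIT_REASON_VTIMER_ACTIVATED and the
// measured loop unmasks the timer to re-arm the next exit.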
static void
vtimer_benchmark(hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
	dt_stat_thread_cycles_t stat = dt_stat_thread_cycles_create(
		"VTimer interruption");
	SET_PC(vcpu, spin_vcpu_entry);
	set_sys_reg(vcpu, HV_SYS_REG_CNTV_CVAL_EL0, 0);
	set_sys_reg(vcpu, HV_SYS_REG_CNTV_CTL_EL0, 1);

	// Dry-run twice to ensure that the timer is re-armed.
	run_to_next_vm_fault(vcpu, exit);
	T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_VTIMER_ACTIVATED,
	    "check for VTimer exit");
	hv_vcpu_set_vtimer_mask(vcpu, false);

	run_to_next_vm_fault(vcpu, exit);
	T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_VTIMER_ACTIVATED,
	    "check for VTimer exit");
	hv_vcpu_set_vtimer_mask(vcpu, false);

	T_STAT_MEASURE_LOOP(stat) {
		(void)hv_vcpu_run(vcpu);
		hv_vcpu_set_vtimer_mask(vcpu, false);
	}
	dt_stat_finalize(stat);

	// Disable the timer before running other benchmarks, otherwise they will be
	// interrupted by further VTimer exits.
	set_sys_reg(vcpu, HV_SYS_REG_CNTV_CTL_EL0, 0);
}
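
// Generic trap benchmark: X0 tells the guest loop how many traps to generate
// per batch, each hv_vcpu_run() returns on one trap, and completion of the
// batch is checked via expect_hvc(..., 2). increment_pc is set for traps (data
// aborts, MRS) that the VMM must step past manually with quick_bump_pc().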
static void
trap_benchmark(dt_stat_thread_cycles_t trap_stat, hv_vcpu_t vcpu,
    hv_vcpu_exit_t *exit, const uint64_t batch, const bool increment_pc)
{
	while (!dt_stat_stable(trap_stat)) {
		set_reg(vcpu, HV_REG_X0, batch);
		dt_stat_token start = dt_stat_thread_cycles_begin(trap_stat);
		for (uint32_t i = 0; i < batch; i++) {
			(void)hv_vcpu_run(vcpu);
			if (increment_pc) {
				quick_bump_pc(vcpu, true);
			}
		}
		dt_stat_thread_cycles_end_batch(trap_stat, (int)batch, start);
		expect_hvc(vcpu, exit, 2);
	}
	dt_stat_finalize(trap_stat);
}
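
// MRS-trap benchmark for the in-kernel path: with HCR_TACR clear the VMM does
// not request ACTLR_EL1 intercepts, so each trapped MRS is expected to be
// handled by the kernel without an exit to userspace and a single hv_vcpu_run()
// covers the whole batch. The asserts below check that the run ends with an
// exception whose class is 0x16 (HVC from AArch64), i.e. the guest's
// end-of-batch hypercall.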
static void
mrs_bench_kernel(hv_vcpu_t vcpu, hv_vcpu_exit_t *exit, const char *name)
{
	const uint64_t batch = 1000;
	SET_PC(vcpu, mrs_actlr_bench_loop);
	set_control(vcpu, _HV_CONTROL_FIELD_HCR,
	    get_control(vcpu, _HV_CONTROL_FIELD_HCR) & ~HCR_TACR);

	dt_stat_thread_cycles_t stat = dt_stat_thread_cycles_create(name);
	while (!dt_stat_stable(stat)) {
		set_reg(vcpu, HV_REG_X0, batch);
		dt_stat_token start = dt_stat_thread_cycles_begin(stat);
		(void)hv_vcpu_run(vcpu);
		dt_stat_thread_cycles_end_batch(stat, (int)batch, start);
		T_QUIET; T_ASSERT_EQ_UINT(exit->reason, HV_EXIT_REASON_EXCEPTION,
		    "check for exception");
		T_QUIET; T_ASSERT_EQ(exit->exception.syndrome >> 26, 0x16,
		    "check for HVC64");
	}
	dt_stat_finalize(stat);
}

static void *
trap_bench_monitor(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
	// In all benchmark testcases using quick_run_vcpu(), dry run all guest code
	// to fault in pages so that run_to_next_vm_fault() isn't needed while
	// recording measurements.

	vtimer_benchmark(vcpu, exit);

	// dry-run hvc_bench_loop
	SET_PC(vcpu, hvc_bench_loop);
	set_reg(vcpu, HV_REG_X0, 1);
	expect_hvc(vcpu, exit, 1);
	expect_hvc(vcpu, exit, 2);

	SET_PC(vcpu, hvc_bench_loop);
	trap_benchmark(dt_stat_thread_cycles_create("HVC handled by VMM"),
	    vcpu, exit, 1000, false);

	// dry-run data_abort_bench_loop
	SET_PC(vcpu, data_abort_bench_loop);
	set_reg(vcpu, HV_REG_X0, 1);
	expect_trapped_store(vcpu, exit, get_reserved_start());
	expect_hvc(vcpu, exit, 2);

	SET_PC(vcpu, data_abort_bench_loop);
	trap_benchmark(dt_stat_thread_cycles_create("data abort handled by VMM"),
	    vcpu, exit, 1000, true);

	// dry-run mrs_actlr_bench_loop
	SET_PC(vcpu, mrs_actlr_bench_loop);
	set_reg(vcpu, HV_REG_X0, 1);
	set_control(vcpu, _HV_CONTROL_FIELD_HCR,
	    get_control(vcpu, _HV_CONTROL_FIELD_HCR) & ~HCR_TACR);
	// Confirm no visible trap from MRS
	expect_hvc(vcpu, exit, 2);

	mrs_bench_kernel(vcpu, exit, "MRS trap handled by kernel");

	SET_PC(vcpu, mrs_actlr_bench_loop);
	set_reg(vcpu, HV_REG_X0, 1);
	set_control(vcpu, _HV_CONTROL_FIELD_HCR,
	    get_control(vcpu, _HV_CONTROL_FIELD_HCR) | HCR_TACR);
	// Confirm MRS trap from test loop
	expect_exception(vcpu, exit, 0x18);
	quick_bump_pc(vcpu, true);
	expect_hvc(vcpu, exit, 2);

	SET_PC(vcpu, mrs_actlr_bench_loop);
	trap_benchmark(dt_stat_thread_cycles_create("MRS trap handled by VMM"),
	    vcpu, exit, 1000, true);

	SET_PC(vcpu, activate_debug);
	expect_hvc(vcpu, exit, 0);

	SET_PC(vcpu, hvc_bench_loop);
	trap_benchmark(dt_stat_thread_cycles_create(
		"debug-enabled HVC handled by VMM"), vcpu, exit, 1000, false);

	mrs_bench_kernel(vcpu, exit, "debug-enabled MRS trap handled by kernel");

	return NULL;
}

T_DECL(trap_benchmark, "trap-processing benchmark")
{
	pthread_t vcpu_thread = create_vcpu_thread(hvc_bench_loop, 0,
	    trap_bench_monitor, NULL);
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu_thread, NULL), "join vcpu");
}

static semaphore_t sem1;
static semaphore_t sem2;
static _Atomic uint32_t stage;
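
// The two vCPU threads ping-pong via sem1/sem2: semaphore_wait_signal() signals
// the peer's semaphore and blocks on this thread's own, so control alternates
// strictly between leader and follower. The leader advances `stage` to tell the
// follower which configuration (no VCPU state, basic state, debug state) to
// present for the next set of measurements.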
static void
switch_and_return(bool leader)
{
	// wait_semaphore, signal_semaphore
	(void)semaphore_wait_signal(leader ? sem2 : sem1, leader ? sem1 : sem2);
}
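
// Leader half of the switching benchmark. Stages, as driven below:
//   0: follower has never entered its VCPU (baseline and thread-switch runs)
//   1: follower has minimal VCPU state (basic VCPU-VCPU switch)
//   2: follower has debug state enabled
//   3: leader also has debug state enabled
//   4: done; final signal releases the follower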
static void *
vcpu_switch_leader(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
	dt_stat_thread_cycles_t baseline = dt_stat_thread_cycles_create(
		"baseline VCPU run, no switch");
	dt_stat_thread_cycles_t thread = dt_stat_thread_cycles_create(
		"VCPU-thread switch");
	dt_stat_thread_cycles_t basic = dt_stat_thread_cycles_create(
		"basic VCPU-VCPU switch");
	dt_stat_thread_cycles_t baseline_debug = dt_stat_thread_cycles_create(
		"baseline debug-enabled VCPU run, no switch");
	dt_stat_thread_cycles_t basic_debug = dt_stat_thread_cycles_create(
		"basic VCPU <-> debug-enabled VCPU switch");
	dt_stat_thread_cycles_t debug_debug = dt_stat_thread_cycles_create(
		"debug-enabled VCPU <-> debug-enabled VCPU switch");

	// Activate minimal VCPU state
	SET_PC(vcpu, hvc_loop);
	expect_hvc(vcpu, exit, 0);

	T_STAT_MEASURE_LOOP(baseline) {
		(void)hv_vcpu_run(vcpu);
	}
	dt_stat_finalize(baseline);

	T_STAT_MEASURE_LOOP(thread) {
		(void)hv_vcpu_run(vcpu);
		switch_and_return(true);
	}
	dt_stat_finalize(thread);
	atomic_store_explicit(&stage, 1, memory_order_relaxed);

	T_STAT_MEASURE_LOOP(basic) {
		(void)hv_vcpu_run(vcpu);
		switch_and_return(true);
	}
	dt_stat_finalize(basic);
	atomic_store_explicit(&stage, 2, memory_order_relaxed);

	T_STAT_MEASURE_LOOP(basic_debug) {
		(void)hv_vcpu_run(vcpu);
		switch_and_return(true);
	}
	dt_stat_finalize(basic_debug);
	atomic_store_explicit(&stage, 3, memory_order_relaxed);

	SET_PC(vcpu, activate_debug);
	expect_hvc(vcpu, exit, 0);
	SET_PC(vcpu, hvc_loop);

	T_STAT_MEASURE_LOOP(baseline_debug) {
		(void)hv_vcpu_run(vcpu);
	}
	dt_stat_finalize(baseline_debug);

	T_STAT_MEASURE_LOOP(debug_debug) {
		(void)hv_vcpu_run(vcpu);
		switch_and_return(true);
	}
	dt_stat_finalize(debug_debug);
	atomic_store_explicit(&stage, 4, memory_order_relaxed);

	T_ASSERT_MACH_SUCCESS(semaphore_signal(sem1), "final signal to follower");

	return NULL;
}

static void *
vcpu_switch_follower(void *arg __unused, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit)
{
	// Don't signal until we've been signaled once.
	T_ASSERT_MACH_SUCCESS(semaphore_wait(sem1),
	    "wait for first signal from leader");

	// For a baseline, don't enter the VCPU at all. This should result in a
	// negligible VCPU switch cost.
	while (atomic_load_explicit(&stage, memory_order_relaxed) == 0) {
		switch_and_return(false);
	}

	// Enter the VCPU once to activate a minimal amount of state.
	SET_PC(vcpu, hvc_loop);
	expect_hvc(vcpu, exit, 0);

	while (atomic_load_explicit(&stage, memory_order_relaxed) == 1) {
		(void)hv_vcpu_run(vcpu);
		switch_and_return(false);
	}

	// Activate debug state for the debug-enabled stages.
	SET_PC(vcpu, activate_debug);
	expect_hvc(vcpu, exit, 0);
	SET_PC(vcpu, hvc_loop);

	while (atomic_load_explicit(&stage, memory_order_relaxed) == 2) {
		(void)hv_vcpu_run(vcpu);
		switch_and_return(false);
	}

	while (atomic_load_explicit(&stage, memory_order_relaxed) == 3) {
		(void)hv_vcpu_run(vcpu);
		switch_and_return(false);
	}

	return NULL;
}

T_DECL(vcpu_switch_benchmark, "vcpu state-switching benchmarks",
    T_META_BOOTARGS_SET("enable_skstb=1"))
{
	T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &sem1,
	    SYNC_POLICY_FIFO, 0), "semaphore_create 1");
	T_ASSERT_MACH_SUCCESS(semaphore_create(mach_task_self(), &sem2,
	    SYNC_POLICY_FIFO, 0), "semaphore_create 2");

	pthread_t vcpu1_thread = create_vcpu_thread(hvc_loop, 0,
	    vcpu_switch_leader, NULL);
	pthread_t vcpu2_thread = create_vcpu_thread(hvc_loop, 0,
	    vcpu_switch_follower, NULL);

	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu1_thread, NULL), "join vcpu1");
	T_ASSERT_POSIX_SUCCESS(pthread_join(vcpu2_thread, NULL), "join vcpu2");
}

struct thread_params {
	uint32_t id;        // vcpu index used to label the dt_stat (name assumed from usage)
	uint32_t iter;      // hv_vcpus_exit() calls per measured batch
	pthread_t thread;
};
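
// Each monitor thread times hv_vcpus_exit() on its own (not currently running)
// vCPU in batches of param->iter calls, giving a per-call cost for the
// exit/cancellation API when issued from one or several threads in parallel.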
static void *
run_cancel_monitor(void *arg, hv_vcpu_t vcpu, hv_vcpu_exit_t *exit __unused)
{
	struct thread_params *param = (struct thread_params *)arg;
	dt_stat_time_t s = dt_stat_time_create("hv_vcpus_exit time vcpu%u",
	    param->id);
	while (!dt_stat_stable(s)) {
		dt_stat_token start = dt_stat_time_begin(s);
		for (uint32_t i = 0; i < param->iter; i++) {
			hv_vcpus_exit(&vcpu, 1);
		}
		dt_stat_time_end_batch(s, (int)param->iter, start);
	}
	dt_stat_finalize(s);
	return NULL;
}

static void
run_cancel_call(uint32_t vcpu_count, uint32_t iter)
{
	struct thread_params *threads = calloc(vcpu_count, sizeof(*threads));

	for (uint32_t i = 0; i < vcpu_count; i++) {
		threads[i].id = i;
		threads[i].iter = iter;
		threads[i].thread = create_vcpu_thread(hvc_loop, 0, run_cancel_monitor,
		    &threads[i]);
	}

	for (uint32_t i = 0; i < vcpu_count; i++) {
		T_ASSERT_POSIX_SUCCESS(pthread_join(threads[i].thread, NULL),
		    "join vcpu%u", i);
	}

	free(threads);
}

T_DECL(api_benchmarks, "API call parallel performance")
{
	run_cancel_call(1, 1000);
	run_cancel_call(4, 1000);
}