// tests/perf_contended_mutex_rwlock.c (apple/libpthread, libpthread-454.100.8)
#include <stdlib.h>
#include <stdio.h>
#include <stdatomic.h>
#include <math.h>
#include <unistd.h>
#include <sys/sysctl.h>
#include <mach/mach.h>
#include <pthread.h>
#include <pthread/pthread_spis.h>
#include <os/lock.h>
#include <darwintest.h>

// number of times the lock is taken per dt_stat batch
#define ITERATIONS_PER_DT_STAT_BATCH 10000ull
// number of times the contended mutex is taken per dt_stat batch
#define ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX 1000ull
// shift determining the power-of-2 ratio of time worker threads spend in the
// busy() function outside the lock vs. inside the lock
#define OUTER_VS_INNER_SHIFT 4
// fraction of read lock vs write lock acquires
#define RDLOCK_FRACTION 0.99f
// maintain and print progress counters in between measurement batches
#define COUNTERS 0

// move the darwintest assertion code out of the straight-line execution path
// since it has non-trivial overhead and codegen impact even if the assertion
// is never triggered.
#define iferr(_e) if(__builtin_expect(!!(_e), 0))

#pragma mark -

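// Compute pseudorandom busy-work iteration counts for one lock acquisition:
// *inner is the work done while holding the lock (4..19 iterations) and
// *outer the work done outside it, scaled up by 2^OUTER_VS_INNER_SHIFT.
// The raw random value is returned so callers can reuse it (e.g. to choose
// between rdlock and wrlock). The caller passes a per-thread rand_r() seed so
// each thread repeats its own sequence.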
uint64_t
random_busy_counts(unsigned int *seed, uint64_t *inner, uint64_t *outer)
{
	uint64_t random = rand_r(seed);
	const uint64_t of = (1 << OUTER_VS_INNER_SHIFT);
	*inner = 0x4 + (random & (0x10 - 1));
	*outer = 0x4 * of + ((random >> 4) & (0x10 * of - 1));
	return random;
}

// By default, busy() does CPU busy work for the passed-in number of iterations
enum {
	busy_is_nothing = 0,
	busy_is_cpu_busy,
	busy_is_cpu_yield,
};
static int busy_select = busy_is_cpu_busy;

static double
cpu_busy(uint64_t n)
{
	double d = M_PI;
	uint64_t i;
	for (i = 0; i < n; i++) d *= M_PI;
	return d;
}

static double
cpu_yield(uint64_t n)
{
	uint64_t i;
	for (i = 0; i < n; i++) {
#if defined(__arm__) || defined(__arm64__)
		asm volatile("yield");
#elif defined(__x86_64__) || defined(__i386__)
		asm volatile("pause");
#else
#error Unrecognized architecture
#endif
	}
	return 0;
}

__attribute__((noinline))
static double
busy(uint64_t n)
{
	switch(busy_select) {
	case busy_is_cpu_busy:
		return cpu_busy(n);
	case busy_is_cpu_yield:
		return cpu_yield(n);
	default:
		return 0;
	}
}

#pragma mark -

static semaphore_t ready_sem, start_sem, end_sem;
static uint32_t nthreads;
static _Atomic uint32_t active_thr;
static _Atomic int64_t todo;
uint64_t iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH;

#if COUNTERS
static _Atomic uint64_t total_locks, total_rdlocks, total_wrlocks;
#define ctr_inc(_t) atomic_fetch_add_explicit(&(_t), 1, memory_order_relaxed)
#else
#define ctr_inc(_t)
#endif

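// Default worker thread count: the smaller of hw.activecpu and hw.physicalcpu,
// cached after the first call.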
static uint32_t
ncpu(void)
{
	static uint32_t activecpu, physicalcpu;
	if (!activecpu) {
		uint32_t n;
		size_t s = sizeof(n);
		sysctlbyname("hw.activecpu", &n, &s, NULL, 0);
		activecpu = n;
		s = sizeof(n);
		sysctlbyname("hw.physicalcpu", &n, &s, NULL, 0);
		physicalcpu = n;
	}
	return MIN(activecpu, physicalcpu);
}

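// Run one timed batch: wait for every worker to check in on ready_sem, arm the
// shared iteration counter, then measure the interval between releasing the
// workers via start_sem and the last worker signalling end_sem.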
__attribute__((noinline))
static void
threaded_bench(dt_stat_time_t s, int batch_size)
{
	kern_return_t kr;
	for (int i = 0; i < nthreads; i++) {
		kr = semaphore_wait(ready_sem);
		iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");}
	}
	atomic_init(&active_thr, nthreads);
	atomic_init(&todo, batch_size * iterations_per_dt_stat_batch);
	dt_stat_token t = dt_stat_begin(s);
	kr = semaphore_signal_all(start_sem);
	iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");}
	kr = semaphore_wait(end_sem);
	iferr (kr) {T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");}
	dt_stat_end_batch(s, batch_size, t);
}

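// Create the semaphores and a pool of detached worker threads running
// thread_fn. Unless singlethreaded is requested, the thread count comes from
// the DT_STAT_NTHREADS environment variable (defaulting to one thread per
// physical core), and the busy-work mode can be overridden via DT_STAT_CPU_BUSY.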
static void
setup_threaded_bench(void* (*thread_fn)(void*), bool singlethreaded)
{
	kern_return_t kr;
	int r;
	char *e;

	if (singlethreaded) {
		nthreads = 1;
	} else {
		if ((e = getenv("DT_STAT_NTHREADS"))) nthreads = strtoul(e, NULL, 0);
		if (nthreads < 2) nthreads = ncpu();
	}
	if ((e = getenv("DT_STAT_CPU_BUSY"))) busy_select = strtoul(e, NULL, 0);

	kr = semaphore_create(mach_task_self(), &ready_sem, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
	kr = semaphore_create(mach_task_self(), &start_sem, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");
	kr = semaphore_create(mach_task_self(), &end_sem, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");

	pthread_attr_t attr;
	r = pthread_attr_init(&attr);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_init");
	r = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_attr_setdetachstate");

	for (int i = 0; i < nthreads; i++) {
		pthread_t th;
		r = pthread_create(&th, &attr, thread_fn, (void *)(uintptr_t)(i+1));
		T_QUIET; T_ASSERT_POSIX_ZERO(r, "pthread_create");
	}
}

#pragma mark -

static pthread_mutex_t mutex;

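// Worker loop for the fairshare mutex benchmark: park on start_sem until the
// driver releases a batch, then repeatedly do busy work outside the lock,
// take the mutex around a shorter busy section, and decrement the shared todo
// counter. The last thread to drain the batch signals end_sem, and every
// thread loops back to wait for the next batch.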
static void *
mutex_bench_thread(void * arg)
{
	kern_return_t kr;
	int r;
	unsigned int seed;
	volatile double dummy;

restart:
	seed = (uintptr_t)arg; // each thread repeats its own sequence
	kr = semaphore_wait_signal(start_sem, ready_sem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
		uint64_t inner, outer;
		random_busy_counts(&seed, &inner, &outer);
		dummy = busy(outer);
		r = pthread_mutex_lock(&mutex);
		iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");}
		dummy = busy(inner);
		r = pthread_mutex_unlock(&mutex);
		iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");}
		ctr_inc(total_locks);
	}

	if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
		kr = semaphore_signal(end_sem);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
	}
	goto restart;
}

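// Benchmark driver: initialize the mutex with the fairshare policy, then run
// timed batches via threaded_bench() until dt_stat reports a stable result.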
static void
mutex_bench(bool singlethreaded)
{
	int r;
	int batch_size;
#if COUNTERS
	uint64_t batch = 0;
#endif

	setup_threaded_bench(mutex_bench_thread, singlethreaded);

	pthread_mutexattr_t attr;
	r = pthread_mutexattr_init(&attr);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init");
	r = pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FAIRSHARE);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np");
	r = pthread_mutex_init(&mutex, &attr);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init");

	dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & "
			"pthread_mutex_unlock (fairshare) on %u thread%s",
			iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
	do {
		batch_size = dt_stat_batch_size(s);
		threaded_bench(s, batch_size);
#if COUNTERS
		fprintf(stderr, "\rbatch: %4llu\t size: %4d\tmutexes: %8llu",
				++batch, batch_size,
				atomic_load_explicit(&total_locks, memory_order_relaxed));
#endif
	} while (!dt_stat_stable(s));
#if COUNTERS
	fprintf(stderr, "\n");
#endif
	dt_stat_finalize(s);
}

T_DECL(perf_uncontended_mutex_bench, "Uncontended fairshare mutex",
		T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
		T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
{
	mutex_bench(true);
}

T_DECL(perf_contended_mutex_bench, "Contended fairshare mutex",
		T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
		T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
{
	iterations_per_dt_stat_batch = ITERATIONS_PER_DT_STAT_BATCH_CONTENDED_MUTEX;
	mutex_bench(false);
}

#pragma mark -

static pthread_rwlock_t rwlock;

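// Worker loop for the rwlock benchmark: same structure as the mutex worker,
// but each iteration uses the random value to take the lock for reading
// (RDLOCK_FRACTION of acquisitions, 99% by default) or for writing.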
static void *
rwlock_bench_thread(void * arg)
{
	kern_return_t kr;
	int r;
	unsigned int seed;
	volatile double dummy;
	const uint64_t rand_rdlock_max = (double)RAND_MAX * RDLOCK_FRACTION;

restart:
	seed = (uintptr_t)arg; // each thread repeats its own sequence
	kr = semaphore_wait_signal(start_sem, ready_sem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
		uint64_t inner, outer;
		uint64_t random = random_busy_counts(&seed, &inner, &outer);
		dummy = busy(outer);
		if (random < rand_rdlock_max) {
			r = pthread_rwlock_rdlock(&rwlock);
			iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_rdlock");}
			dummy = busy(inner);
			r = pthread_rwlock_unlock(&rwlock);
			iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");}
			ctr_inc(total_rdlocks);
		} else {
			r = pthread_rwlock_wrlock(&rwlock);
			iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_wrlock");}
			dummy = busy(inner);
			r = pthread_rwlock_unlock(&rwlock);
			iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_unlock");}
			ctr_inc(total_wrlocks);
		}
		ctr_inc(total_locks);
	}

	if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
		kr = semaphore_signal(end_sem);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
	}
	goto restart;
}

static void
rwlock_bench(bool singlethreaded)
{
	int r;
	int batch_size;
#if COUNTERS
	uint64_t batch = 0;
#endif

	setup_threaded_bench(rwlock_bench_thread, singlethreaded);

	r = pthread_rwlock_init(&rwlock, NULL);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "rwlock_init");

	dt_stat_time_t s = dt_stat_time_create("%llu pthread_rwlock_rd/wrlock & "
			"pthread_rwlock_unlock (%.0f%% rdlock) on %u thread%s",
			iterations_per_dt_stat_batch, RDLOCK_FRACTION * 100, nthreads,
			nthreads > 1 ? "s" : "");
	do {
		batch_size = dt_stat_batch_size(s);
		threaded_bench(s, batch_size);
#if COUNTERS
		fprintf(stderr, "\rbatch: %4llu\t size: %4d\trwlocks: %8llu\t"
				"rd: %8llu\twr: %8llu", ++batch, batch_size,
				atomic_load_explicit(&total_locks, memory_order_relaxed),
				atomic_load_explicit(&total_rdlocks, memory_order_relaxed),
				atomic_load_explicit(&total_wrlocks, memory_order_relaxed));
#endif
	} while (!dt_stat_stable(s));
#if COUNTERS
	fprintf(stderr, "\n");
#endif
	dt_stat_finalize(s);
}

T_DECL(perf_uncontended_rwlock_bench, "Uncontended rwlock",
		T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
		T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
{
	rwlock_bench(true);
}

T_DECL(perf_contended_rwlock_bench, "Contended rwlock",
		T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
		T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
{
	rwlock_bench(false);
}

#pragma mark -

static os_unfair_lock unfair_lock;

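// Worker loop for the os_unfair_lock benchmark: same structure as the mutex
// worker; os_unfair_lock_lock/unlock return void, so there are no error
// assertions on the lock calls.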
static void *
unfair_lock_bench_thread(void * arg)
{
	kern_return_t kr;
	unsigned int seed;
	volatile double dummy;

restart:
	seed = (uintptr_t)arg; // each thread repeats its own sequence
	kr = semaphore_wait_signal(start_sem, ready_sem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
		uint64_t inner, outer;
		random_busy_counts(&seed, &inner, &outer);
		dummy = busy(outer);
		os_unfair_lock_lock(&unfair_lock);
		dummy = busy(inner);
		os_unfair_lock_unlock(&unfair_lock);
		ctr_inc(total_locks);
	}

	if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
		kr = semaphore_signal(end_sem);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
	}
	goto restart;
}

static void
unfair_lock_bench(bool singlethreaded)
{
	int r;
	int batch_size;
#if COUNTERS
	uint64_t batch = 0;
#endif

	setup_threaded_bench(unfair_lock_bench_thread, singlethreaded);

	dt_stat_time_t s = dt_stat_time_create("%llu os_unfair_lock_lock & "
			"os_unfair_lock_unlock on %u thread%s",
			iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
	do {
		batch_size = dt_stat_batch_size(s);
		threaded_bench(s, batch_size);
#if COUNTERS
		fprintf(stderr, "\rbatch: %4llu\t size: %4d\tunfair_locks: %8llu",
				++batch, batch_size,
				atomic_load_explicit(&total_locks, memory_order_relaxed));
#endif
	} while (!dt_stat_stable(s));
#if COUNTERS
	fprintf(stderr, "\n");
#endif
	dt_stat_finalize(s);
}

T_DECL(perf_uncontended_unfair_lock_bench, "Uncontended unfair lock",
		T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
		T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
{
	unfair_lock_bench(true);
}

T_DECL(perf_contended_unfair_lock_bench, "Contended unfair lock",
		T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
		T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
{
	unfair_lock_bench(false);
}

#pragma mark -

static pthread_mutex_t ffmutex;

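// Worker loop for the first-fit mutex benchmark: identical to the fairshare
// mutex worker, but operates on ffmutex, which ffmutex_bench() initializes
// with _PTHREAD_MUTEX_POLICY_FIRSTFIT.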
static void *
ffmutex_bench_thread(void * arg)
{
	kern_return_t kr;
	int r;
	unsigned int seed;
	volatile double dummy;

restart:
	seed = (uintptr_t)arg; // each thread repeats its own sequence
	kr = semaphore_wait_signal(start_sem, ready_sem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	while (atomic_fetch_sub_explicit(&todo, 1, memory_order_relaxed) > 0) {
		uint64_t inner, outer;
		random_busy_counts(&seed, &inner, &outer);
		dummy = busy(outer);
		r = pthread_mutex_lock(&ffmutex);
		iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_lock");}
		dummy = busy(inner);
		r = pthread_mutex_unlock(&ffmutex);
		iferr (r) {T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_unlock");}
		ctr_inc(total_locks);
	}

	if (atomic_fetch_sub_explicit(&active_thr, 1, memory_order_relaxed) == 1) {
		kr = semaphore_signal(end_sem);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal");
	}
	goto restart;
}

static void
ffmutex_bench(bool singlethreaded)
{
	int r;
	int batch_size;
#if COUNTERS
	uint64_t batch = 0;
#endif

	setup_threaded_bench(ffmutex_bench_thread, singlethreaded);

	pthread_mutexattr_t attr;
	r = pthread_mutexattr_init(&attr);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_init");
	r = pthread_mutexattr_setpolicy_np(&attr, _PTHREAD_MUTEX_POLICY_FIRSTFIT);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutexattr_setpolicy_np");
	r = pthread_mutex_init(&ffmutex, &attr);
	T_QUIET; T_ASSERT_POSIX_ZERO(r, "mutex_init");

	dt_stat_time_t s = dt_stat_time_create("%llu pthread_mutex_lock & "
			"pthread_mutex_unlock (first-fit) on %u thread%s",
			iterations_per_dt_stat_batch, nthreads, nthreads > 1 ? "s" : "");
	do {
		batch_size = dt_stat_batch_size(s);
		threaded_bench(s, batch_size);
#if COUNTERS
		fprintf(stderr, "\rbatch: %4llu\t size: %4d\tffmutexes: %8llu",
				++batch, batch_size,
				atomic_load_explicit(&total_locks, memory_order_relaxed));
#endif
	} while (!dt_stat_stable(s));
#if COUNTERS
	fprintf(stderr, "\n");
#endif
	dt_stat_finalize(s);
}

T_DECL(perf_uncontended_ffmutex_bench, "Uncontended first-fit mutex",
		T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
		T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
{
	ffmutex_bench(true);
}

T_DECL(perf_contended_ffmutex_bench, "Contended first-fit mutex",
		T_META_TYPE_PERF, T_META_ALL_VALID_ARCHS(NO),
		T_META_LTEPHASE(LTE_POSTINIT), T_META_CHECK_LEAKS(false))
{
	ffmutex_bench(false);
}