/*
 * Copyright (c) 2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <math.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/syscall.h>
#include <sys/types.h>

#include <libkern/OSAtomic.h>

#include <mach/mach.h>
#include <mach/mach_time.h>
#include <mach/semaphore.h>
#include <mach/task.h>
/* How the leader thread (id 0) wakes the remaining worker threads. */
typedef enum wake_type {
	WAKE_BROADCAST_ONESEM,		/* "broadcast-single-sem": one semaphore_signal_all() on a shared semaphore */
	WAKE_BROADCAST_PERTHREAD,	/* "broadcast-per-thread": leader signals every thread's own semaphore */
	WAKE_CHAIN			/* "chain": each thread signals the next thread's semaphore */
} wake_type_t;
/* Scheduling policy applied to every worker thread by thread_setup(). */
typedef enum my_policy_type {
	MY_POLICY_REALTIME,	/* "realtime": THREAD_TIME_CONSTRAINT_POLICY */
	MY_POLICY_TIMESHARE,	/* "timeshare" */
	MY_POLICY_FIXEDPRI	/* "fixed": THREAD_EXTENDED_POLICY */
} my_policy_type_t;
/*
 * Local two-argument assert (deliberately not the <assert.h> one): when
 * `truth` is false, print the calling thread and the source line, then jump
 * to the cleanup label `label` in the enclosing function instead of aborting.
 */
#undef assert	/* in case some header already pulled in <assert.h> */
#define assert(truth, label) do { if (!(truth)) { printf("Thread %p: failure on line %d\n", (void *)pthread_self(), __LINE__); goto label; } } while (0)
/* Timing parameters, in nanoseconds. */
#define CONSTRAINT_NANOS	(20000000LL)	/* 20 ms: realtime constraint */
#define COMPUTATION_NANOS	(10000000LL)	/* 10 ms: realtime computation */
#define TRACEWORTHY_NANOS	(10000000LL)	/* 10 ms: default latency above which a trace point is cut */
/*
 * debug_log(fmt, ...): printf-style tracing. When tracing is compiled out the
 * macro expands to an empty statement and its arguments are not evaluated.
 *
 * NOTE(review): the preprocessor conditional that selected between the two
 * definitions is not visible in this listing; DEBUG is assumed to be the
 * controlling symbol -- confirm.
 */
#ifdef DEBUG
#define debug_log(...) printf(__VA_ARGS__)
#else
#define debug_log(...) do { } while (0)
#endif
65 void* child_thread_func(void *arg
);
67 int thread_setup(int my_id
);
68 my_policy_type_t
parse_thread_policy(const char *str
);
69 int thread_finish_iteration();
71 /* Global variables (general) */
73 wake_type_t g_waketype
;
76 struct mach_timebase_info g_mti
;
77 semaphore_t g_main_sem
;
78 uint64_t *g_thread_endtimes_abs
;
79 volatile int32_t g_done_threads
;
80 boolean_t g_do_spin
= FALSE
;
81 boolean_t g_verbose
= FALSE
;
82 boolean_t g_do_affinity
= FALSE
;
83 uint64_t g_starttime_abs
;
84 #if MIMIC_DIGI_LEAD_TIME
86 uint64_t g_spinlength_abs
;
87 #endif /* MIMIC_DIGI_LEAD_TIME */
89 /* Global variables (broadcast) */
90 semaphore_t g_machsem
;
91 semaphore_t g_leadersem
;
93 /* Global variables (chain) */
94 semaphore_t
*g_semarr
;
97 abs_to_nanos(uint64_t abstime
)
99 return (uint64_t)(abstime
* (((double)g_mti
.numer
) / ((double)g_mti
.denom
)));
103 nanos_to_abs(uint64_t ns
)
105 return (uint64_t)(ns
* (((double)g_mti
.denom
) / ((double)g_mti
.numer
)));
109 * Figure out what thread policy to use
112 parse_thread_policy(const char *str
)
114 if (strcmp(str
, "timeshare") == 0) {
115 return MY_POLICY_TIMESHARE
;
116 } else if (strcmp(str
, "realtime") == 0) {
117 return MY_POLICY_REALTIME
;
118 } else if (strcmp(str
, "fixed") == 0) {
119 return MY_POLICY_FIXEDPRI
;
121 printf("Invalid thread policy %s\n", str
);
127 * Figure out what wakeup pattern to use
130 parse_wakeup_pattern(const char *str
)
132 if (strcmp(str
, "chain") == 0) {
134 } else if (strcmp(str
, "broadcast-single-sem") == 0) {
135 return WAKE_BROADCAST_ONESEM
;
136 } else if (strcmp(str
, "broadcast-per-thread") == 0) {
137 return WAKE_BROADCAST_PERTHREAD
;
148 thread_setup(int my_id
)
153 case MY_POLICY_TIMESHARE
:
157 case MY_POLICY_REALTIME
:
159 thread_time_constraint_policy_data_t pol
;
161 /* Hard-coded realtime parameters (similar to what Digi uses) */
163 pol
.constraint
= nanos_to_abs(CONSTRAINT_NANOS
);
164 pol
.computation
= nanos_to_abs(COMPUTATION_NANOS
);
165 pol
.preemptible
= 0; /* Ignored by OS */
167 res
= thread_policy_set(mach_thread_self(), THREAD_TIME_CONSTRAINT_POLICY
, (thread_policy_t
) &pol
, THREAD_TIME_CONSTRAINT_POLICY_COUNT
);
168 assert(res
== 0, fail
);
171 case MY_POLICY_FIXEDPRI
:
173 thread_extended_policy_data_t pol
;
176 res
= thread_policy_set(mach_thread_self(), THREAD_EXTENDED_POLICY
, (thread_policy_t
) &pol
, THREAD_EXTENDED_POLICY_COUNT
);
177 assert(res
== 0, fail
);
182 printf("invalid policy type\n");
188 thread_affinity_policy_data_t affinity
;
190 affinity
.affinity_tag
= my_id
% 2;
192 res
= thread_policy_set(mach_thread_self(), THREAD_AFFINITY_POLICY
, (thread_policy_t
)&affinity
, THREAD_AFFINITY_POLICY_COUNT
);
193 assert(res
== 0, fail
);
202 * Wake up main thread if everyone's done
205 thread_finish_iteration(int id
)
209 volatile float x
= 0.0;
210 volatile float y
= 0.0;
212 debug_log("Thread %p finished iteration.\n", pthread_self());
214 #if MIMIC_DIGI_LEAD_TIME
216 * One randomly chosen thread determines when everybody gets to stop.
219 if (g_long_spinid
== id
) {
222 /* This thread took up fully half of his computation */
223 endspin
= g_starttime_abs
+ g_spinlength_abs
;
224 while (mach_absolute_time() < endspin
) {
230 #endif /* MIMIC_DIGI_LEAD_TIME */
232 new = OSAtomicIncrement32(&g_done_threads
);
234 debug_log("New value is %d\n", new);
237 * When the last thread finishes, everyone gets to go back to sleep.
239 if (new == g_numthreads
) {
240 debug_log("Thread %p signalling main thread.\n", pthread_self());
241 res
= semaphore_signal(g_main_sem
);
243 #ifndef MIMIC_DIGI_LEAD_TIME
245 while (g_done_threads
< g_numthreads
) {
257 * Wait for a wakeup, potentially wake up another of the "0-N" threads,
258 * and notify the main thread when done.
261 child_thread_func(void *arg
)
263 int my_id
= (int)(uintptr_t)arg
;
268 /* Set policy and so forth */
271 /* Tell main thread when everyone has set up */
272 new = OSAtomicIncrement32(&g_done_threads
);
273 if (new == g_numthreads
) {
274 semaphore_signal(g_main_sem
);
277 /* For each iteration */
278 for (i
= 0; i
< g_iterations
; i
++) {
280 * Leader thread either wakes everyone up or starts the chain going.
283 res
= semaphore_wait(g_leadersem
);
284 assert(res
== 0, fail
);
286 g_thread_endtimes_abs
[my_id
] = mach_absolute_time();
288 #if MIMIC_DIGI_LEAD_TIME
289 g_long_spinid
= rand() % g_numthreads
;
290 #endif /* MIMIC_DIGI_LEAD_TIME */
292 switch (g_waketype
) {
294 semaphore_signal(g_semarr
[my_id
+ 1]);
296 case WAKE_BROADCAST_ONESEM
:
297 semaphore_signal_all(g_machsem
);
299 case WAKE_BROADCAST_PERTHREAD
:
300 for (j
= 1; j
< g_numthreads
; j
++) {
301 semaphore_signal(g_semarr
[j
]);
305 printf("Invalid wakeup type?!\n");
310 * Everyone else waits to be woken up,
311 * records when she wake up, and possibly
315 case WAKE_BROADCAST_ONESEM
:
316 res
= semaphore_wait(g_machsem
);
317 assert(res
== KERN_SUCCESS
, fail
);
319 g_thread_endtimes_abs
[my_id
] = mach_absolute_time();
323 * For the chain wakeup case:
324 * wait, record time, signal next thread if appropriate
326 case WAKE_BROADCAST_PERTHREAD
:
327 res
= semaphore_wait(g_semarr
[my_id
]);
328 assert(res
== 0, fail
);
330 g_thread_endtimes_abs
[my_id
] = mach_absolute_time();
334 res
= semaphore_wait(g_semarr
[my_id
]);
335 assert(res
== 0, fail
);
337 g_thread_endtimes_abs
[my_id
] = mach_absolute_time();
339 if (my_id
< (g_numthreads
- 1)) {
340 res
= semaphore_signal(g_semarr
[my_id
+ 1]);
341 assert(res
== 0, fail
);
346 printf("Invalid wake type.\n");
351 res
= thread_finish_iteration(my_id
);
352 assert(res
== 0, fail
);
/*
 * Print the command-line synopsis to stdout.
 * Admittedly not very attractive.
 *
 * NOTE(review): the function header is not visible in this listing; the
 * name print_usage and a void return are assumed.
 */
void
print_usage(void)
{
	printf("Usage: zn <num threads> <chain | broadcast-single-sem | broadcast-per-thread> <realtime | timeshare | fixed> <num iterations> [-trace <traceworthy latency in ns>] [-spin] [-affinity] [-verbose]\n");
}
370 * Given an array of uint64_t values, compute average, max, min, and standard deviation
373 compute_stats(uint64_t *values
, uint64_t count
, float *averagep
, uint64_t *maxp
, uint64_t *minp
, float *stddevp
)
378 uint64_t _min
= UINT64_MAX
;
382 for (i
= 0; i
< count
; i
++) {
384 _max
= values
[i
] > _max
? values
[i
] : _max
;
385 _min
= values
[i
] < _min
? values
[i
] : _min
;
388 _avg
= ((float)_sum
) / ((float)count
);
391 for (i
= 0; i
< count
; i
++) {
392 _dev
+= powf((((float)values
[i
]) - _avg
), 2);
405 main(int argc
, char **argv
)
410 uint64_t *worst_latencies_ns
;
411 uint64_t *worst_latencies_from_first_ns
;
414 uint64_t traceworthy_latency_ns
= TRACEWORTHY_NANOS
;
419 if (argc
< 5 || argc
> 9) {
424 /* How many threads? */
425 g_numthreads
= atoi(argv
[1]);
427 /* What wakeup pattern? */
428 g_waketype
= parse_wakeup_pattern(argv
[2]);
431 g_policy
= parse_thread_policy(argv
[3]);
434 g_iterations
= atoi(argv
[4]);
437 for (i
= 5; i
< argc
; i
++) {
438 if (strcmp(argv
[i
], "-spin") == 0) {
440 } else if (strcmp(argv
[i
], "-verbose") == 0) {
442 } else if ((strcmp(argv
[i
], "-trace") == 0) &&
444 traceworthy_latency_ns
= strtoull(argv
[++i
], NULL
, 10);
445 } else if (strcmp(argv
[i
], "-affinity") == 0) {
446 g_do_affinity
= TRUE
;
453 mach_timebase_info(&g_mti
);
455 #if MIMIC_DIGI_LEAD_TIME
456 g_spinlength_abs
= nanos_to_abs(COMPUTATION_NANOS
) / 2;
457 #endif /* MIMIC_DIGI_LEAD_TIME */
459 /* Arrays for threads and their wakeup times */
460 threads
= (pthread_t
*) malloc(sizeof(pthread_t
) * g_numthreads
);
461 assert(threads
, fail
);
463 g_thread_endtimes_abs
= (uint64_t*) malloc(sizeof(uint64_t) * g_numthreads
);
464 assert(g_thread_endtimes_abs
, fail
);
466 worst_latencies_ns
= (uint64_t*) malloc(sizeof(uint64_t) * g_iterations
);
467 assert(worst_latencies_ns
, fail
);
469 worst_latencies_from_first_ns
= (uint64_t*) malloc(sizeof(uint64_t) * g_iterations
);
470 assert(worst_latencies_from_first_ns
, fail
);
471 res
= semaphore_create(mach_task_self(), &g_main_sem
, SYNC_POLICY_FIFO
, 0);
472 assert(res
== KERN_SUCCESS
, fail
);
474 /* Either one big semaphore or one per thread */
475 if (g_waketype
== WAKE_CHAIN
|| g_waketype
== WAKE_BROADCAST_PERTHREAD
) {
476 g_semarr
= malloc(sizeof(semaphore_t
) * g_numthreads
);
477 assert(g_semarr
!= NULL
, fail
);
479 for (i
= 0; i
< g_numthreads
; i
++) {
480 res
= semaphore_create(mach_task_self(), &g_semarr
[i
], SYNC_POLICY_FIFO
, 0);
481 assert(res
== KERN_SUCCESS
, fail
);
484 g_leadersem
= g_semarr
[0];
486 res
= semaphore_create(mach_task_self(), &g_machsem
, SYNC_POLICY_FIFO
, 0);
487 assert(res
== KERN_SUCCESS
, fail
);
488 res
= semaphore_create(mach_task_self(), &g_leadersem
, SYNC_POLICY_FIFO
, 0);
489 assert(res
== KERN_SUCCESS
, fail
);
492 /* Create the threads */
494 for (i
= 0; i
< g_numthreads
; i
++) {
495 res
= pthread_create(&threads
[i
], NULL
, child_thread_func
, (void*)(uintptr_t)i
);
496 assert(res
== 0, fail
);
499 /* Let everyone get settled */
500 semaphore_wait(g_main_sem
);
504 for (i
= 0; i
< g_iterations
; i
++) {
506 uint64_t worst_abs
= 0, best_abs
= UINT64_MAX
;
511 g_starttime_abs
= mach_absolute_time();
514 semaphore_signal(g_leadersem
);
516 /* Wait for worker threads to finish */
517 semaphore_wait(g_main_sem
);
518 assert(res
== KERN_SUCCESS
, fail
);
521 * We report the worst latencies relative to start time
522 * and relative to the lead worker thread.
524 for (j
= 0; j
< g_numthreads
; j
++) {
525 uint64_t latency_abs
;
527 latency_abs
= g_thread_endtimes_abs
[j
] - g_starttime_abs
;
528 worst_abs
= worst_abs
< latency_abs
? latency_abs
: worst_abs
;
531 worst_latencies_ns
[i
] = abs_to_nanos(worst_abs
);
534 for (j
= 1; j
< g_numthreads
; j
++) {
535 uint64_t latency_abs
;
537 latency_abs
= g_thread_endtimes_abs
[j
] - g_thread_endtimes_abs
[0];
538 worst_abs
= worst_abs
< latency_abs
? latency_abs
: worst_abs
;
539 best_abs
= best_abs
> latency_abs
? latency_abs
: best_abs
;
542 worst_latencies_from_first_ns
[i
] = abs_to_nanos(worst_abs
);
545 * In the event of a bad run, cut a trace point.
547 if (worst_latencies_from_first_ns
[i
] > traceworthy_latency_ns
) {
551 printf("Worst on this round was %.2f us.\n", ((float)worst_latencies_from_first_ns
[i
]) / 1000.0);
554 _tmp
= syscall(SYS_kdebug_trace
, 0xEEEEEEEE, 0, 0, 0, 0);
557 /* Let worker threads get back to sleep... */
558 usleep(g_numthreads
* 10);
563 for (i
= 0; i
< g_numthreads
; i
++) {
564 res
= pthread_join(threads
[i
], NULL
);
565 assert(res
== 0, fail
);
568 compute_stats(worst_latencies_ns
, g_iterations
, &avg
, &max
, &min
, &stddev
);
569 printf("Results (from a stop):\n");
570 printf("Max:\t\t%.2f us\n", ((float)max
) / 1000.0);
571 printf("Min:\t\t%.2f us\n", ((float)min
) / 1000.0);
572 printf("Avg:\t\t%.2f us\n", avg
/ 1000.0);
573 printf("Stddev:\t\t%.2f us\n", stddev
/ 1000.0);
577 compute_stats(worst_latencies_from_first_ns
, g_iterations
, &avg
, &max
, &min
, &stddev
);
578 printf("Results (relative to first thread):\n");
579 printf("Max:\t\t%.2f us\n", ((float)max
) / 1000.0);
580 printf("Min:\t\t%.2f us\n", ((float)min
) / 1000.0);
581 printf("Avg:\t\t%.2f us\n", avg
/ 1000.0);
582 printf("Stddev:\t\t%.2f us\n", stddev
/ 1000.0);
585 for (i
= 0; i
< g_iterations
; i
++) {
586 printf("Iteration %d: %f us\n", i
, worst_latencies_ns
[i
] / 1000.0);