]> git.saurik.com Git - apple/libdispatch.git/blob - testing/slice_benchmarks.c
libdispatch-84.5.5.tar.gz
[apple/libdispatch.git] / testing / slice_benchmarks.c
#include <libkern/OSAtomic.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <math.h>
#include <libproc.h>
#include <sys/proc_info.h>
#include <dispatch/dispatch.h>
// #include "../src/private.h"
#include <Block.h>
18
// "normal" loop size: iteration count used by the message round-trip benchmarks
#define LOOP 100000
// Smaller count: number of passes each baton makes in the baton/overload benchmarks
#define SMALL_LOOP 1000
22
23 void report(const char *func, char *full_name, double x, unsigned long loops, char *unit) {
24 // XXX: make cols pretty & stuff
25 const char *prefix = "bench_";
26 const int plen = strlen(prefix);
27 assert(!strncmp(func, prefix, plen));
28 func += plen;
29 char *name;
30 asprintf(&name, "[%s] %s", func, full_name);
31 assert(name);
32
33 x /= loops;
34
35 if (!strcmp("mach", unit)) {
36 static mach_timebase_info_data_t mtb;
37 if (!mtb.denom) {
38 (void)mach_timebase_info(&mtb);
39 }
40 x = (x * mtb.numer) / mtb.denom;
41 unit = "ns";
42 }
43
44 printf("%-64s %13f%-2s\n", name, x, unit);
45 free(name);
46 }
47
48 void bench_queue_mem_use(void) {
49 struct proc_taskinfo pti;
50 uint64_t target_size;
51
52 // The 1st call eats a little memory that isn't accounted for
53 // until the 2nd call. Also the _first_ printf eats >1M, so
54 // if you insert some for debugging make sure it isn't the first!
55 proc_pidinfo(getpid(), PROC_PIDTASKINFO, 0, &pti, sizeof(pti));
56 proc_pidinfo(getpid(), PROC_PIDTASKINFO, 0, &pti, sizeof(pti));
57 target_size = pti.pti_virtual_size + 1024*1024;
58 int n;
59
60 for(n = 0; target_size >= pti.pti_virtual_size; n++) {
61 dispatch_queue_t leak = dispatch_queue_create("to be deleted", NULL);
62 assert(leak);
63 proc_pidinfo(getpid(), PROC_PIDTASKINFO, 0, &pti, sizeof(pti));
64 //printf("pti_virtual_size %qd; togo %qd, n %d\n", pti.pti_virtual_size, target_size - pti.pti_virtual_size, n);
65 }
66
67 report(__FUNCTION__, "#queues to grow VSIZE 1Mbyte", n-1, 1, "x");
68 }
69
70 void bench_message_round_trip(void) {
71 dispatch_queue_t q1 = dispatch_queue_create("q1", NULL);
72 dispatch_queue_t q2 = dispatch_queue_create("q2", NULL);
73 uint64_t start = mach_absolute_time();
74
75 int i;
76 for(i = 0; i < LOOP; i++) {
77 // make sure we don't build up too much of a backlog
78 if (i && !(i & 0x3ff)) {
79 dispatch_sync(q2, ^{});
80 }
81 dispatch_queue_retain(q2);
82 dispatch_async(q1, ^{
83 dispatch_async(q2, ^{
84 dispatch_queue_release(q2);
85 });
86 });
87 }
88
89 // Make sure eveything has drained before we take the end timestamp
90 dispatch_sync(q1, ^{});
91 dispatch_sync(q2, ^{});
92
93 uint64_t end = mach_absolute_time();
94 report(__FUNCTION__, "round trip (async async - implicit copy)", (end - start), LOOP, "mach");
95 dispatch_queue_release(q1);
96 dispatch_queue_release(q2);
97 }
98
99 void bench_precopy_message_round_trip(void) {
100 dispatch_queue_t q1 = dispatch_queue_create("q1", NULL);
101 dispatch_queue_t q2 = dispatch_queue_create("q2", NULL);
102 assert(q1 && q2);
103
104 unsigned long rc;
105
106 dispatch_block_t b2 = Block_copy(^{
107 });
108 dispatch_block_t b1 = Block_copy(^{
109 unsigned long rc = dispatch_async(q2, b2);
110 assert(!rc);
111 dispatch_queue_release(q2);
112 });
113 dispatch_block_t be = Block_copy(^{});
114 assert(b1 && b2);
115 uint64_t start = mach_absolute_time();
116
117 int i;
118 for(i = 0; i < LOOP; i++) {
119 // make sure we don't build up too much of a backlog
120 if (i && !(i & 0x3ff)) {
121 dispatch_sync(q2, be);
122 }
123 dispatch_queue_retain(q2);
124 rc = dispatch_async(q1, b1);
125 assert(!rc);
126 }
127
128 // Make sure eveything has drained before we take the end timestamp
129 dispatch_sync(q1, be);
130 dispatch_sync(q2, be);
131
132 uint64_t end = mach_absolute_time();
133 report(__FUNCTION__, "round trip (a/a - precopy)", (end - start), LOOP, "mach");
134 dispatch_queue_release(q1);
135 dispatch_queue_release(q2);
136 }
137
138 void bench_message_round_type_syncasync(void) {
139 dispatch_queue_t q1 = dispatch_queue_create("q1", NULL);
140 dispatch_queue_t q2 = dispatch_queue_create("q2", NULL);
141 uint64_t start = mach_absolute_time();
142
143 int i;
144 for(i = 0; i < LOOP; i++) {
145 dispatch_queue_retain(q2);
146 dispatch_sync(q1, ^{
147 dispatch_async(q2, ^{
148 dispatch_queue_release(q2);
149 });
150 });
151 }
152
153 // Make sure eveything has drained before we take the end timestamp
154 dispatch_sync(q1, ^{});
155 dispatch_sync(q2, ^{});
156
157 uint64_t end = mach_absolute_time();
158 report(__FUNCTION__, "round trip (s/a - implicit copy)", (end - start), LOOP, "mach");
159 dispatch_queue_release(q1);
160 dispatch_queue_release(q2);
161 }
162
// Do-nothing work function for the *_f dispatch calls; the context argument is ignored.
void nothing_f(void *ignored) {
	(void)ignored;
}
165
166 void brt_f_q1(void *vq2) {
167 unsigned long rc = dispatch_async_f((dispatch_queue_t)vq2, NULL, nothing_f);
168 assert(!rc);
169 }
170
171 void bench_message_round_trip_f(void) {
172 dispatch_queue_t q1 = dispatch_queue_create("q1", NULL);
173 dispatch_queue_t q2 = dispatch_queue_create("q2", NULL);
174 uint64_t start = mach_absolute_time();
175 unsigned long rc;
176
177 int i;
178 for(i = 0; i < LOOP; i++) {
179 // make sure we don't build up too much of a backlog
180 if (i && !(i & 0x3ff)) {
181 dispatch_sync_f(q2, NULL, nothing_f);
182 }
183 rc = dispatch_async_f(q1, q2, brt_f_q1);
184 assert(!rc);
185 }
186
187 // Make sure eveything has drained before we take the end timestamp
188 dispatch_sync_f(q1, NULL, nothing_f);
189 dispatch_sync_f(q2, NULL, nothing_f);
190
191 uint64_t end = mach_absolute_time();
192 report(__FUNCTION__, "round trip (a/a - no blocks)", (end - start), LOOP, "mach");
193 dispatch_queue_release(q1);
194 dispatch_queue_release(q2);
195 }
196
// Placeholder: this benchmark has no body yet, so it measures and reports
// nothing. main() still calls it.
void bench_message_round_type_syncasync_f(void) {
	return;
}
199
// State of one baton as pass() hands it from queue to queue.
struct baton {
	// should extend to keep data on times for latency calc
	// Hops remaining; pass() decrements this and stops when it reaches zero.
	int passes_left;
	// Index, in the queue array given to pass(), of the queue the baton is on.
	int at_q;
	// Set by the benchmarks to (starting queue index / q_div_b).
	int baton_number;

	// Avoid false cache line sharing. Big speed difference on a Mac Pro.
	// Sized so that the three ints plus the padding come to 128 bytes.
	char pad[128 - sizeof(int)*3];
};
209
// Completion signal for the baton/overload benchmarks: the benchmark holds
// this mutex and then blocks in pthread_mutex_lock() until pass() unlocks it
// for the last finishing baton. It is initialised only in bench_baton().
pthread_mutex_t kludge;
// Number of batons that have not finished yet; decremented atomically in pass().
static int n_baton_kludge;
212
213 void pass(dispatch_queue_t *q, struct baton *bat, const int n_queues, dispatch_queue_t complete_q) {
214 //fprintf(stderr, "bat#%d q#%d, passes left: %d\n", bat->baton_number, bat->at_q, bat->baton_number);
215 if (0 == --(bat->passes_left)) {
216 dispatch_queue_resume(complete_q);
217 // XXX: atomic
218 if (!__sync_sub_and_fetch(&n_baton_kludge, 1)) {
219 pthread_mutex_unlock(&kludge);
220 }
221 return;
222 }
223 bat->at_q = (bat->at_q + 1) % n_queues;
224 unsigned long rc = dispatch_async(q[bat->at_q], ^{ pass(q, bat, n_queues, complete_q); });
225 assert(rc == 0);
226 }
227
228 void bench_baton() {
229 const int n_queues = 128;
230 const int q_div_b = 4;
231 const int n_batons = n_queues / q_div_b;
232 assert(q_div_b * n_batons == n_queues);
233 n_baton_kludge = n_batons;
234 dispatch_queue_t *q;
235 dispatch_queue_t complete_q = dispatch_queue_create("completion q", NULL);;
236 char *q_labels[n_queues];
237 int i;
238 unsigned long rc;
239
240 // creting a queue ("C"), suspending it, blocking in a dispatch_sync, and
241 // having another queue resume C does not appear to ever unblock the
242 // dispatch_sync. XXX: make test case and file radar. (if it still
243 // works that way on recent builds, with dispatch inside libsystem, and
244 // such)
245
246
247 pthread_mutex_init(&kludge, NULL);
248 rc = pthread_mutex_trylock(&kludge);
249 assert(!rc);
250 q = alloca(n_queues * sizeof(dispatch_queue_t));
251
252 for(i = 0; i < n_queues; i++) {
253 asprintf(q_labels + i, "relay#%d (%s)", i, __FUNCTION__);
254 assert(q_labels[i]);
255 q[i] = dispatch_queue_create(q_labels[i], NULL);
256 assert(q[i]);
257 }
258
259 uint64_t start_time = mach_absolute_time();
260
261 for(i = 0; i < n_queues; i += q_div_b) {
262 struct baton *bat = valloc(sizeof(struct baton));
263 assert(bat);
264 bat->passes_left = SMALL_LOOP;
265 bat->at_q = i;
266 bat->baton_number = i / q_div_b;
267 dispatch_queue_suspend(complete_q);
268 rc = dispatch_async(q[i], ^{
269 pass(q, bat, n_queues, complete_q);
270 });
271 assert(rc == 0);
272 }
273
274 // XXX: dispatch_sync(complete_q, ^{});
275 rc = pthread_mutex_lock(&kludge);
276 assert(!rc);
277 uint64_t end_time = mach_absolute_time();
278 report(__FUNCTION__, "baton pass", (end_time - start_time), SMALL_LOOP*n_batons, "mach");
279 // dispatch_queue_release(q);
280 }
281
282 void bench_overload2() {
283 const int n_queues = 128;
284 const int q_div_b = 1;
285 const int n_batons = n_queues / q_div_b;
286 n_baton_kludge = n_batons;
287 assert(q_div_b * n_batons == n_queues);
288 dispatch_queue_t *q = alloca(n_queues * sizeof(dispatch_queue_t));
289 dispatch_source_t *ds = alloca(n_queues * sizeof(dispatch_source_t));
290 dispatch_queue_t complete_q = dispatch_queue_create("completion q", NULL);
291 __block uint64_t start_time = 0;
292 uint64_t time_to_start;
293 uint64_t end_time;
294 char *q_labels[n_queues];
295 int i;
296 unsigned int rc;
297
298 rc = pthread_mutex_unlock(&kludge);
299 assert(!rc);
300 rc = pthread_mutex_trylock(&kludge);
301 assert(!rc);
302
303 // Start all batons one to two seconds from now.
304 time_to_start = (2 + time(NULL)) * 1000000000;
305
306 for(i = 0; i < n_queues; i++) {
307 asprintf(q_labels + i, "queue#%d (%s)", i, __FUNCTION__);
308 assert(q_labels[i]);
309 q[i] = dispatch_queue_create(q_labels[i], NULL);
310 assert(q[i]);
311 struct baton *bat = valloc(sizeof(struct baton));
312 assert(bat);
313 bat->passes_left = SMALL_LOOP;
314 bat->at_q = i;
315 bat->baton_number = i / q_div_b;
316 dispatch_queue_suspend(complete_q);
317 ds[i] = dispatch_source_timer_create(DISPATCH_TIMER_ABSOLUTE, time_to_start, 0, NULL, q[i], ^(dispatch_event_t event){
318 assert(!dispatch_event_get_error(event, NULL));
319 // We want to measure the time from the first
320 // baton pass, and NOT include hte wait time
321 // for eveyone to start to fire
322 if (!start_time) {
323 uint64_t s = mach_absolute_time();
324 __sync_bool_compare_and_swap(&start_time, 0, s);
325 }
326 pass(q, bat, n_queues, complete_q);
327 });
328 assert(ds[i]);
329 }
330
331 // XXX: dispatch_sync(complete_q, ^{});
332 rc = pthread_mutex_lock(&kludge);
333 assert(!rc);
334
335 end_time = mach_absolute_time();
336 report(__FUNCTION__, "overload#2", (end_time - start_time), SMALL_LOOP*n_batons, "mach");
337 // Many releases and free()s
338
339 }
340
341 void bench_overload1() {
342 const int n_queues = 128;
343 const int q_div_b = 1;
344 const int n_batons = n_queues / q_div_b;
345 n_baton_kludge = n_batons;
346 assert(q_div_b * n_batons == n_queues);
347 dispatch_queue_t *q = alloca(n_queues * sizeof(dispatch_queue_t));
348 dispatch_queue_t complete_q = dispatch_queue_create("completion q", NULL);
349 __block uint64_t start_time = 0;
350 struct timeval time_to_start;
351 uint64_t end_time;
352 char *q_labels[n_queues];
353 int i;
354 unsigned int rc;
355
356 rc = pthread_mutex_unlock(&kludge);
357 assert(!rc);
358 rc = pthread_mutex_trylock(&kludge);
359 assert(!rc);
360
361 // Start all batons one to two seconds from now.
362 gettimeofday(&time_to_start, NULL);
363 time_to_start.tv_sec += 2;
364
365 for(i = 0; i < n_queues; i++) {
366 asprintf(q_labels + i, "queue#%d (%s)", i, __FUNCTION__);
367 assert(q_labels[i]);
368 q[i] = dispatch_queue_create(q_labels[i], NULL);
369 assert(q[i]);
370 struct baton *bat = valloc(sizeof(struct baton));
371 assert(bat);
372 bat->passes_left = SMALL_LOOP;
373 bat->at_q = i;
374 bat->baton_number = i / q_div_b;
375 dispatch_queue_suspend(complete_q);
376 dispatch_async(q[i], ^(void) {
377 struct timeval now;
378 gettimeofday(&now, NULL);
379 int sec = time_to_start.tv_sec - now.tv_sec;
380 if (sec >= 0) {
381 int usec = time_to_start.tv_usec + now.tv_usec;
382 if (usec > 0 || sec > 0) {
383 usleep(1000000 * sec + usec);
384 } else {
385 // XXX: log here
386 }
387 }
388
389 // We want to measure the time from the first
390 // baton pass, and NOT include hte wait time
391 // for eveyone to start to fire
392 if (!start_time) {
393 uint64_t s = mach_absolute_time();
394 __sync_bool_compare_and_swap(&start_time, 0, s);
395 }
396
397 pass(q, bat, n_queues, complete_q);
398 });
399 }
400
401 // XXX: dispatch_sync(complete_q, ^{});
402 rc = pthread_mutex_lock(&kludge);
403 assert(!rc);
404
405 end_time = mach_absolute_time();
406 report(__FUNCTION__, "overload#1", (end_time - start_time), SMALL_LOOP*n_batons, "mach");
407 // Many releases and free()s
408
409 }
410
411 int main(int argc, char *argv[]) {
412 // Someday we will be able to take a list of tests to run, or exclude, or something.
413
414 // There are somewhat diffrent perfomance chararistics when using the
415 // main queue, so we use a "normal" queue for all our tests.
416 dispatch_queue_t bench_q = dispatch_queue_create("benhmark Q", NULL);
417
418 dispatch_async(bench_q, ^{
419 // These two aren't as intresting in duel core, they queue all
420 // the calls before making them which isn't really what we
421 // want to test, is it? It also limites the number of loops
422 // we can spin around.
423 #if 1
424 bench_message_round_trip();
425 bench_precopy_message_round_trip();
426
427 bench_message_round_type_syncasync();
428 bench_message_round_trip_f();
429 bench_message_round_type_syncasync_f();
430 #endif
431 bench_baton();
432 bench_overload1();
433 bench_overload2();
434
435 // This leaks, so we run it last. Also it gives
436 // wrong results if stdio hasn't been started already,
437 // so we definitly don't want to run it first even if
438 // the leaks are fixed (or ignored)
439 bench_queue_mem_use();
440
441 exit(0);
442 });
443
444 dispatch_main();
445 }