]>
Commit | Line | Data |
---|---|---|
b0d623f7 A |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms | |
5 | * of the Common Development and Distribution License | |
6 | * (the "License"). You may not use this file except | |
7 | * in compliance with the License. | |
8 | * | |
9 | * You can obtain a copy of the license at | |
10 | * src/OPENSOLARIS.LICENSE | |
11 | * or http://www.opensolaris.org/os/licensing. | |
12 | * See the License for the specific language governing | |
13 | * permissions and limitations under the License. | |
14 | * | |
15 | * When distributing Covered Code, include this CDDL | |
16 | * HEADER in each file and include the License file at | |
17 | * usr/src/OPENSOLARIS.LICENSE. If applicable, | |
18 | * add the following below this CDDL HEADER, with the | |
19 | * fields enclosed by brackets "[]" replaced with your | |
20 | * own identifying information: Portions Copyright [yyyy] | |
21 | * [name of copyright owner] | |
22 | * | |
23 | * CDDL HEADER END | |
24 | */ | |
25 | ||
26 | /* | |
27 | * Copyright 2007 Sun Microsystems, Inc. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | */ | |
30 | ||
31 | /* | |
32 | * benchmarking routines | |
33 | */ | |
34 | ||
35 | #include <sys/types.h> | |
36 | #include <sys/time.h> | |
37 | #include <sys/ipc.h> | |
38 | #include <sys/sem.h> | |
39 | #include <sys/mman.h> | |
40 | #include <sys/wait.h> | |
41 | #include <ctype.h> | |
42 | #include <string.h> | |
43 | #include <strings.h> | |
44 | #include <signal.h> | |
45 | #include <stdio.h> | |
46 | #include <unistd.h> | |
47 | #include <stdlib.h> | |
48 | #include <poll.h> | |
49 | #include <pthread.h> | |
50 | #include <dlfcn.h> | |
51 | #include <errno.h> | |
52 | #include <sys/resource.h> | |
53 | #include <math.h> | |
54 | #include <limits.h> | |
55 | ||
56 | #ifdef __sun | |
57 | #include <sys/elf.h> | |
58 | #endif | |
59 | ||
60 | #include "libmicro.h" | |
61 | ||
62 | ||
63 | #if defined(__APPLE__) | |
64 | #include <mach/mach_time.h> | |
65 | ||
66 | long long | |
67 | gethrtime(void) | |
68 | { | |
69 | long long elapsed; | |
70 | static long long start; | |
71 | static mach_timebase_info_data_t sTimebaseInfo = { 0, 0 }; | |
72 | ||
73 | // If this is the first time we've run, get the timebase. | |
74 | // We can use denom == 0 to indicate that sTimebaseInfo is | |
75 | // uninitialised because it makes no sense to have a zero | |
76 | // denominator in a fraction. | |
77 | ||
78 | if ( sTimebaseInfo.denom == 0 ) { | |
79 | (void) mach_timebase_info(&sTimebaseInfo); | |
80 | start = mach_absolute_time(); | |
81 | } | |
82 | ||
83 | elapsed = mach_absolute_time() - start; | |
84 | ||
85 | // Convert to nanoseconds. | |
86 | // return (elapsed * (long long)sTimebaseInfo.numer)/(long long)sTimebaseInfo.denom; | |
87 | ||
88 | // Provided the final result is representable in 64 bits the following maneuver will | |
89 | // deliver that result without intermediate overflow. | |
90 | if (sTimebaseInfo.denom == sTimebaseInfo.numer) | |
91 | return elapsed; | |
92 | else if (sTimebaseInfo.denom == 1) | |
93 | return elapsed * (long long)sTimebaseInfo.numer; | |
94 | else { | |
95 | // Decompose elapsed = eta32 * 2^32 + eps32: | |
96 | long long eta32 = elapsed >> 32; | |
97 | long long eps32 = elapsed & 0x00000000ffffffffLL; | |
98 | ||
99 | long long numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom; | |
100 | ||
101 | // Form product of elapsed64 (decomposed) and numer: | |
102 | long long mu64 = numer * eta32; | |
103 | long long lambda64 = numer * eps32; | |
104 | ||
105 | // Divide the constituents by denom: | |
106 | long long q32 = mu64/denom; | |
107 | long long r32 = mu64 - (q32 * denom); // mu64 % denom | |
108 | ||
109 | return (q32 << 32) + ((r32 << 32) + lambda64)/denom; | |
110 | } | |
111 | } | |
112 | ||
113 | #endif | |
114 | ||
/*
 * user visible globals
 */

/* argc/argv exactly as handed to actual_main(), saved there on entry */
int lm_argc = 0;
char ** lm_argv = NULL;

/*
 * Current option values; the suffix is the command-line flag letter that
 * actual_main() parses into the variable.
 */
int lm_opt1;	/* -1: single process, non-fork mode (forces -P to 1) */
int lm_optA;	/* -A: align with clock (worker_thread() naps via poll()) */
int lm_optB;	/* -B: batch size; 0 means "compute it" in actual_main() */
int lm_optC = 100;	/* -C: minimum number of samples before stopping */
int lm_optD;	/* -D: run duration in msecs (converted to nsecs for the deadline) */
int lm_optE;	/* -E: echo test name and elapsed time to stderr */
int lm_optH;	/* -H: suppress the result header line */
int lm_optI;	/* -I: nsecs per op, used to compute the batch size */
int lm_optL = 0;	/* -L: print the argument line */
int lm_optM = 0;	/* -M: report mean rather than median */
char *lm_optN;	/* -N: test name (falls back to lm_procname) */
int lm_optP;	/* -P: number of worker processes */
int lm_optS;	/* -S: print detailed stats (also set by -W) */
int lm_optT;	/* -T: number of threads per process */
int lm_optW;	/* -W: flag possible benchmark problems */

/*
 * Defaults that actual_main() copies into the matching lm_opt* variable
 * (after benchmark_init() has run) before parsing the command line.
 */
int lm_def1 = 0;
int lm_defB = 0; /* 0: derive the batch size from lm_nsecs_per_op */
int lm_defD = 10;
int lm_defH = 0;
char *lm_defN = NULL;
int lm_defP = 1;

int lm_defS = 0;
int lm_defT = 1;

/*
 * default on fast platform, should be overridden by individual
 * benchmarks if significantly wrong in either direction.
 *
 * Only consulted when the batch size is still 0 after option parsing;
 * -I replaces this value.
 */

int lm_nsecs_per_op = 5;

char *lm_procpath;	/* directory part of the path to the running binary */
char lm_procname[STRSIZE];	/* name of the binary (argv[0] without directories) */
char lm_usage[STRSIZE];	/* extra text printed at the end of usage() */
char lm_optstr[STRSIZE];	/* extra getopt() letters appended to the built-in ones */
char lm_header[STRSIZE];	/* extra text printed at the end of the result header */
size_t lm_tsdsize = 0;	/* per-thread data size; rounded up to 128 by actual_main() */
161 | ||
162 | ||
/*
 * Globals we do not export to the user
 */

static barrier_t *lm_barrier;	/* barrier created in actual_main(), used by worker_thread() */
static pid_t *pids = NULL;	/* lm_optP entries: pids returned by fork() */
static pthread_t *tids = NULL;	/* lm_optT entries; slot 0 is never filled by worker_process() */
static int pindex = -1;	/* index of this worker process; -1 until assigned */
static void *tsdseg = NULL;	/* mmap()ed segment holding every thread's data */
static size_t tsdsize = 0;	/* lm_tsdsize rounded up to a multiple of 128 */

#ifdef USE_RDTSC
static long long lm_hz = 0;	/* parsed from the LIBMICRO_HZ environment variable */
#endif
177 | ||
178 | ||
/*
 * Forward references
 *
 * Prototypes for helpers used before their definitions, plus the two
 * timer measurements that actual_main() takes once at start-up.
 */

static void worker_process();
static void usage();
static void print_stats(barrier_t *);
static void print_histo(barrier_t *);
static int remove_outliers(double *, int, stats_t *);
static long long nsecs_overhead;	/* result of get_nsecs_overhead() */
static long long nsecs_resolution;	/* result of get_nsecs_resolution() */
static long long get_nsecs_overhead();
static int crunch_stats(double *, int, stats_t *);
static void compute_stats(barrier_t *);
193 | /* | |
194 | * main routine; renamed in this file to allow linking with other | |
195 | * files | |
196 | */ | |
197 | ||
198 | int | |
199 | actual_main(int argc, char *argv[]) | |
200 | { | |
201 | int i; | |
202 | int opt; | |
203 | extern char *optarg; | |
204 | char *tmp; | |
205 | char optstr[256]; | |
206 | barrier_t *b; | |
207 | long long startnsecs = getnsecs(); | |
208 | ||
209 | #ifdef USE_RDTSC | |
210 | if (getenv("LIBMICRO_HZ") == NULL) { | |
211 | (void) printf("LIBMICRO_HZ needed but not set\n"); | |
212 | exit(1); | |
213 | } | |
214 | lm_hz = strtoll(getenv("LIBMICRO_HZ"), NULL, 10); | |
215 | #endif | |
216 | ||
217 | lm_argc = argc; | |
218 | lm_argv = argv; | |
219 | ||
220 | /* before we do anything */ | |
221 | (void) benchmark_init(); | |
222 | ||
223 | ||
224 | nsecs_overhead = get_nsecs_overhead(); | |
225 | nsecs_resolution = get_nsecs_resolution(); | |
226 | ||
227 | /* | |
228 | * Set defaults | |
229 | */ | |
230 | ||
231 | lm_opt1 = lm_def1; | |
232 | lm_optB = lm_defB; | |
233 | lm_optD = lm_defD; | |
234 | lm_optH = lm_defH; | |
235 | lm_optN = lm_defN; | |
236 | lm_optP = lm_defP; | |
237 | ||
238 | lm_optS = lm_defS; | |
239 | lm_optT = lm_defT; | |
240 | ||
241 | /* | |
242 | * squirrel away the path to the current | |
243 | * binary in a way that works on both | |
244 | * Linux and Solaris | |
245 | */ | |
246 | ||
247 | if (*argv[0] == '/') { | |
248 | lm_procpath = strdup(argv[0]); | |
249 | *strrchr(lm_procpath, '/') = 0; | |
250 | } else { | |
251 | char path[1024]; | |
252 | (void) getcwd(path, 1024); | |
253 | (void) strcat(path, "/"); | |
254 | (void) strcat(path, argv[0]); | |
255 | *strrchr(path, '/') = 0; | |
256 | lm_procpath = strdup(path); | |
257 | } | |
258 | ||
259 | /* | |
260 | * name of binary | |
261 | */ | |
262 | ||
263 | if ((tmp = strrchr(argv[0], '/')) == NULL) | |
264 | (void) strcpy(lm_procname, argv[0]); | |
265 | else | |
266 | (void) strcpy(lm_procname, tmp + 1); | |
267 | ||
268 | if (lm_optN == NULL) { | |
269 | lm_optN = lm_procname; | |
270 | } | |
271 | ||
272 | /* | |
273 | * Parse command line arguments | |
274 | */ | |
275 | ||
276 | (void) sprintf(optstr, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr); | |
277 | while ((opt = getopt(argc, argv, optstr)) != -1) { | |
278 | switch (opt) { | |
279 | case '1': | |
280 | lm_opt1 = 1; | |
281 | break; | |
282 | case 'A': | |
283 | lm_optA = 1; | |
284 | break; | |
285 | case 'B': | |
286 | lm_optB = sizetoint(optarg); | |
287 | break; | |
288 | case 'C': | |
289 | lm_optC = sizetoint(optarg); | |
290 | break; | |
291 | case 'D': | |
292 | lm_optD = sizetoint(optarg); | |
293 | break; | |
294 | case 'E': | |
295 | lm_optE = 1; | |
296 | break; | |
297 | case 'H': | |
298 | lm_optH = 1; | |
299 | break; | |
300 | case 'I': | |
301 | lm_optI = sizetoint(optarg); | |
302 | break; | |
303 | case 'L': | |
304 | lm_optL = 1; | |
305 | break; | |
306 | case 'M': | |
307 | lm_optM = 1; | |
308 | break; | |
309 | case 'N': | |
310 | lm_optN = optarg; | |
311 | break; | |
312 | case 'P': | |
313 | lm_optP = sizetoint(optarg); | |
314 | break; | |
315 | case 'S': | |
316 | lm_optS = 1; | |
317 | break; | |
318 | case 'T': | |
319 | lm_optT = sizetoint(optarg); | |
320 | break; | |
321 | case 'V': | |
322 | (void) printf("%s\n", LIBMICRO_VERSION); | |
323 | exit(0); | |
324 | break; | |
325 | case 'W': | |
326 | lm_optW = 1; | |
327 | lm_optS = 1; | |
328 | break; | |
329 | case '?': | |
330 | usage(); | |
331 | exit(0); | |
332 | break; | |
333 | default: | |
334 | if (benchmark_optswitch(opt, optarg) == -1) { | |
335 | usage(); | |
336 | exit(0); | |
337 | } | |
338 | } | |
339 | } | |
340 | ||
341 | /* deal with implicit and overriding options */ | |
342 | if (lm_opt1 && lm_optP > 1) { | |
343 | lm_optP = 1; | |
344 | (void) printf("warning: -1 overrides -P\n"); | |
345 | } | |
346 | ||
347 | if (lm_optE) { | |
348 | (void) fprintf(stderr, "Running:%20s", lm_optN); | |
349 | (void) fflush(stderr); | |
350 | } | |
351 | ||
352 | if (lm_optB == 0) { | |
353 | /* | |
354 | * neither benchmark or user has specified the number | |
355 | * of cnts/sample, so use computed value | |
356 | */ | |
357 | if (lm_optI) | |
358 | lm_nsecs_per_op = lm_optI; | |
359 | #define BLOCK_TOCK_DURATION 10000 /* number of raw timer "tocks" ideally comprising a block of work */ | |
360 | lm_optB = nsecs_resolution * BLOCK_TOCK_DURATION / lm_nsecs_per_op; | |
361 | if (lm_optB == 0) | |
362 | lm_optB = 1; | |
363 | } | |
364 | ||
365 | /* | |
366 | * now that the options are set | |
367 | */ | |
368 | ||
369 | if (benchmark_initrun() == -1) { | |
370 | exit(1); | |
371 | } | |
372 | ||
373 | /* allocate dynamic data */ | |
374 | pids = (pid_t *)malloc(lm_optP * sizeof (pid_t)); | |
375 | if (pids == NULL) { | |
376 | perror("malloc(pids)"); | |
377 | exit(1); | |
378 | } | |
379 | tids = (pthread_t *)malloc(lm_optT * sizeof (pthread_t)); | |
380 | if (tids == NULL) { | |
381 | perror("malloc(tids)"); | |
382 | exit(1); | |
383 | } | |
384 | ||
385 | /* check that the case defines lm_tsdsize before proceeding */ | |
386 | if (lm_tsdsize == (size_t)-1) { | |
387 | (void) fprintf(stderr, "error in benchmark_init: " | |
388 | "lm_tsdsize not set\n"); | |
389 | exit(1); | |
390 | } | |
391 | ||
392 | /* round up tsdsize to nearest 128 to eliminate false sharing */ | |
393 | tsdsize = ((lm_tsdsize + 127) / 128) * 128; | |
394 | ||
395 | /* allocate sufficient TSD for each thread in each process */ | |
396 | tsdseg = (void *)mmap(NULL, lm_optT * lm_optP * tsdsize + 8192, | |
397 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0L); | |
398 | if (tsdseg == NULL) { | |
399 | perror("mmap(tsd)"); | |
400 | exit(1); | |
401 | } | |
402 | ||
403 | /* initialise worker synchronisation */ | |
404 | b = barrier_create(lm_optT * lm_optP, DATASIZE); | |
405 | if (b == NULL) { | |
406 | perror("barrier_create()"); | |
407 | exit(1); | |
408 | } | |
409 | lm_barrier = b; | |
410 | b->ba_flag = 1; | |
411 | ||
412 | /* need this here so that parent and children can call exit() */ | |
413 | (void) fflush(stdout); | |
414 | (void) fflush(stderr); | |
415 | ||
416 | /* when we started and when to stop */ | |
417 | ||
418 | b->ba_starttime = getnsecs(); | |
419 | b->ba_deadline = (long long) (b->ba_starttime + (lm_optD * 1000000LL)); | |
420 | ||
421 | /* do the work */ | |
422 | if (lm_opt1) { | |
423 | /* single process, non-fork mode */ | |
424 | pindex = 0; | |
425 | worker_process(); | |
426 | } else { | |
427 | /* create worker processes */ | |
428 | for (i = 0; i < lm_optP; i++) { | |
429 | pids[i] = fork(); | |
430 | ||
431 | switch (pids[i]) { | |
432 | case 0: | |
433 | pindex = i; | |
434 | worker_process(); | |
435 | exit(0); | |
436 | break; | |
437 | case -1: | |
438 | perror("fork"); | |
439 | exit(1); | |
440 | break; | |
441 | default: | |
442 | continue; | |
443 | } | |
444 | } | |
445 | ||
446 | /* wait for worker processes */ | |
447 | for (i = 0; i < lm_optP; i++) { | |
448 | if (pids[i] > 0) { | |
449 | (void) waitpid(pids[i], NULL, 0); | |
450 | } | |
451 | } | |
452 | } | |
453 | ||
454 | b->ba_endtime = getnsecs(); | |
455 | ||
456 | /* compute results */ | |
457 | ||
458 | compute_stats(b); | |
459 | ||
460 | /* print arguments benchmark was invoked with ? */ | |
461 | if (lm_optL) { | |
462 | int l; | |
463 | (void) printf("# %s ", argv[0]); | |
464 | for (l = 1; l < argc; l++) { | |
465 | (void) printf("%s ", argv[l]); | |
466 | } | |
467 | (void) printf("\n"); | |
468 | } | |
469 | ||
470 | /* print result header (unless suppressed) */ | |
471 | if (!lm_optH) { | |
472 | (void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n", | |
473 | "", "prc", "thr", | |
474 | "usecs/call", | |
475 | "samples", "errors", "cnt/samp", lm_header); | |
476 | } | |
477 | ||
478 | /* print result */ | |
479 | ||
480 | (void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n", | |
481 | lm_optN, lm_optP, lm_optT, | |
482 | (lm_optM?b->ba_corrected.st_mean:b->ba_corrected.st_median), | |
483 | b->ba_batches, b->ba_errors, lm_optB, | |
484 | benchmark_result()); | |
485 | ||
486 | if (lm_optS) { | |
487 | print_stats(b); | |
488 | } | |
489 | ||
490 | /* just incase something goes awry */ | |
491 | (void) fflush(stdout); | |
492 | (void) fflush(stderr); | |
493 | ||
494 | /* cleanup by stages */ | |
495 | (void) benchmark_finirun(); | |
496 | (void) barrier_destroy(b); | |
497 | (void) benchmark_fini(); | |
498 | ||
499 | if (lm_optE) { | |
500 | (void) fprintf(stderr, " for %12.5f seconds\n", | |
501 | (double)(getnsecs() - startnsecs) / | |
502 | 1.e9); | |
503 | (void) fflush(stderr); | |
504 | } | |
505 | return (0); | |
506 | } | |
507 | ||
508 | void * | |
509 | worker_thread(void *arg) | |
510 | { | |
511 | result_t r; | |
512 | long long last_sleep = 0; | |
513 | long long t; | |
514 | ||
515 | r.re_errors = benchmark_initworker(arg); | |
516 | ||
517 | while (lm_barrier->ba_flag) { | |
518 | r.re_count = 0; | |
519 | r.re_errors += benchmark_initbatch(arg); | |
520 | ||
521 | /* sync to clock */ | |
522 | ||
523 | if (lm_optA && ((t = getnsecs()) - last_sleep) > 75000000LL) { | |
524 | (void) poll(0, 0, 10); | |
525 | last_sleep = t; | |
526 | } | |
527 | /* wait for it ... */ | |
528 | (void) barrier_queue(lm_barrier, NULL); | |
529 | ||
530 | /* time the test */ | |
531 | r.re_t0 = getnsecs(); | |
532 | (void) benchmark(arg, &r); | |
533 | r.re_t1 = getnsecs(); | |
534 | ||
535 | /* time to stop? */ | |
536 | if (r.re_t1 > lm_barrier->ba_deadline && | |
537 | (!lm_optC || lm_optC < lm_barrier->ba_batches)) { | |
538 | lm_barrier->ba_flag = 0; | |
539 | } | |
540 | ||
541 | /* record results and sync */ | |
542 | (void) barrier_queue(lm_barrier, &r); | |
543 | ||
544 | (void) benchmark_finibatch(arg); | |
545 | ||
546 | r.re_errors = 0; | |
547 | } | |
548 | ||
549 | (void) benchmark_finiworker(arg); | |
550 | ||
551 | return (0); | |
552 | } | |
553 | ||
554 | void | |
555 | worker_process() | |
556 | { | |
557 | int i; | |
558 | void *tsd; | |
559 | ||
560 | for (i = 1; i < lm_optT; i++) { | |
561 | tsd = gettsd(pindex, i); | |
562 | if (pthread_create(&tids[i], NULL, worker_thread, tsd) != 0) { | |
563 | perror("pthread_create"); | |
564 | exit(1); | |
565 | } | |
566 | } | |
567 | ||
568 | tsd = gettsd(pindex, 0); | |
569 | (void) worker_thread(tsd); | |
570 | ||
571 | for (i = 1; i < lm_optT; i++) { | |
572 | (void) pthread_join(tids[i], NULL); | |
573 | } | |
574 | } | |
575 | ||
576 | void | |
577 | usage() | |
578 | { | |
579 | (void) printf( | |
580 | "usage: %s\n" | |
581 | " [-1] (single process; overrides -P > 1)\n" | |
582 | " [-A] (align with clock)\n" | |
583 | " [-B batch-size (default %d)]\n" | |
584 | " [-C minimum number of samples (default 0)]\n" | |
585 | " [-D duration in msecs (default %ds)]\n" | |
586 | " [-E (echo name to stderr)]\n" | |
587 | " [-H] (suppress headers)\n" | |
588 | " [-I] nsecs per op (used to compute batch size)" | |
589 | " [-L] (print argument line)\n" | |
590 | " [-M] (reports mean rather than median)\n" | |
591 | " [-N test-name (default '%s')]\n" | |
592 | " [-P processes (default %d)]\n" | |
593 | " [-S] (print detailed stats)\n" | |
594 | " [-T threads (default %d)]\n" | |
595 | " [-V] (print the libMicro version and exit)\n" | |
596 | " [-W] (flag possible benchmark problems)\n" | |
597 | "%s\n", | |
598 | lm_procname, | |
599 | lm_defB, lm_defD, lm_procname, lm_defP, lm_defT, | |
600 | lm_usage); | |
601 | } | |
602 | ||
603 | void | |
604 | print_warnings(barrier_t *b) | |
605 | { | |
606 | int head = 0; | |
607 | int increase; | |
608 | ||
609 | if (b->ba_quant) { | |
610 | if (!head++) { | |
611 | (void) printf("#\n# WARNINGS\n"); | |
612 | } | |
613 | increase = (int)(floor((nsecs_resolution * 100.0) / | |
614 | ((double)lm_optB * b->ba_corrected.st_median * 1000.0)) + | |
615 | 1.0); | |
616 | (void) printf("# Quantization error likely;" | |
617 | "increase batch size (-B option) %dX to avoid.\n", | |
618 | increase); | |
619 | } | |
620 | ||
621 | /* | |
622 | * XXX should warn on median != mean by a lot | |
623 | */ | |
624 | ||
625 | if (b->ba_errors) { | |
626 | if (!head++) { | |
627 | (void) printf("#\n# WARNINGS\n"); | |
628 | } | |
629 | (void) printf("# Errors occured during benchmark.\n"); | |
630 | } | |
631 | } | |
632 | ||
633 | void | |
634 | print_stats(barrier_t *b) | |
635 | { | |
636 | (void) printf("#\n"); | |
637 | (void) printf("# STATISTICS %12s %12s\n", | |
638 | "usecs/call (raw)", | |
639 | "usecs/call (outliers removed)"); | |
640 | ||
641 | if (b->ba_count == 0) { | |
642 | (void) printf("zero samples\n"); | |
643 | return; | |
644 | } | |
645 | ||
646 | (void) printf("# min %12.5f %12.5f\n", | |
647 | b->ba_raw.st_min, | |
648 | b->ba_corrected.st_min); | |
649 | ||
650 | (void) printf("# max %12.5f %12.5f\n", | |
651 | b->ba_raw.st_max, | |
652 | b->ba_corrected.st_max); | |
653 | (void) printf("# mean %12.5f %12.5f\n", | |
654 | b->ba_raw.st_mean, | |
655 | b->ba_corrected.st_mean); | |
656 | (void) printf("# median %12.5f %12.5f\n", | |
657 | b->ba_raw.st_median, | |
658 | b->ba_corrected.st_median); | |
659 | (void) printf("# stddev %12.5f %12.5f\n", | |
660 | b->ba_raw.st_stddev, | |
661 | b->ba_corrected.st_stddev); | |
662 | (void) printf("# standard error %12.5f %12.5f\n", | |
663 | b->ba_raw.st_stderr, | |
664 | b->ba_corrected.st_stderr); | |
665 | (void) printf("# 99%% confidence level %12.5f %12.5f\n", | |
666 | b->ba_raw.st_99confidence, | |
667 | b->ba_corrected.st_99confidence); | |
668 | (void) printf("# skew %12.5f %12.5f\n", | |
669 | b->ba_raw.st_skew, | |
670 | b->ba_corrected.st_skew); | |
671 | (void) printf("# kurtosis %12.5f %12.5f\n", | |
672 | b->ba_raw.st_kurtosis, | |
673 | b->ba_corrected.st_kurtosis); | |
674 | ||
675 | (void) printf("# time correlation %12.5f %12.5f\n", | |
676 | b->ba_raw.st_timecorr, | |
677 | b->ba_corrected.st_timecorr); | |
678 | (void) printf("#\n"); | |
679 | ||
680 | (void) printf("# elasped time %12.5f\n", (b->ba_endtime - | |
681 | b->ba_starttime) / 1.0e9); | |
682 | (void) printf("# number of samples %12d\n", b->ba_batches); | |
683 | (void) printf("# number of outliers %12d\n", b->ba_outliers); | |
684 | (void) printf("# getnsecs overhead %12d\n", (int)nsecs_overhead); | |
685 | ||
686 | (void) printf("#\n"); | |
687 | (void) printf("# DISTRIBUTION\n"); | |
688 | ||
689 | print_histo(b); | |
690 | ||
691 | if (lm_optW) { | |
692 | print_warnings(b); | |
693 | } | |
694 | } | |
695 | ||
696 | void | |
697 | update_stats(barrier_t *b, result_t *r) | |
698 | { | |
699 | double time; | |
700 | double nsecs_per_call; | |
701 | ||
702 | if (b->ba_waiters == 0) { | |
703 | /* first thread only */ | |
704 | b->ba_t0 = r->re_t0; | |
705 | b->ba_t1 = r->re_t1; | |
706 | b->ba_count0 = 0; | |
707 | b->ba_errors0 = 0; | |
708 | } else { | |
709 | /* all but first thread */ | |
710 | if (r->re_t0 < b->ba_t0) { | |
711 | b->ba_t0 = r->re_t0; | |
712 | } | |
713 | if (r->re_t1 > b->ba_t1) { | |
714 | b->ba_t1 = r->re_t1; | |
715 | } | |
716 | } | |
717 | ||
718 | b->ba_count0 += r->re_count; | |
719 | b->ba_errors0 += r->re_errors; | |
720 | ||
721 | if (b->ba_waiters == b->ba_hwm - 1) { | |
722 | /* last thread only */ | |
723 | ||
724 | ||
725 | time = (double)b->ba_t1 - (double)b->ba_t0 - | |
726 | (double)nsecs_overhead; | |
727 | ||
728 | if (time < 100 * nsecs_resolution) | |
729 | b->ba_quant++; | |
730 | ||
731 | /* | |
732 | * normalize by procs * threads if not -U | |
733 | */ | |
734 | ||
735 | nsecs_per_call = time / (double)b->ba_count0 * | |
736 | (double)(lm_optT * lm_optP); | |
737 | ||
738 | b->ba_count += b->ba_count0; | |
739 | b->ba_errors += b->ba_errors0; | |
740 | ||
741 | b->ba_data[b->ba_batches % b->ba_datasize] = | |
742 | nsecs_per_call; | |
743 | ||
744 | b->ba_batches++; | |
745 | } | |
746 | } | |
747 | ||
748 | #ifdef USE_SEMOP | |
749 | barrier_t * | |
750 | barrier_create(int hwm, int datasize) | |
751 | { | |
752 | struct sembuf s[1]; | |
753 | barrier_t *b; | |
754 | ||
755 | /*LINTED*/ | |
756 | b = (barrier_t *)mmap(NULL, | |
757 | sizeof (barrier_t) + (datasize - 1) * sizeof (double), | |
758 | PROT_READ | PROT_WRITE, | |
759 | MAP_SHARED | MAP_ANON, -1, 0L); | |
760 | if (b == (barrier_t *)MAP_FAILED) { | |
761 | return (NULL); | |
762 | } | |
763 | b->ba_datasize = datasize; | |
764 | ||
765 | b->ba_flag = 0; | |
766 | b->ba_hwm = hwm; | |
767 | b->ba_semid = semget(IPC_PRIVATE, 3, 0600); | |
768 | if (b->ba_semid == -1) { | |
769 | (void) munmap((void *)b, sizeof (barrier_t)); | |
770 | return (NULL); | |
771 | } | |
772 | ||
773 | /* [hwm - 1, 0, 0] */ | |
774 | s[0].sem_num = 0; | |
775 | s[0].sem_op = hwm - 1; | |
776 | s[0].sem_flg = 0; | |
777 | if (semop(b->ba_semid, s, 1) == -1) { | |
778 | perror("semop(1)"); | |
779 | (void) semctl(b->ba_semid, 0, IPC_RMID); | |
780 | (void) munmap((void *)b, sizeof (barrier_t)); | |
781 | return (NULL); | |
782 | } | |
783 | ||
784 | b->ba_waiters = 0; | |
785 | b->ba_phase = 0; | |
786 | ||
787 | b->ba_count = 0; | |
788 | b->ba_errors = 0; | |
789 | ||
790 | return (b); | |
791 | } | |
792 | ||
793 | int | |
794 | barrier_destroy(barrier_t *b) | |
795 | { | |
796 | (void) semctl(b->ba_semid, 0, IPC_RMID); | |
797 | (void) munmap((void *)b, sizeof (barrier_t)); | |
798 | ||
799 | return (0); | |
800 | } | |
801 | ||
802 | int | |
803 | barrier_queue(barrier_t *b, result_t *r) | |
804 | { | |
805 | struct sembuf s[2]; | |
806 | ||
807 | /* | |
808 | * {s0(-(hwm-1))} | |
809 | * if ! nowait {s1(-(hwm-1))} | |
810 | * (all other threads) | |
811 | * update shared stats | |
812 | * {s0(hwm-1), s1(1)} | |
813 | * {s0(1), s2(-1)} | |
814 | * else | |
815 | * (last thread) | |
816 | * update shared stats | |
817 | * {s2(hwm-1)} | |
818 | */ | |
819 | ||
820 | s[0].sem_num = 0; | |
821 | s[0].sem_op = -(b->ba_hwm - 1); | |
822 | s[0].sem_flg = 0; | |
823 | if (semop(b->ba_semid, s, 1) == -1) { | |
824 | perror("semop(2)"); | |
825 | return (-1); | |
826 | } | |
827 | ||
828 | s[0].sem_num = 1; | |
829 | s[0].sem_op = -(b->ba_hwm - 1); | |
830 | s[0].sem_flg = IPC_NOWAIT; | |
831 | if (semop(b->ba_semid, s, 1) == -1) { | |
832 | if (errno != EAGAIN) { | |
833 | perror("semop(3)"); | |
834 | return (-1); | |
835 | } | |
836 | ||
837 | /* all but the last thread */ | |
838 | ||
839 | if (r != NULL) { | |
840 | update_stats(b, r); | |
841 | } | |
842 | ||
843 | b->ba_waiters++; | |
844 | ||
845 | s[0].sem_num = 0; | |
846 | s[0].sem_op = b->ba_hwm - 1; | |
847 | s[0].sem_flg = 0; | |
848 | s[1].sem_num = 1; | |
849 | s[1].sem_op = 1; | |
850 | s[1].sem_flg = 0; | |
851 | if (semop(b->ba_semid, s, 2) == -1) { | |
852 | perror("semop(4)"); | |
853 | return (-1); | |
854 | } | |
855 | ||
856 | s[0].sem_num = 0; | |
857 | s[0].sem_op = 1; | |
858 | s[0].sem_flg = 0; | |
859 | s[1].sem_num = 2; | |
860 | s[1].sem_op = -1; | |
861 | s[1].sem_flg = 0; | |
862 | if (semop(b->ba_semid, s, 2) == -1) { | |
863 | perror("semop(5)"); | |
864 | return (-1); | |
865 | } | |
866 | ||
867 | } else { | |
868 | /* the last thread */ | |
869 | ||
870 | if (r != NULL) { | |
871 | update_stats(b, r); | |
872 | } | |
873 | ||
874 | b->ba_waiters = 0; | |
875 | b->ba_phase++; | |
876 | ||
877 | s[0].sem_num = 2; | |
878 | s[0].sem_op = b->ba_hwm - 1; | |
879 | s[0].sem_flg = 0; | |
880 | if (semop(b->ba_semid, s, 1) == -1) { | |
881 | perror("semop(6)"); | |
882 | return (-1); | |
883 | } | |
884 | } | |
885 | ||
886 | return (0); | |
887 | } | |
888 | ||
889 | #else /* USE_SEMOP */ | |
890 | ||
891 | barrier_t * | |
892 | barrier_create(int hwm, int datasize) | |
893 | { | |
894 | pthread_mutexattr_t attr; | |
895 | pthread_condattr_t cattr; | |
896 | barrier_t *b; | |
897 | ||
898 | /*LINTED*/ | |
899 | b = (barrier_t *)mmap(NULL, | |
900 | sizeof (barrier_t) + (datasize - 1) * sizeof (double), | |
901 | PROT_READ | PROT_WRITE, | |
902 | MAP_SHARED | MAP_ANON, -1, 0L); | |
903 | if (b == (barrier_t *)MAP_FAILED) { | |
904 | return (NULL); | |
905 | } | |
906 | b->ba_datasize = datasize; | |
907 | ||
908 | b->ba_hwm = hwm; | |
909 | b->ba_flag = 0; | |
910 | ||
911 | (void) pthread_mutexattr_init(&attr); | |
912 | (void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); | |
913 | ||
914 | (void) pthread_condattr_init(&cattr); | |
915 | (void) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED); | |
916 | ||
917 | (void) pthread_mutex_init(&b->ba_lock, &attr); | |
918 | (void) pthread_cond_init(&b->ba_cv, &cattr); | |
919 | ||
920 | b->ba_waiters = 0; | |
921 | b->ba_phase = 0; | |
922 | ||
923 | b->ba_count = 0; | |
924 | b->ba_errors = 0; | |
925 | ||
926 | return (b); | |
927 | } | |
928 | ||
929 | int | |
930 | barrier_destroy(barrier_t *b) | |
931 | { | |
932 | (void) munmap((void *)b, sizeof (barrier_t)); | |
933 | ||
934 | return (0); | |
935 | } | |
936 | ||
937 | int | |
938 | barrier_queue(barrier_t *b, result_t *r) | |
939 | { | |
940 | int phase; | |
941 | ||
942 | (void) pthread_mutex_lock(&b->ba_lock); | |
943 | ||
944 | if (r != NULL) { | |
945 | update_stats(b, r); | |
946 | } | |
947 | ||
948 | phase = b->ba_phase; | |
949 | ||
950 | b->ba_waiters++; | |
951 | if (b->ba_hwm == b->ba_waiters) { | |
952 | b->ba_waiters = 0; | |
953 | b->ba_phase++; | |
954 | (void) pthread_cond_broadcast(&b->ba_cv); | |
955 | } | |
956 | ||
957 | while (b->ba_phase == phase) { | |
958 | (void) pthread_cond_wait(&b->ba_cv, &b->ba_lock); | |
959 | } | |
960 | ||
961 | (void) pthread_mutex_unlock(&b->ba_lock); | |
962 | return (0); | |
963 | } | |
964 | #endif /* USE_SEMOP */ | |
965 | ||
966 | int | |
967 | gettindex() | |
968 | { | |
969 | int i; | |
970 | ||
971 | if (tids == NULL) { | |
972 | return (-1); | |
973 | } | |
974 | ||
975 | for (i = 1; i < lm_optT; i++) { | |
976 | if (pthread_self() == tids[i]) { | |
977 | return (i); | |
978 | } | |
979 | } | |
980 | ||
981 | return (0); | |
982 | } | |
983 | ||
984 | int | |
985 | getpindex() | |
986 | { | |
987 | return (pindex); | |
988 | } | |
989 | ||
990 | void * | |
991 | gettsd(int p, int t) | |
992 | { | |
993 | if ((p < 0) || (p >= lm_optP) || (t < 0) || (t >= lm_optT)) | |
994 | return (NULL); | |
995 | ||
996 | return ((void *)((unsigned long)tsdseg + | |
997 | (((p * lm_optT) + t) * tsdsize))); | |
998 | } | |
999 | ||
1000 | #if defined(__APPLE__) | |
1001 | int | |
1002 | gettsdindex(void *arg){ | |
1003 | /* | |
1004 | * gettindex() can race with pthread_create() filling in tids[]. | |
1005 | * This is an alternative approach to finding the calling thread's tsd in t | |
1006 | sdseg | |
1007 | */ | |
1008 | return tsdsize ? ((unsigned long)arg - (unsigned long)tsdseg)/tsdsize : 0; | |
1009 | } | |
1010 | #endif /* __APPLE__ */ | |
1011 | ||
1012 | #ifdef USE_GETHRTIME | |
/* Returns the current gethrtime() reading, in nanoseconds. */
long long
getnsecs()
{
	long long nsecs = gethrtime();

	return (nsecs);
}
1018 | ||
/* Returns the current gethrtime() reading scaled down to microseconds. */
long long
getusecs()
{
	long long nsecs = gethrtime();

	return (nsecs / 1000);
}
1024 | ||
1025 | #elif USE_RDTSC /* USE_GETHRTIME */ | |
1026 | ||
/*
 * Reads the x86 time-stamp counter and returns it as a 64-bit value.
 *
 * The instruction leaves the low half in EAX and the high half in EDX.
 * Binding the two registers separately works on both i386 and x86-64; the
 * "=A" constraint only names the EDX:EAX pair on i386.
 */
__inline__ long long
rdtsc(void)
{
	unsigned int lo;
	unsigned int hi;

	/* 0x0f 0x31 is the encoding of the rdtsc instruction */
	__asm__ volatile(".byte 0x0f, 0x31" : "=a" (lo), "=d" (hi));
	return ((long long)(((unsigned long long)hi << 32) | lo));
}
1034 | ||
1035 | long long | |
1036 | getusecs() | |
1037 | { | |
1038 | return (rdtsc() * 1000000 / lm_hz); | |
1039 | } | |
1040 | ||
1041 | long long | |
1042 | getnsecs() | |
1043 | { | |
1044 | return (rdtsc() * 1000000000 / lm_hz); | |
1045 | } | |
1046 | ||
1047 | #else /* USE_GETHRTIME */ | |
1048 | ||
/* Returns the gettimeofday() wall-clock time in microseconds. */
long long
getusecs()
{
	struct timeval now;
	long long usecs;

	(void) gettimeofday(&now, NULL);

	usecs = (long long)now.tv_sec * 1000000LL;
	usecs += (long long)now.tv_usec;

	return (usecs);
}
1058 | ||
/*
 * Returns the gettimeofday() wall-clock time in nanoseconds; the value
 * only ever changes in whole-microsecond steps.
 */
long long
getnsecs()
{
	struct timeval now;
	long long nsecs;

	(void) gettimeofday(&now, NULL);

	nsecs = (long long)now.tv_sec * 1000000000LL;
	nsecs += (long long)now.tv_usec * 1000LL;

	return (nsecs);
}
1069 | ||
1070 | #endif /* USE_GETHRTIME */ | |
1071 | ||
/*
 * Makes sure the soft RLIMIT_NOFILE limit is at least limit, raising the
 * hard limit too when it is lower. Does nothing when the soft limit is
 * already above limit.
 *
 * Returns 0. Exits with status 1 if getrlimit() fails and with status 3
 * if setrlimit() fails.
 */
int
setfdlimit(int limit)
{
	struct rlimit fdlim;

	if (getrlimit(RLIMIT_NOFILE, &fdlim) < 0) {
		perror("getrlimit");
		exit(1);
	}

	if (!(fdlim.rlim_cur > limit)) {
		fdlim.rlim_cur = limit;

		/* the soft limit may not exceed the hard one */
		if (fdlim.rlim_max < limit) {
			fdlim.rlim_max = limit;
		}

		if (setrlimit(RLIMIT_NOFILE, &fdlim) < 0) {
			perror("setrlimit");
			exit(3);
		}
	}

	return (0);
}
1097 | ||
1098 | ||
#define	KILOBYTE	1024
#define	MEGABYTE	(KILOBYTE * KILOBYTE)
#define	GIGABYTE	(KILOBYTE * MEGABYTE)

/*
 * Converts a size string to a long long.
 *
 * The string is a decimal number, optionally followed by one suffix
 * letter: k/K (x1024), m/M (x1024^2) or g/G (x1024^3). When a suffix is
 * present everything before it must be decimal digits. Without a suffix
 * the string is handed to strtoll() unchanged.
 *
 * Returns the value, or -1 if the suffix is unknown, a non-digit precedes
 * the suffix, or the scaled value does not fit in a long long.
 */
long long
sizetoll(const char *arg)
{
	size_t len = strlen(arg);
	size_t i;
	long long mult = 1;
	long long value;

	/* ctype functions require an unsigned char value */
	if (len && isalpha((unsigned char)arg[len - 1])) {
		switch (arg[len - 1]) {

		case 'k':
		case 'K':
			mult = KILOBYTE;
			break;
		case 'm':
		case 'M':
			mult = MEGABYTE;
			break;
		case 'g':
		case 'G':
			mult = GIGABYTE;
			break;
		default:
			return (-1);
		}

		for (i = 0; i < len - 1; i++)
			if (!isdigit((unsigned char)arg[i]))
				return (-1);
	}

	value = strtoll(arg, NULL, 10);

	/* refuse a product that would overflow (mult is always >= 1) */
	if (value > LLONG_MAX / mult || value < LLONG_MIN / mult)
		return (-1);

	return (mult * value);
}
1136 | ||
/*
 * Converts a size string to an int.
 *
 * The string is a decimal number, optionally followed by one suffix
 * letter: k/K (x1024), m/M (x1024^2) or g/G (x1024^3). When a suffix is
 * present everything before it must be decimal digits.
 *
 * Returns the value, or -1 if the suffix is unknown, a non-digit precedes
 * the suffix, or the scaled value does not fit in an int (previously such
 * values were silently truncated).
 */
int
sizetoint(const char *arg)
{
	size_t len = strlen(arg);
	size_t i;
	long long mult = 1;
	long long value;

	/* ctype functions require an unsigned char value */
	if (len && isalpha((unsigned char)arg[len - 1])) {
		switch (arg[len - 1]) {

		case 'k':
		case 'K':
			mult = KILOBYTE;
			break;
		case 'm':
		case 'M':
			mult = MEGABYTE;
			break;
		case 'g':
		case 'G':
			mult = GIGABYTE;
			break;
		default:
			return (-1);
		}

		for (i = 0; i < len - 1; i++)
			if (!isdigit((unsigned char)arg[i]))
				return (-1);
	}

	/* strtoll() has defined behaviour for out-of-range input */
	value = strtoll(arg, NULL, 10);

	/* the scaled result must fit in an int (mult is always >= 1) */
	if (value > INT_MAX / mult || value < INT_MIN / mult)
		return (-1);

	return ((int)(mult * value));
}
1170 | ||
/*
 * Print one histogram bar to stdout, padded with spaces to a width of
 * 32 characters.
 *
 * The first column is '*' whenever count is non-zero; further '*'
 * characters are emitted while the column index is below
 * 32 * count / total.  A total of zero (or less) is treated as an
 * empty scale so that no division by zero takes place; only the first
 * column can then be a '*'.
 */
static void
print_bar(long count, long total)
{
	long stars;
	int i;

	/* callers may pass total == 0 when no bucket holds any sample */
	stars = (total > 0) ? (32 * count) / total : 0;

	(void) putchar_unlocked(count ? '*' : ' ');
	for (i = 1; i < stars; i++)
		(void) putchar_unlocked('*');
	for (; i < 32; i++)
		(void) putchar_unlocked(' ');
}
1182 | ||
/*
 * qsort() comparison callback for arrays of double.
 * Returns 1 when *p1 > *p2, -1 when *p1 < *p2 and 0 otherwise.
 */
static int
doublecmp(const void *p1, const void *p2)
{
	const double *lhs = p1;
	const double *rhs = p2;

	/* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return ((*lhs > *rhs) - (*lhs < *rhs));
}
1195 | ||
1196 | static void | |
1197 | print_histo(barrier_t *b) | |
1198 | { | |
1199 | int n; | |
1200 | int i; | |
1201 | int j; | |
1202 | int last; | |
1203 | long long maxcount; | |
1204 | double sum; | |
1205 | long long min; | |
1206 | long long scale; | |
1207 | double x; | |
1208 | long long y; | |
1209 | long long count; | |
1210 | int i95; | |
1211 | double p95; | |
1212 | double r95; | |
1213 | double m95; | |
1214 | histo_t *histo; | |
1215 | ||
1216 | (void) printf("# %12s %12s %32s %12s\n", "counts", "usecs/call", | |
1217 | "", "means"); | |
1218 | ||
1219 | /* calculate how much data we've captured */ | |
1220 | n = b->ba_batches > b->ba_datasize ? b->ba_datasize : b->ba_batches; | |
1221 | ||
1222 | /* find the 95th percentile - index, value and range */ | |
1223 | qsort((void *)b->ba_data, n, sizeof (double), doublecmp); | |
1224 | min = b->ba_data[0] + 0.000001; | |
1225 | i95 = n * 95 / 100; | |
1226 | p95 = b->ba_data[i95]; | |
1227 | r95 = p95 - min + 1; | |
1228 | ||
1229 | /* find a suitable min and scale */ | |
1230 | i = 0; | |
1231 | x = r95 / (HISTOSIZE - 1); | |
1232 | while (x >= 10.0) { | |
1233 | x /= 10.0; | |
1234 | i++; | |
1235 | } | |
1236 | y = x + 0.9999999999; | |
1237 | while (i > 0) { | |
1238 | y *= 10; | |
1239 | i--; | |
1240 | } | |
1241 | min /= y; | |
1242 | min *= y; | |
1243 | scale = y * (HISTOSIZE - 1); | |
1244 | if (scale < (HISTOSIZE - 1)) { | |
1245 | scale = (HISTOSIZE - 1); | |
1246 | } | |
1247 | ||
1248 | /* create and initialise the histogram */ | |
1249 | histo = malloc(HISTOSIZE * sizeof (histo_t)); | |
1250 | for (i = 0; i < HISTOSIZE; i++) { | |
1251 | histo[i].sum = 0.0; | |
1252 | histo[i].count = 0; | |
1253 | } | |
1254 | ||
1255 | /* populate the histogram */ | |
1256 | last = 0; | |
1257 | sum = 0.0; | |
1258 | count = 0; | |
1259 | for (i = 0; i < i95; i++) { | |
1260 | j = (HISTOSIZE - 1) * (b->ba_data[i] - min) / scale; | |
1261 | ||
1262 | if (j >= HISTOSIZE) { | |
1263 | (void) printf("panic!\n"); | |
1264 | j = HISTOSIZE - 1; | |
1265 | } | |
1266 | ||
1267 | histo[j].sum += b->ba_data[i]; | |
1268 | histo[j].count++; | |
1269 | ||
1270 | sum += b->ba_data[i]; | |
1271 | count++; | |
1272 | } | |
1273 | m95 = sum / count; | |
1274 | ||
1275 | /* find the larges bucket */ | |
1276 | maxcount = 0; | |
1277 | for (i = 0; i < HISTOSIZE; i++) | |
1278 | if (histo[i].count > 0) { | |
1279 | last = i; | |
1280 | if (histo[i].count > maxcount) | |
1281 | maxcount = histo[i].count; | |
1282 | } | |
1283 | ||
1284 | /* print the buckets */ | |
1285 | for (i = 0; i <= last; i++) { | |
1286 | (void) printf("# %12lld %12.5f |", histo[i].count, | |
1287 | (min + scale * (double)i / (HISTOSIZE - 1))); | |
1288 | ||
1289 | print_bar(histo[i].count, maxcount); | |
1290 | ||
1291 | if (histo[i].count > 0) | |
1292 | (void) printf("%12.5f\n", | |
1293 | histo[i].sum / histo[i].count); | |
1294 | else | |
1295 | (void) printf("%12s\n", "-"); | |
1296 | } | |
1297 | ||
1298 | /* find the mean of values beyond the 95th percentile */ | |
1299 | sum = 0.0; | |
1300 | count = 0; | |
1301 | for (i = i95; i < n; i++) { | |
1302 | sum += b->ba_data[i]; | |
1303 | count++; | |
1304 | } | |
1305 | ||
1306 | /* print the >95% bucket summary */ | |
1307 | (void) printf("#\n"); | |
1308 | (void) printf("# %12lld %12s |", count, "> 95%"); | |
1309 | print_bar(count, maxcount); | |
1310 | if (count > 0) | |
1311 | (void) printf("%12.5f\n", sum / count); | |
1312 | else | |
1313 | (void) printf("%12s\n", "-"); | |
1314 | (void) printf("#\n"); | |
1315 | (void) printf("# %12s %12.5f\n", "mean of 95%", m95); | |
1316 | (void) printf("# %12s %12.5f\n", "95th %ile", p95); | |
1317 | ||
1318 | /* quantify any buffer overflow */ | |
1319 | if (b->ba_batches > b->ba_datasize) | |
1320 | (void) printf("# %12s %12d\n", "data dropped", | |
1321 | b->ba_batches - b->ba_datasize); | |
1322 | } | |
1323 | ||
1324 | static void | |
1325 | compute_stats(barrier_t *b) | |
1326 | { | |
1327 | int i; | |
1328 | ||
1329 | if (b->ba_batches > b->ba_datasize) | |
1330 | b->ba_batches = b->ba_datasize; | |
1331 | ||
1332 | /* | |
1333 | * convert to usecs/call | |
1334 | */ | |
1335 | ||
1336 | for (i = 0; i < b->ba_batches; i++) | |
1337 | b->ba_data[i] /= 1000.0; | |
1338 | ||
1339 | /* | |
1340 | * do raw stats | |
1341 | */ | |
1342 | ||
1343 | (void) crunch_stats(b->ba_data, b->ba_batches, &b->ba_raw); | |
1344 | ||
1345 | /* | |
1346 | * recursively apply 3 sigma rule to remove outliers | |
1347 | */ | |
1348 | ||
1349 | b->ba_corrected = b->ba_raw; | |
1350 | b->ba_outliers = 0; | |
1351 | ||
1352 | if (b->ba_batches > 40) { /* remove outliers */ | |
1353 | int removed; | |
1354 | ||
1355 | do { | |
1356 | removed = remove_outliers(b->ba_data, b->ba_batches, | |
1357 | &b->ba_corrected); | |
1358 | b->ba_outliers += removed; | |
1359 | b->ba_batches -= removed; | |
1360 | (void) crunch_stats(b->ba_data, b->ba_batches, | |
1361 | &b->ba_corrected); | |
1362 | } while (removed != 0 && b->ba_batches > 40); | |
1363 | } | |
1364 | ||
1365 | } | |
1366 | ||
1367 | /* | |
1368 | * routine to compute various statistics on array of doubles. | |
1369 | */ | |
1370 | ||
1371 | static int | |
1372 | crunch_stats(double *data, int count, stats_t *stats) | |
1373 | { | |
1374 | double a; | |
1375 | double std; | |
1376 | double diff; | |
1377 | double sk; | |
1378 | double ku; | |
1379 | double mean; | |
1380 | int i; | |
1381 | int bytes; | |
1382 | double *dupdata; | |
1383 | ||
1384 | /* | |
1385 | * first we need the mean | |
1386 | */ | |
1387 | ||
1388 | mean = 0.0; | |
1389 | ||
1390 | for (i = 0; i < count; i++) { | |
1391 | mean += data[i]; | |
1392 | } | |
1393 | ||
1394 | mean /= count; | |
1395 | ||
1396 | stats->st_mean = mean; | |
1397 | ||
1398 | /* | |
1399 | * malloc and sort so we can do median | |
1400 | */ | |
1401 | ||
1402 | dupdata = malloc(bytes = sizeof (double) * count); | |
1403 | (void) memcpy(dupdata, data, bytes); | |
1404 | qsort((void *)dupdata, count, sizeof (double), doublecmp); | |
1405 | stats->st_median = dupdata[count/2]; | |
1406 | ||
1407 | /* | |
1408 | * reuse dupdata to compute time correlation of data to | |
1409 | * detect interesting time-based trends | |
1410 | */ | |
1411 | ||
1412 | for (i = 0; i < count; i++) | |
1413 | dupdata[i] = (double)i; | |
1414 | ||
1415 | (void) fit_line(dupdata, data, count, &a, &stats->st_timecorr); | |
1416 | free(dupdata); | |
1417 | ||
1418 | std = 0.0; | |
1419 | sk = 0.0; | |
1420 | ku = 0.0; | |
1421 | ||
1422 | stats->st_max = -1; | |
1423 | stats->st_min = 1.0e99; /* hard to find portable values */ | |
1424 | ||
1425 | for (i = 0; i < count; i++) { | |
1426 | if (data[i] > stats->st_max) | |
1427 | stats->st_max = data[i]; | |
1428 | if (data[i] < stats->st_min) | |
1429 | stats->st_min = data[i]; | |
1430 | ||
1431 | diff = data[i] - mean; | |
1432 | std += diff * diff; | |
1433 | sk += diff * diff * diff; | |
1434 | ku += diff * diff * diff * diff; | |
1435 | } | |
1436 | ||
1437 | stats->st_stddev = std = sqrt(std/(double)(count - 1)); | |
1438 | stats->st_stderr = std / sqrt(count); | |
1439 | stats->st_99confidence = stats->st_stderr * 2.326; | |
1440 | stats->st_skew = sk / (std * std * std) / (double)(count); | |
1441 | stats->st_kurtosis = ku / (std * std * std * std) / | |
1442 | (double)(count) - 3; | |
1443 | ||
1444 | return (0); | |
1445 | } | |
1446 | ||
/*
 * Least-squares fit of the line y = a + bx to `count' points
 * (x[i], y[i]).
 *
 * On success the intercept is stored in *a, the slope in *b, and 0 is
 * returned.  When the normal-equation denominator is zero the fit is
 * undefined: -1 is returned and *a / *b are left unchanged.
 */

int
fit_line(double *x, double *y, int count, double *a, double *b)
{
	double sx = 0.0;
	double sy = 0.0;
	double sxy = 0.0;
	double sxx = 0.0;
	double det;
	int k = 0;

	while (k < count) {
		const double xv = x[k];
		const double yv = y[k];

		sx += xv;
		sxx += xv * xv;
		sy += yv;
		sxy += xv * yv;
		k++;
	}

	det = count * sxx - sx * sx;
	if (det == 0.0)
		return (-1);

	*a = (sy * sxx - sx * sxy) / det;
	*b = (count * sxy - sx * sy) / det;

	return (0);
}
1479 | ||
/*
 * Empty function for measurement purposes; does no work and always
 * returns 1.
 */

int
nop()
{
	enum { NOP_RESULT = 1 };

	return (NOP_RESULT);
}
1489 | ||
1490 | #define NSECITER 1000 | |
1491 | ||
1492 | static long long | |
1493 | get_nsecs_overhead() | |
1494 | { | |
1495 | long long s; | |
1496 | ||
1497 | double data[NSECITER]; | |
1498 | stats_t stats; | |
1499 | ||
1500 | int i; | |
1501 | int count; | |
1502 | int outliers; | |
1503 | ||
1504 | (void) getnsecs(); /* warmup */ | |
1505 | (void) getnsecs(); /* warmup */ | |
1506 | (void) getnsecs(); /* warmup */ | |
1507 | ||
1508 | i = 0; | |
1509 | ||
1510 | count = NSECITER; | |
1511 | ||
1512 | for (i = 0; i < count; i++) { | |
1513 | s = getnsecs(); | |
1514 | data[i] = getnsecs() - s; | |
1515 | } | |
1516 | ||
1517 | (void) crunch_stats(data, count, &stats); | |
1518 | ||
1519 | while ((outliers = remove_outliers(data, count, &stats)) != 0) { | |
1520 | count -= outliers; | |
1521 | (void) crunch_stats(data, count, &stats); | |
1522 | } | |
1523 | ||
1524 | return ((long long)stats.st_mean); | |
1525 | ||
1526 | } | |
1527 | ||
/*
 * Estimate the resolution of getnsecs().
 *
 * An empty delay loop is lengthened until two getnsecs() readings
 * taken around it differ; 1000 intervals are then timed with loop
 * counts that are increasing multiples of that length, and the
 * smallest positive difference between consecutive intervals is
 * returned.  If no positive difference below 1<<30 is observed,
 * 1<<30 is returned.
 *
 * Loop counts and differences are kept in long long so that neither
 * the product nops * i nor a large interval difference can overflow
 * or be truncated.
 */
long long
get_nsecs_resolution()
{
	long long y[1000];

	int i, nops;
	long long j;
	long long start, stop;
	long long res;

	/*
	 * warm cache
	 */

	stop = start = getnsecs();

	/*
	 * first, figure out how many nops to use
	 * to get any delta between time measurements.
	 * use a minimum of one.
	 */

	for (i = 1; i < 10000000; i++) {
		start = getnsecs();
		for (j = i; j; j--)
			;
		stop = getnsecs();
		if (stop > start)
			break;
	}

	nops = i;

	/*
	 * now collect data at linearly varying intervals
	 */

	for (i = 0; i < 1000; i++) {
		start = getnsecs();
		for (j = (long long)nops * i; j; j--)
			;
		stop = getnsecs();
		y[i] = stop - start;
	}

	/*
	 * find smallest positive difference between samples;
	 * this is the timer resolution
	 */

	res = 1<<30;

	for (i = 1; i < 1000; i++) {
		long long diff = y[i] - y[i-1];

		if (diff > 0 && res > diff)
			res = diff;

	}

	return (res);
}
1588 | ||
1589 | /* | |
1590 | * remove any data points from the array more than 3 sigma out | |
1591 | */ | |
1592 | ||
1593 | static int | |
1594 | remove_outliers(double *data, int count, stats_t *stats) | |
1595 | { | |
1596 | double outmin = stats->st_mean - 3 * stats->st_stddev; | |
1597 | double outmax = stats->st_mean + 3 * stats->st_stddev; | |
1598 | ||
1599 | int i, j, outliers; | |
1600 | ||
1601 | for (outliers = i = j = 0; i < count; i++) | |
1602 | if (data[i] > outmax || data[i] < outmin) | |
1603 | outliers++; | |
1604 | else | |
1605 | data[j++] = data[i]; | |
1606 | ||
1607 | return (outliers); | |
1608 | } |