4 * The contents of this file are subject to the terms
5 * of the Common Development and Distribution License
6 * (the "License"). You may not use this file except
7 * in compliance with the License.
9 * You can obtain a copy of the license at
10 * src/OPENSOLARIS.LICENSE
11 * or http://www.opensolaris.org/os/licensing.
12 * See the License for the specific language governing
13 * permissions and limitations under the License.
15 * When distributing Covered Code, include this CDDL
16 * HEADER in each file and include the License file at
17 * usr/src/OPENSOLARIS.LICENSE. If applicable,
18 * add the following below this CDDL HEADER, with the
19 * fields enclosed by brackets "[]" replaced with your
20 * own identifying information: Portions Copyright [yyyy]
21 * [name of copyright owner]
27 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
32 * benchmarking routines
35 #include <sys/types.h>
52 #include <sys/resource.h>
63 #if defined(__APPLE__)
64 #include <mach/mach_time.h>
70 static long long start
;
71 static mach_timebase_info_data_t sTimebaseInfo
= { 0, 0 };
73 // If this is the first time we've run, get the timebase.
74 // We can use denom == 0 to indicate that sTimebaseInfo is
75 // uninitialised because it makes no sense to have a zero
76 // denominator in a fraction.
78 if ( sTimebaseInfo
.denom
== 0 ) {
79 (void) mach_timebase_info(&sTimebaseInfo
);
80 start
= mach_absolute_time();
83 elapsed
= mach_absolute_time() - start
;
85 // Convert to nanoseconds.
86 // return (elapsed * (long long)sTimebaseInfo.numer)/(long long)sTimebaseInfo.denom;
88 // Provided the final result is representable in 64 bits the following maneuver will
89 // deliver that result without intermediate overflow.
90 if (sTimebaseInfo
.denom
== sTimebaseInfo
.numer
)
92 else if (sTimebaseInfo
.denom
== 1)
93 return elapsed
* (long long)sTimebaseInfo
.numer
;
95 // Decompose elapsed = eta32 * 2^32 + eps32:
96 long long eta32
= elapsed
>> 32;
97 long long eps32
= elapsed
& 0x00000000ffffffffLL
;
99 long long numer
= sTimebaseInfo
.numer
, denom
= sTimebaseInfo
.denom
;
101 // Form product of elapsed64 (decomposed) and numer:
102 long long mu64
= numer
* eta32
;
103 long long lambda64
= numer
* eps32
;
105 // Divide the constituents by denom:
106 long long q32
= mu64
/denom
;
107 long long r32
= mu64
- (q32
* denom
); // mu64 % denom
109 return (q32
<< 32) + ((r32
<< 32) + lambda64
)/denom
;
116 * user visible globals
/*
 * NOTE(review): presumably the benchmark's argument vector; nothing in
 * the visible part of this file reads or writes it -- confirm.
 */
120 char ** lm_argv
= NULL
;
/*
 * Passed to the usage text, where it lines up with the
 * "-B batch-size (default %d)" entry.
 */
139 int lm_defB
= 0; /* use lm_nsecs_per_op */
/*
 * NOTE(review): presumably the default test name; no use is visible in
 * this part of the file -- confirm.
 */
142 char *lm_defN
= NULL
;
149 * default on fast platform, should be overridden by individual
150 * benchmarks if significantly wrong in either direction.
/*
 * actual_main() overwrites this with lm_optI and then divides
 * nsecs_resolution * BLOCK_TOCK_DURATION by it to compute lm_optB.
 */
153 int lm_nsecs_per_op
= 5;
/*
 * Final path component of argv[0], copied in by actual_main(); it is
 * also the fallback for lm_optN when that is still NULL.
 */
156 char lm_procname
[STRSIZE
];
/*
 * NOTE(review): presumably extra usage text supplied by the individual
 * benchmark; no use is visible in this part of the file -- confirm.
 */
157 char lm_usage
[STRSIZE
];
/*
 * Extra getopt() option letters; actual_main() appends this string to
 * the built-in option string before parsing the command line.
 */
158 char lm_optstr
[STRSIZE
];
/* Printed as the trailing "%s" column of the result header row. */
159 char lm_header
[STRSIZE
];
/*
 * Per-thread data size. actual_main() rejects the value (size_t)-1 as
 * "not set" and rounds any other value up to a multiple of 128 to form
 * tsdsize.
 */
160 size_t lm_tsdsize
= 0;
164 * Globals we do not export to the user
/*
 * Barrier used by worker_thread(): it loops while ba_flag is set,
 * compares against ba_deadline and ba_batches, and hands its results
 * to barrier_queue() through this pointer.
 */
167 static barrier_t
*lm_barrier
;
/*
 * Array of lm_optP process ids, allocated in actual_main(); each entry
 * is later passed to waitpid().
 */
168 static pid_t
*pids
= NULL
;
/*
 * Array of lm_optT thread ids, allocated in actual_main(). Entries
 * 1..lm_optT-1 are filled by pthread_create() and joined afterwards.
 */
169 static pthread_t
*tids
= NULL
;
/*
 * First argument to gettsd() when looking up a thread's data slot.
 * NOTE(review): presumably this process's worker index, with -1
 * meaning "not a worker yet" -- confirm where it is assigned.
 */
170 static int pindex
= -1;
/*
 * Base of the shared anonymous mapping that holds the per-thread data;
 * it is mapped with lm_optT * lm_optP * tsdsize + 8192 bytes.
 */
171 static void *tsdseg
= NULL
;
/* lm_tsdsize rounded up to a multiple of 128 (see actual_main()). */
172 static size_t tsdsize
= 0;
/*
 * Parsed from the LIBMICRO_HZ environment variable with strtoll(); the
 * rdtsc()-based timer routines divide by it to convert tick counts.
 */
175 static long long lm_hz
= 0;
/* Forward declarations for file-local helpers and timer calibration data. */
183 static void worker_process();
185 static void print_stats(barrier_t
*);
186 static void print_histo(barrier_t
*);
187 static int remove_outliers(double *, int, stats_t
*);
/*
 * Set once in actual_main() from get_nsecs_overhead(); update_stats()
 * subtracts it from each batch's elapsed time.
 */
188 static long long nsecs_overhead
;
/*
 * Set once in actual_main() from get_nsecs_resolution(). It is used to
 * derive the computed batch size, to compare a batch's elapsed time
 * against 100 * resolution in update_stats(), and in the quantization
 * estimate of print_warnings().
 */
189 static long long nsecs_resolution
;
190 static long long get_nsecs_overhead();
191 static int crunch_stats(double *, int, stats_t
*);
192 static void compute_stats(barrier_t
*);
194 * main routine; renamed in this file to allow linking with other
199 actual_main(int argc
, char *argv
[])
207 long long startnsecs
= getnsecs();
210 if (getenv("LIBMICRO_HZ") == NULL
) {
211 (void) printf("LIBMICRO_HZ needed but not set\n");
214 lm_hz
= strtoll(getenv("LIBMICRO_HZ"), NULL
, 10);
220 /* before we do anything */
221 (void) benchmark_init();
224 nsecs_overhead
= get_nsecs_overhead();
225 nsecs_resolution
= get_nsecs_resolution();
242 * squirrel away the path to the current
243 * binary in a way that works on both
247 if (*argv
[0] == '/') {
248 lm_procpath
= strdup(argv
[0]);
249 *strrchr(lm_procpath
, '/') = 0;
252 (void) getcwd(path
, 1024);
253 (void) strcat(path
, "/");
254 (void) strcat(path
, argv
[0]);
255 *strrchr(path
, '/') = 0;
256 lm_procpath
= strdup(path
);
263 if ((tmp
= strrchr(argv
[0], '/')) == NULL
)
264 (void) strcpy(lm_procname
, argv
[0]);
266 (void) strcpy(lm_procname
, tmp
+ 1);
268 if (lm_optN
== NULL
) {
269 lm_optN
= lm_procname
;
273 * Parse command line arguments
276 (void) sprintf(optstr
, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr
);
277 while ((opt
= getopt(argc
, argv
, optstr
)) != -1) {
286 lm_optB
= sizetoint(optarg
);
289 lm_optC
= sizetoint(optarg
);
292 lm_optD
= sizetoint(optarg
);
301 lm_optI
= sizetoint(optarg
);
313 lm_optP
= sizetoint(optarg
);
319 lm_optT
= sizetoint(optarg
);
322 (void) printf("%s\n", LIBMICRO_VERSION
);
334 if (benchmark_optswitch(opt
, optarg
) == -1) {
341 /* deal with implicit and overriding options */
342 if (lm_opt1
&& lm_optP
> 1) {
344 (void) printf("warning: -1 overrides -P\n");
348 (void) fprintf(stderr
, "Running:%20s", lm_optN
);
349 (void) fflush(stderr
);
354 * neither benchmark nor user has specified the number
355 * of cnts/sample, so use computed value
358 lm_nsecs_per_op
= lm_optI
;
359 #define BLOCK_TOCK_DURATION 10000 /* number of raw timer "tocks" ideally comprising a block of work */
360 lm_optB
= nsecs_resolution
* BLOCK_TOCK_DURATION
/ lm_nsecs_per_op
;
366 * now that the options are set
369 if (benchmark_initrun() == -1) {
373 /* allocate dynamic data */
374 pids
= (pid_t
*)malloc(lm_optP
* sizeof (pid_t
));
376 perror("malloc(pids)");
379 tids
= (pthread_t
*)malloc(lm_optT
* sizeof (pthread_t
));
381 perror("malloc(tids)");
385 /* check that the case defines lm_tsdsize before proceeding */
386 if (lm_tsdsize
== (size_t)-1) {
387 (void) fprintf(stderr
, "error in benchmark_init: "
388 "lm_tsdsize not set\n");
392 /* round up tsdsize to nearest 128 to eliminate false sharing */
393 tsdsize
= ((lm_tsdsize
+ 127) / 128) * 128;
395 /* allocate sufficient TSD for each thread in each process */
396 tsdseg
= (void *)mmap(NULL
, lm_optT
* lm_optP
* tsdsize
+ 8192,
397 PROT_READ
| PROT_WRITE
, MAP_SHARED
| MAP_ANON
, -1, 0L);
398 if (tsdseg
== NULL
) {
403 /* initialise worker synchronisation */
404 b
= barrier_create(lm_optT
* lm_optP
, DATASIZE
);
406 perror("barrier_create()");
412 /* need this here so that parent and children can call exit() */
413 (void) fflush(stdout
);
414 (void) fflush(stderr
);
416 /* when we started and when to stop */
418 b
->ba_starttime
= getnsecs();
419 b
->ba_deadline
= (long long) (b
->ba_starttime
+ (lm_optD
* 1000000LL));
423 /* single process, non-fork mode */
427 /* create worker processes */
428 for (i
= 0; i
< lm_optP
; i
++) {
446 /* wait for worker processes */
447 for (i
= 0; i
< lm_optP
; i
++) {
449 (void) waitpid(pids
[i
], NULL
, 0);
454 b
->ba_endtime
= getnsecs();
456 /* compute results */
460 /* print arguments benchmark was invoked with ? */
463 (void) printf("# %s ", argv
[0]);
464 for (l
= 1; l
< argc
; l
++) {
465 (void) printf("%s ", argv
[l
]);
470 /* print result header (unless suppressed) */
472 (void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n",
475 "samples", "errors", "cnt/samp", lm_header
);
480 (void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n",
481 lm_optN
, lm_optP
, lm_optT
,
482 (lm_optM
?b
->ba_corrected
.st_mean
:b
->ba_corrected
.st_median
),
483 b
->ba_batches
, b
->ba_errors
, lm_optB
,
490 /* just in case something goes awry */
491 (void) fflush(stdout
);
492 (void) fflush(stderr
);
494 /* cleanup by stages */
495 (void) benchmark_finirun();
496 (void) barrier_destroy(b
);
497 (void) benchmark_fini();
500 (void) fprintf(stderr
, " for %12.5f seconds\n",
501 (double)(getnsecs() - startnsecs
) /
503 (void) fflush(stderr
);
509 worker_thread(void *arg
)
512 long long last_sleep
= 0;
515 r
.re_errors
= benchmark_initworker(arg
);
517 while (lm_barrier
->ba_flag
) {
519 r
.re_errors
+= benchmark_initbatch(arg
);
523 if (lm_optA
&& ((t
= getnsecs()) - last_sleep
) > 75000000LL) {
524 (void) poll(0, 0, 10);
527 /* wait for it ... */
528 (void) barrier_queue(lm_barrier
, NULL
);
531 r
.re_t0
= getnsecs();
532 (void) benchmark(arg
, &r
);
533 r
.re_t1
= getnsecs();
536 if (r
.re_t1
> lm_barrier
->ba_deadline
&&
537 (!lm_optC
|| lm_optC
< lm_barrier
->ba_batches
)) {
538 lm_barrier
->ba_flag
= 0;
541 /* record results and sync */
542 (void) barrier_queue(lm_barrier
, &r
);
544 (void) benchmark_finibatch(arg
);
549 (void) benchmark_finiworker(arg
);
560 for (i
= 1; i
< lm_optT
; i
++) {
561 tsd
= gettsd(pindex
, i
);
562 if (pthread_create(&tids
[i
], NULL
, worker_thread
, tsd
) != 0) {
563 perror("pthread_create");
568 tsd
= gettsd(pindex
, 0);
569 (void) worker_thread(tsd
);
571 for (i
= 1; i
< lm_optT
; i
++) {
572 (void) pthread_join(tids
[i
], NULL
);
581 " [-1] (single process; overrides -P > 1)\n"
582 " [-A] (align with clock)\n"
583 " [-B batch-size (default %d)]\n"
584 " [-C minimum number of samples (default 0)]\n"
585 " [-D duration in msecs (default %ds)]\n"
586 " [-E (echo name to stderr)]\n"
587 " [-H] (suppress headers)\n"
588 " [-I] nsecs per op (used to compute batch size)"
589 " [-L] (print argument line)\n"
590 " [-M] (reports mean rather than median)\n"
591 " [-N test-name (default '%s')]\n"
592 " [-P processes (default %d)]\n"
593 " [-S] (print detailed stats)\n"
594 " [-T threads (default %d)]\n"
595 " [-V] (print the libMicro version and exit)\n"
596 " [-W] (flag possible benchmark problems)\n"
599 lm_defB
, lm_defD
, lm_procname
, lm_defP
, lm_defT
,
604 print_warnings(barrier_t
*b
)
611 (void) printf("#\n# WARNINGS\n");
613 increase
= (int)(floor((nsecs_resolution
* 100.0) /
614 ((double)lm_optB
* b
->ba_corrected
.st_median
* 1000.0)) +
616 (void) printf("# Quantization error likely;"
617 "increase batch size (-B option) %dX to avoid.\n",
622 * XXX should warn on median != mean by a lot
627 (void) printf("#\n# WARNINGS\n");
629 (void) printf("# Errors occured during benchmark.\n");
634 print_stats(barrier_t
*b
)
636 (void) printf("#\n");
637 (void) printf("# STATISTICS %12s %12s\n",
639 "usecs/call (outliers removed)");
641 if (b
->ba_count
== 0) {
642 (void) printf("zero samples\n");
646 (void) printf("# min %12.5f %12.5f\n",
648 b
->ba_corrected
.st_min
);
650 (void) printf("# max %12.5f %12.5f\n",
652 b
->ba_corrected
.st_max
);
653 (void) printf("# mean %12.5f %12.5f\n",
655 b
->ba_corrected
.st_mean
);
656 (void) printf("# median %12.5f %12.5f\n",
658 b
->ba_corrected
.st_median
);
659 (void) printf("# stddev %12.5f %12.5f\n",
661 b
->ba_corrected
.st_stddev
);
662 (void) printf("# standard error %12.5f %12.5f\n",
664 b
->ba_corrected
.st_stderr
);
665 (void) printf("# 99%% confidence level %12.5f %12.5f\n",
666 b
->ba_raw
.st_99confidence
,
667 b
->ba_corrected
.st_99confidence
);
668 (void) printf("# skew %12.5f %12.5f\n",
670 b
->ba_corrected
.st_skew
);
671 (void) printf("# kurtosis %12.5f %12.5f\n",
672 b
->ba_raw
.st_kurtosis
,
673 b
->ba_corrected
.st_kurtosis
);
675 (void) printf("# time correlation %12.5f %12.5f\n",
676 b
->ba_raw
.st_timecorr
,
677 b
->ba_corrected
.st_timecorr
);
678 (void) printf("#\n");
680 (void) printf("# elasped time %12.5f\n", (b
->ba_endtime
-
681 b
->ba_starttime
) / 1.0e9
);
682 (void) printf("# number of samples %12d\n", b
->ba_batches
);
683 (void) printf("# number of outliers %12d\n", b
->ba_outliers
);
684 (void) printf("# getnsecs overhead %12d\n", (int)nsecs_overhead
);
686 (void) printf("#\n");
687 (void) printf("# DISTRIBUTION\n");
697 update_stats(barrier_t
*b
, result_t
*r
)
700 double nsecs_per_call
;
702 if (b
->ba_waiters
== 0) {
703 /* first thread only */
709 /* all but first thread */
710 if (r
->re_t0
< b
->ba_t0
) {
713 if (r
->re_t1
> b
->ba_t1
) {
718 b
->ba_count0
+= r
->re_count
;
719 b
->ba_errors0
+= r
->re_errors
;
721 if (b
->ba_waiters
== b
->ba_hwm
- 1) {
722 /* last thread only */
725 time
= (double)b
->ba_t1
- (double)b
->ba_t0
-
726 (double)nsecs_overhead
;
728 if (time
< 100 * nsecs_resolution
)
732 * normalize by procs * threads if not -U
735 nsecs_per_call
= time
/ (double)b
->ba_count0
*
736 (double)(lm_optT
* lm_optP
);
738 b
->ba_count
+= b
->ba_count0
;
739 b
->ba_errors
+= b
->ba_errors0
;
741 b
->ba_data
[b
->ba_batches
% b
->ba_datasize
] =
750 barrier_create(int hwm
, int datasize
)
756 b
= (barrier_t
*)mmap(NULL
,
757 sizeof (barrier_t
) + (datasize
- 1) * sizeof (double),
758 PROT_READ
| PROT_WRITE
,
759 MAP_SHARED
| MAP_ANON
, -1, 0L);
760 if (b
== (barrier_t
*)MAP_FAILED
) {
763 b
->ba_datasize
= datasize
;
767 b
->ba_semid
= semget(IPC_PRIVATE
, 3, 0600);
768 if (b
->ba_semid
== -1) {
769 (void) munmap((void *)b
, sizeof (barrier_t
));
773 /* [hwm - 1, 0, 0] */
775 s
[0].sem_op
= hwm
- 1;
777 if (semop(b
->ba_semid
, s
, 1) == -1) {
779 (void) semctl(b
->ba_semid
, 0, IPC_RMID
);
780 (void) munmap((void *)b
, sizeof (barrier_t
));
794 barrier_destroy(barrier_t
*b
)
796 (void) semctl(b
->ba_semid
, 0, IPC_RMID
);
797 (void) munmap((void *)b
, sizeof (barrier_t
));
803 barrier_queue(barrier_t
*b
, result_t
*r
)
809 * if ! nowait {s1(-(hwm-1))}
810 * (all other threads)
811 * update shared stats
816 * update shared stats
821 s
[0].sem_op
= -(b
->ba_hwm
- 1);
823 if (semop(b
->ba_semid
, s
, 1) == -1) {
829 s
[0].sem_op
= -(b
->ba_hwm
- 1);
830 s
[0].sem_flg
= IPC_NOWAIT
;
831 if (semop(b
->ba_semid
, s
, 1) == -1) {
832 if (errno
!= EAGAIN
) {
837 /* all but the last thread */
846 s
[0].sem_op
= b
->ba_hwm
- 1;
851 if (semop(b
->ba_semid
, s
, 2) == -1) {
862 if (semop(b
->ba_semid
, s
, 2) == -1) {
868 /* the last thread */
878 s
[0].sem_op
= b
->ba_hwm
- 1;
880 if (semop(b
->ba_semid
, s
, 1) == -1) {
889 #else /* USE_SEMOP */
892 barrier_create(int hwm
, int datasize
)
894 pthread_mutexattr_t attr
;
895 pthread_condattr_t cattr
;
899 b
= (barrier_t
*)mmap(NULL
,
900 sizeof (barrier_t
) + (datasize
- 1) * sizeof (double),
901 PROT_READ
| PROT_WRITE
,
902 MAP_SHARED
| MAP_ANON
, -1, 0L);
903 if (b
== (barrier_t
*)MAP_FAILED
) {
906 b
->ba_datasize
= datasize
;
911 (void) pthread_mutexattr_init(&attr
);
912 (void) pthread_mutexattr_setpshared(&attr
, PTHREAD_PROCESS_SHARED
);
914 (void) pthread_condattr_init(&cattr
);
915 (void) pthread_condattr_setpshared(&cattr
, PTHREAD_PROCESS_SHARED
);
917 (void) pthread_mutex_init(&b
->ba_lock
, &attr
);
918 (void) pthread_cond_init(&b
->ba_cv
, &cattr
);
930 barrier_destroy(barrier_t
*b
)
932 (void) munmap((void *)b
, sizeof (barrier_t
));
938 barrier_queue(barrier_t
*b
, result_t
*r
)
942 (void) pthread_mutex_lock(&b
->ba_lock
);
951 if (b
->ba_hwm
== b
->ba_waiters
) {
954 (void) pthread_cond_broadcast(&b
->ba_cv
);
957 while (b
->ba_phase
== phase
) {
958 (void) pthread_cond_wait(&b
->ba_cv
, &b
->ba_lock
);
961 (void) pthread_mutex_unlock(&b
->ba_lock
);
964 #endif /* USE_SEMOP */
975 for (i
= 1; i
< lm_optT
; i
++) {
976 if (pthread_self() == tids
[i
]) {
993 if ((p
< 0) || (p
>= lm_optP
) || (t
< 0) || (t
>= lm_optT
))
996 return ((void *)((unsigned long)tsdseg
+
997 (((p
* lm_optT
) + t
) * tsdsize
)));
1000 #if defined(__APPLE__)
1002 gettsdindex(void *arg
){
1004 * gettindex() can race with pthread_create() filling in tids[].
1005 * This is an alternative approach to finding the calling thread's tsd in t
1008 return tsdsize
? ((unsigned long)arg
- (unsigned long)tsdseg
)/tsdsize
: 0;
1010 #endif /* __APPLE__ */
1012 #ifdef USE_GETHRTIME
1016 return (gethrtime());
1022 return (gethrtime() / 1000);
1025 #elif USE_RDTSC /* USE_GETHRTIME */
1027 __inline__
long long
1030 unsigned long long x
;
1031 __asm__
volatile(".byte 0x0f, 0x31" : "=A" (x
));
1038 return (rdtsc() * 1000000 / lm_hz
);
1044 return (rdtsc() * 1000000000 / lm_hz
);
1047 #else /* USE_GETHRTIME */
1054 (void) gettimeofday(&tv
, NULL
);
1056 return ((long long)tv
.tv_sec
* 1000000LL + (long long) tv
.tv_usec
);
1064 (void) gettimeofday(&tv
, NULL
);
1066 return ((long long)tv
.tv_sec
* 1000000000LL +
1067 (long long) tv
.tv_usec
* 1000LL);
1070 #endif /* USE_GETHRTIME */
1073 setfdlimit(int limit
)
1075 struct rlimit rlimit
;
1077 if (getrlimit(RLIMIT_NOFILE
, &rlimit
) < 0) {
1078 perror("getrlimit");
1082 if (rlimit
.rlim_cur
> limit
)
1083 return (0); /* no worries */
1085 rlimit
.rlim_cur
= limit
;
1087 if (rlimit
.rlim_max
< limit
)
1088 rlimit
.rlim_max
= limit
;
1090 if (setrlimit(RLIMIT_NOFILE
, &rlimit
) < 0) {
1091 perror("setrlimit");
/*
 * Binary size units: 2^10, 2^20 and 2^30. All three values fit in a
 * 32-bit int.
 * NOTE(review): presumably the suffix multipliers for sizetoll() and
 * sizetoint(); their switch bodies are not visible here -- confirm.
 */
1099 #define KILOBYTE 1024
1100 #define MEGABYTE (KILOBYTE * KILOBYTE)
1101 #define GIGABYTE (KILOBYTE * MEGABYTE)
1104 sizetoll(const char *arg
)
1106 int len
= strlen(arg
);
1110 if (len
&& isalpha(arg
[len
- 1])) {
1111 switch (arg
[len
- 1]) {
1129 for (i
= 0; i
< len
- 1; i
++)
1130 if (!isdigit(arg
[i
]))
1134 return (mult
* strtoll(arg
, NULL
, 10));
1138 sizetoint(const char *arg
)
1140 int len
= strlen(arg
);
1144 if (len
&& isalpha(arg
[len
- 1])) {
1145 switch (arg
[len
- 1]) {
1163 for (i
= 0; i
< len
- 1; i
++)
1164 if (!isdigit(arg
[i
]))
1168 return (mult
* atoi(arg
));
1172 print_bar(long count
, long total
)
1176 (void) putchar_unlocked(count
? '*' : ' ');
1177 for (i
= 1; i
< (32 * count
) / total
; i
++)
1178 (void) putchar_unlocked('*');
1180 (void) putchar_unlocked(' ');
1184 doublecmp(const void *p1
, const void *p2
)
1186 double a
= *((double *)p1
);
1187 double b
= *((double *)p2
);
1197 print_histo(barrier_t
*b
)
1216 (void) printf("# %12s %12s %32s %12s\n", "counts", "usecs/call",
1219 /* calculate how much data we've captured */
1220 n
= b
->ba_batches
> b
->ba_datasize
? b
->ba_datasize
: b
->ba_batches
;
1222 /* find the 95th percentile - index, value and range */
1223 qsort((void *)b
->ba_data
, n
, sizeof (double), doublecmp
);
1224 min
= b
->ba_data
[0] + 0.000001;
1226 p95
= b
->ba_data
[i95
];
1227 r95
= p95
- min
+ 1;
1229 /* find a suitable min and scale */
1231 x
= r95
/ (HISTOSIZE
- 1);
1236 y
= x
+ 0.9999999999;
1243 scale
= y
* (HISTOSIZE
- 1);
1244 if (scale
< (HISTOSIZE
- 1)) {
1245 scale
= (HISTOSIZE
- 1);
1248 /* create and initialise the histogram */
1249 histo
= malloc(HISTOSIZE
* sizeof (histo_t
));
1250 for (i
= 0; i
< HISTOSIZE
; i
++) {
1255 /* populate the histogram */
1259 for (i
= 0; i
< i95
; i
++) {
1260 j
= (HISTOSIZE
- 1) * (b
->ba_data
[i
] - min
) / scale
;
1262 if (j
>= HISTOSIZE
) {
1263 (void) printf("panic!\n");
1267 histo
[j
].sum
+= b
->ba_data
[i
];
1270 sum
+= b
->ba_data
[i
];
1275 /* find the largest bucket */
1277 for (i
= 0; i
< HISTOSIZE
; i
++)
1278 if (histo
[i
].count
> 0) {
1280 if (histo
[i
].count
> maxcount
)
1281 maxcount
= histo
[i
].count
;
1284 /* print the buckets */
1285 for (i
= 0; i
<= last
; i
++) {
1286 (void) printf("# %12lld %12.5f |", histo
[i
].count
,
1287 (min
+ scale
* (double)i
/ (HISTOSIZE
- 1)));
1289 print_bar(histo
[i
].count
, maxcount
);
1291 if (histo
[i
].count
> 0)
1292 (void) printf("%12.5f\n",
1293 histo
[i
].sum
/ histo
[i
].count
);
1295 (void) printf("%12s\n", "-");
1298 /* find the mean of values beyond the 95th percentile */
1301 for (i
= i95
; i
< n
; i
++) {
1302 sum
+= b
->ba_data
[i
];
1306 /* print the >95% bucket summary */
1307 (void) printf("#\n");
1308 (void) printf("# %12lld %12s |", count
, "> 95%");
1309 print_bar(count
, maxcount
);
1311 (void) printf("%12.5f\n", sum
/ count
);
1313 (void) printf("%12s\n", "-");
1314 (void) printf("#\n");
1315 (void) printf("# %12s %12.5f\n", "mean of 95%", m95
);
1316 (void) printf("# %12s %12.5f\n", "95th %ile", p95
);
1318 /* quantify any buffer overflow */
1319 if (b
->ba_batches
> b
->ba_datasize
)
1320 (void) printf("# %12s %12d\n", "data dropped",
1321 b
->ba_batches
- b
->ba_datasize
);
1325 compute_stats(barrier_t
*b
)
1329 if (b
->ba_batches
> b
->ba_datasize
)
1330 b
->ba_batches
= b
->ba_datasize
;
1333 * convert to usecs/call
1336 for (i
= 0; i
< b
->ba_batches
; i
++)
1337 b
->ba_data
[i
] /= 1000.0;
1343 (void) crunch_stats(b
->ba_data
, b
->ba_batches
, &b
->ba_raw
);
1346 * repeatedly apply 3 sigma rule to remove outliers
1349 b
->ba_corrected
= b
->ba_raw
;
1352 if (b
->ba_batches
> 40) { /* remove outliers */
1356 removed
= remove_outliers(b
->ba_data
, b
->ba_batches
,
1358 b
->ba_outliers
+= removed
;
1359 b
->ba_batches
-= removed
;
1360 (void) crunch_stats(b
->ba_data
, b
->ba_batches
,
1362 } while (removed
!= 0 && b
->ba_batches
> 40);
1368 * routine to compute various statistics on array of doubles.
1372 crunch_stats(double *data
, int count
, stats_t
*stats
)
1385 * first we need the mean
1390 for (i
= 0; i
< count
; i
++) {
1396 stats
->st_mean
= mean
;
1399 * malloc and sort so we can do median
1402 dupdata
= malloc(bytes
= sizeof (double) * count
);
1403 (void) memcpy(dupdata
, data
, bytes
);
1404 qsort((void *)dupdata
, count
, sizeof (double), doublecmp
);
1405 stats
->st_median
= dupdata
[count
/2];
1408 * reuse dupdata to compute time correlation of data to
1409 * detect interesting time-based trends
1412 for (i
= 0; i
< count
; i
++)
1413 dupdata
[i
] = (double)i
;
1415 (void) fit_line(dupdata
, data
, count
, &a
, &stats
->st_timecorr
);
1423 stats
->st_min
= 1.0e99
; /* hard to find portable values */
1425 for (i
= 0; i
< count
; i
++) {
1426 if (data
[i
] > stats
->st_max
)
1427 stats
->st_max
= data
[i
];
1428 if (data
[i
] < stats
->st_min
)
1429 stats
->st_min
= data
[i
];
1431 diff
= data
[i
] - mean
;
1433 sk
+= diff
* diff
* diff
;
1434 ku
+= diff
* diff
* diff
* diff
;
1437 stats
->st_stddev
= std
= sqrt(std
/(double)(count
- 1));
1438 stats
->st_stderr
= std
/ sqrt(count
);
1439 stats
->st_99confidence
= stats
->st_stderr
* 2.326;
1440 stats
->st_skew
= sk
/ (std
* std
* std
) / (double)(count
);
1441 stats
->st_kurtosis
= ku
/ (std
* std
* std
* std
) /
1442 (double)(count
) - 3;
1448 * does a least squares fit to the set of points x, y and
1449 * fits a line y = a + bx. Returns a, b
1453 fit_line(double *x
, double *y
, int count
, double *a
, double *b
)
1455 double sumx
, sumy
, sumxy
, sumx2
;
1459 sumx
= sumy
= sumxy
= sumx2
= 0.0;
1461 for (i
= 0; i
< count
; i
++) {
1463 sumx2
+= x
[i
] * x
[i
];
1465 sumxy
+= x
[i
] * y
[i
];
1468 denom
= count
* sumx2
- sumx
* sumx
;
1473 *a
= (sumy
* sumx2
- sumx
* sumxy
) / denom
;
1475 *b
= (count
* sumxy
- sumx
* sumy
) / denom
;
1481 * empty function for measurement purposes
1490 #define NSECITER 1000
1493 get_nsecs_overhead()
1497 double data
[NSECITER
];
1504 (void) getnsecs(); /* warmup */
1505 (void) getnsecs(); /* warmup */
1506 (void) getnsecs(); /* warmup */
1512 for (i
= 0; i
< count
; i
++) {
1514 data
[i
] = getnsecs() - s
;
1517 (void) crunch_stats(data
, count
, &stats
);
1519 while ((outliers
= remove_outliers(data
, count
, &stats
)) != 0) {
1521 (void) crunch_stats(data
, count
, &stats
);
1524 return ((long long)stats
.st_mean
);
1529 get_nsecs_resolution()
1533 int i
, j
, nops
, res
;
1534 long long start
, stop
;
1537 * first, figure out how many nops to use
1538 * to get any delta between time measurements.
1539 * use a minimum of one.
1546 stop
= start
= getnsecs();
1548 for (i
= 1; i
< 10000000; i
++) {
1560 * now collect data at linearly varying intervals
1563 for (i
= 0; i
< 1000; i
++) {
1565 for (j
= nops
* i
; j
; j
--)
1568 y
[i
] = stop
- start
;
1572 * find smallest positive difference between samples;
1573 * this is the timer resolution
1578 for (i
= 1; i
< 1000; i
++) {
1579 int diff
= y
[i
] - y
[i
-1];
1581 if (diff
> 0 && res
> diff
)
1590 * remove any data points from the array more than 3 sigma out
1594 remove_outliers(double *data
, int count
, stats_t
*stats
)
1596 double outmin
= stats
->st_mean
- 3 * stats
->st_stddev
;
1597 double outmax
= stats
->st_mean
+ 3 * stats
->st_stddev
;
1601 for (outliers
= i
= j
= 0; i
< count
; i
++)
1602 if (data
[i
] > outmax
|| data
[i
] < outmin
)
1605 data
[j
++] = data
[i
];