tools/tests/libMicro/libmicro.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms
   5  * of the Common Development and Distribution License
   6  * (the "License").  You may not use this file except
   7  * in compliance with the License.
   8  *
   9  * You can obtain a copy of the license at
  10  * src/OPENSOLARIS.LICENSE
  11  * or http://www.opensolaris.org/os/licensing.
  12  * See the License for the specific language governing
  13  * permissions and limitations under the License.
  14  *
  15  * When distributing Covered Code, include this CDDL
  16  * HEADER in each file and include the License file at
  17  * usr/src/OPENSOLARIS.LICENSE.  If applicable,
  18  * add the following below this CDDL HEADER, with the
  19  * fields enclosed by brackets "[]" replaced with your
  20  * own identifying information: Portions Copyright [yyyy]
  21  * [name of copyright owner]
  22  *
  23  * CDDL HEADER END
  24  */
  25
  26 /*
  27  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30
  31 /*
  32  * benchmarking routines
  33  */
  34
  35 #include <sys/types.h>
  36 #include <sys/time.h>
  37 #include <sys/ipc.h>
  38 #include <sys/sem.h>
  39 #include <sys/mman.h>
  40 #include <sys/wait.h>
  41 #include <ctype.h>
  42 #include <string.h>
  43 #include <strings.h>
  44 #include <signal.h>
  45 #include <stdio.h>
  46 #include <unistd.h>
  47 #include <stdlib.h>
  48 #include <poll.h>
  49 #include <pthread.h>
  50 #include <dlfcn.h>
  51 #include <errno.h>
  52 #include <sys/resource.h>
  53 #include <math.h>
  54 #include <limits.h>
  55
  56 #ifdef  __sun
  57 #include <sys/elf.h>
  58 #endif
  59
  60 #include "libmicro.h"
  61
  62
  63 #if defined(__APPLE__)
  64 #include <mach/mach_time.h>
  65
  66 long long
  67 gethrtime(void)
  68 {
  69    long long        elapsed;
  70    static long long        start;
  71    static mach_timebase_info_data_t    sTimebaseInfo = { 0, 0 };
  72
  73    // If this is the first time we've run, get the timebase.
  74    // We can use denom == 0 to indicate that sTimebaseInfo is
  75    // uninitialised because it makes no sense to have a zero
  76    // denominator in a fraction.
  77
  78    if ( sTimebaseInfo.denom == 0 ) {
  79        (void) mach_timebase_info(&sTimebaseInfo);
  80                 start = mach_absolute_time();
  81    }
  82
  83    elapsed = mach_absolute_time() - start;
  84
  85    // Convert to nanoseconds.
  86         // return (elapsed * (long long)sTimebaseInfo.numer)/(long long)sTimebaseInfo.denom;
  87
  88         // Provided the final result is representable in 64 bits the following maneuver will
  89         // deliver that result without intermediate overflow.
  90         if (sTimebaseInfo.denom == sTimebaseInfo.numer)
  91                 return elapsed;
  92         else if (sTimebaseInfo.denom == 1)
  93                 return elapsed * (long long)sTimebaseInfo.numer;
  94         else {
  95        // Decompose elapsed = eta32 * 2^32 + eps32:
  96        long long eta32 = elapsed >> 32;
  97        long long eps32 = elapsed & 0x00000000ffffffffLL;
  98
  99        long long numer = sTimebaseInfo.numer, denom = sTimebaseInfo.denom;
 100
 101        // Form product of elapsed64 (decomposed) and numer:
 102        long long mu64 = numer * eta32;
 103        long long lambda64 = numer * eps32;
 104
 105        // Divide the constituents by denom:
 106        long long q32 = mu64/denom;
 107        long long r32 = mu64 - (q32 * denom); // mu64 % denom
 108
 109        return (q32 << 32) + ((r32 << 32) + lambda64)/denom;
 110         }
 111 }
 112
 113 #endif
 114
 115 /*
 116  * user visible globals
 117  */
 118
 119 int                             lm_argc = 0;
 120 char **                         lm_argv = NULL;
 121
 122 int                             lm_opt1;
 123 int                             lm_optA;
 124 int                             lm_optB;
 125 int                             lm_optC = 100;
 126 int                             lm_optD;
 127 int                             lm_optE;
 128 int                             lm_optH;
 129 int                             lm_optI;
 130 int                             lm_optL = 0;
 131 int                             lm_optM = 0;
 132 char                            *lm_optN;
 133 int                             lm_optP;
 134 int                             lm_optS;
 135 int                             lm_optT;
 136 int                             lm_optW;
 137
 138 int                             lm_def1 = 0;
 139 int                             lm_defB = 0; /* use lm_nsecs_per_op */
 140 int                             lm_defD = 10;
 141 int                             lm_defH = 0;
 142 char                            *lm_defN = NULL;
 143 int                             lm_defP = 1;
 144
 145 int                             lm_defS = 0;
 146 int                             lm_defT = 1;
 147
 148 /*
 149  * default on fast platform, should be overridden by individual
 150  * benchmarks if significantly wrong in either direction.
 151  */
 152
 153 int                             lm_nsecs_per_op = 5;
 154
 155 char                            *lm_procpath;
 156 char                            lm_procname[STRSIZE];
 157 char                            lm_usage[STRSIZE];
 158 char                            lm_optstr[STRSIZE];
 159 char                            lm_header[STRSIZE];
 160 size_t                          lm_tsdsize = 0;
 161
 162
 163 /*
 164  *  Globals we do not export to the user
 165  */
 166
 167 static barrier_t                *lm_barrier;
 168 static pid_t                    *pids = NULL;
 169 static pthread_t                *tids = NULL;
 170 static int                      pindex = -1;
 171 static void                     *tsdseg = NULL;
 172 static size_t                   tsdsize = 0;
 173
 174 #ifdef USE_RDTSC
 175 static long long                lm_hz = 0;
 176 #endif
 177
 178
 179 /*
 180  * Forward references
 181  */
 182
 183 static void             worker_process();
 184 static void             usage();
 185 static void             print_stats(barrier_t *);
 186 static void             print_histo(barrier_t *);
 187 static int              remove_outliers(double *, int, stats_t *);
 188 static long long        nsecs_overhead;
 189 static long long        nsecs_resolution;
 190 static long long        get_nsecs_overhead();
 191 static int              crunch_stats(double *, int, stats_t *);
 192 static void             compute_stats(barrier_t *);
 193 /*
 194  * main routine; renamed in this file to allow linking with other
 195  * files
 196  */
 197
 198 int
 199 actual_main(int argc, char *argv[])
 200 {
 201         int                     i;
 202         int                     opt;
 203         extern char             *optarg;
 204         char                    *tmp;
 205         char                    optstr[256];
 206         barrier_t               *b;
 207         long long               startnsecs = getnsecs();
 208
 209 #ifdef USE_RDTSC
 210         if (getenv("LIBMICRO_HZ") == NULL) {
 211                 (void) printf("LIBMICRO_HZ needed but not set\n");
 212                 exit(1);
 213         }
 214         lm_hz = strtoll(getenv("LIBMICRO_HZ"), NULL, 10);
 215 #endif
 216
 217         lm_argc = argc;
 218         lm_argv = argv;
 219
 220         /* before we do anything */
 221         (void) benchmark_init();
 222
 223
 224         nsecs_overhead = get_nsecs_overhead();
 225         nsecs_resolution = get_nsecs_resolution();
 226
 227         /*
 228          * Set defaults
 229          */
 230
 231         lm_opt1 = lm_def1;
 232         lm_optB = lm_defB;
 233         lm_optD = lm_defD;
 234         lm_optH = lm_defH;
 235         lm_optN = lm_defN;
 236         lm_optP = lm_defP;
 237
 238         lm_optS = lm_defS;
 239         lm_optT = lm_defT;
 240
 241         /*
 242          * squirrel away the path to the current
 243          * binary in a way that works on both
 244          * Linux and Solaris
 245          */
 246
 247         if (*argv[0] == '/') {
 248                 lm_procpath = strdup(argv[0]);
 249                 *strrchr(lm_procpath, '/') = 0;
 250         } else {
 251                 char path[1024];
 252                 (void) getcwd(path, 1024);
 253                 (void) strcat(path, "/");
 254                 (void) strcat(path, argv[0]);
 255                 *strrchr(path, '/') = 0;
 256                 lm_procpath = strdup(path);
 257         }
 258
 259         /*
 260          * name of binary
 261          */
 262
 263         if ((tmp = strrchr(argv[0], '/')) == NULL)
 264                 (void) strcpy(lm_procname, argv[0]);
 265         else
 266                 (void) strcpy(lm_procname, tmp + 1);
 267
 268         if (lm_optN == NULL) {
 269                 lm_optN = lm_procname;
 270         }
 271
 272         /*
 273          * Parse command line arguments
 274          */
 275
 276         (void) sprintf(optstr, "1AB:C:D:EHI:LMN:P:RST:VW?%s", lm_optstr);
 277         while ((opt = getopt(argc, argv, optstr)) != -1) {
 278                 switch (opt) {
 279                 case '1':
 280                         lm_opt1 = 1;
 281                         break;
 282                 case 'A':
 283                         lm_optA = 1;
 284                         break;
 285                 case 'B':
 286                         lm_optB = sizetoint(optarg);
 287                         break;
 288                 case 'C':
 289                         lm_optC = sizetoint(optarg);
 290                         break;
 291                 case 'D':
 292                         lm_optD = sizetoint(optarg);
 293                         break;
 294                 case 'E':
 295                         lm_optE = 1;
 296                         break;
 297                 case 'H':
 298                         lm_optH = 1;
 299                         break;
 300                 case 'I':
 301                         lm_optI = sizetoint(optarg);
 302                         break;
 303                 case 'L':
 304                         lm_optL = 1;
 305                         break;
 306                 case 'M':
 307                         lm_optM = 1;
 308                         break;
 309                 case 'N':
 310                         lm_optN = optarg;
 311                         break;
 312                 case 'P':
 313                         lm_optP = sizetoint(optarg);
 314                         break;
 315                 case 'S':
 316                         lm_optS = 1;
 317                         break;
 318                 case 'T':
 319                         lm_optT = sizetoint(optarg);
 320                         break;
 321                 case 'V':
 322                         (void) printf("%s\n", LIBMICRO_VERSION);
 323                         exit(0);
 324                         break;
 325                 case 'W':
 326                         lm_optW = 1;
 327                         lm_optS = 1;
 328                         break;
 329                 case '?':
 330                         usage();
 331                         exit(0);
 332                         break;
 333                 default:
 334                         if (benchmark_optswitch(opt, optarg) == -1) {
 335                                 usage();
 336                                 exit(0);
 337                         }
 338                 }
 339         }
 340
 341         /* deal with implicit and overriding options */
 342         if (lm_opt1 && lm_optP > 1) {
 343                 lm_optP = 1;
 344                 (void) printf("warning: -1 overrides -P\n");
 345         }
 346
 347         if (lm_optE) {
 348                 (void) fprintf(stderr, "Running:%20s", lm_optN);
 349                 (void) fflush(stderr);
 350         }
 351
 352         if (lm_optB == 0) {
 353                 /*
 354                  * neither benchmark or user has specified the number
 355                  * of cnts/sample, so use computed value
 356                  */
 357                 if (lm_optI)
 358                         lm_nsecs_per_op = lm_optI;
 359 #define BLOCK_TOCK_DURATION 10000 /* number of raw timer "tocks" ideally comprising a block of work */
 360                 lm_optB = nsecs_resolution * BLOCK_TOCK_DURATION / lm_nsecs_per_op;
 361                 if (lm_optB == 0)
 362                         lm_optB = 1;
 363         }
 364
 365         /*
 366          * now that the options are set
 367          */
 368
 369         if (benchmark_initrun() == -1) {
 370                 exit(1);
 371         }
 372
 373         /* allocate dynamic data */
 374         pids = (pid_t *)malloc(lm_optP * sizeof (pid_t));
 375         if (pids == NULL) {
 376                 perror("malloc(pids)");
 377                 exit(1);
 378         }
 379         tids = (pthread_t *)malloc(lm_optT * sizeof (pthread_t));
 380         if (tids == NULL) {
 381                 perror("malloc(tids)");
 382                 exit(1);
 383         }
 384
 385         /* check that the case defines lm_tsdsize before proceeding */
 386         if (lm_tsdsize == (size_t)-1) {
 387                 (void) fprintf(stderr, "error in benchmark_init: "
 388                     "lm_tsdsize not set\n");
 389                 exit(1);
 390         }
 391
 392         /* round up tsdsize to nearest 128 to eliminate false sharing */
 393         tsdsize = ((lm_tsdsize + 127) / 128) * 128;
 394
 395         /* allocate sufficient TSD for each thread in each process */
 396         tsdseg = (void *)mmap(NULL, lm_optT * lm_optP * tsdsize + 8192,
 397             PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0L);
 398         if (tsdseg == NULL) {
 399                 perror("mmap(tsd)");
 400                 exit(1);
 401         }
 402
 403         /* initialise worker synchronisation */
 404         b = barrier_create(lm_optT * lm_optP, DATASIZE);
 405         if (b == NULL) {
 406                 perror("barrier_create()");
 407                 exit(1);
 408         }
 409         lm_barrier = b;
 410         b->ba_flag = 1;
 411
 412         /* need this here so that parent and children can call exit() */
 413         (void) fflush(stdout);
 414         (void) fflush(stderr);
 415
 416         /* when we started and when to stop */
 417
 418         b->ba_starttime = getnsecs();
 419         b->ba_deadline = (long long) (b->ba_starttime + (lm_optD * 1000000LL));
 420
 421         /* do the work */
 422         if (lm_opt1) {
 423                 /* single process, non-fork mode */
 424                 pindex = 0;
 425                 worker_process();
 426         } else {
 427                 /* create worker processes */
 428                 for (i = 0; i < lm_optP; i++) {
 429                         pids[i] = fork();
 430
 431                         switch (pids[i]) {
 432                         case 0:
 433                                 pindex = i;
 434                                 worker_process();
 435                                 exit(0);
 436                                 break;
 437                         case -1:
 438                                 perror("fork");
 439                                 exit(1);
 440                                 break;
 441                         default:
 442                                 continue;
 443                         }
 444                 }
 445
 446                 /* wait for worker processes */
 447                 for (i = 0; i < lm_optP; i++) {
 448                         if (pids[i] > 0) {
 449                                 (void) waitpid(pids[i], NULL, 0);
 450                         }
 451                 }
 452         }
 453
 454         b->ba_endtime = getnsecs();
 455
 456         /* compute results */
 457
 458         compute_stats(b);
 459
 460         /* print arguments benchmark was invoked with ? */
 461         if (lm_optL) {
 462                 int l;
 463                 (void) printf("# %s ", argv[0]);
 464                 for (l = 1; l < argc; l++) {
 465                         (void) printf("%s ", argv[l]);
 466                 }
 467                 (void) printf("\n");
 468         }
 469
 470         /* print result header (unless suppressed) */
 471         if (!lm_optH) {
 472                 (void) printf("%12s %3s %3s %12s %12s %8s %8s %s\n",
 473                     "", "prc", "thr",
 474                     "usecs/call",
 475                     "samples", "errors", "cnt/samp", lm_header);
 476         }
 477
 478         /* print result */
 479
 480         (void) printf("%-12s %3d %3d %12.5f %12d %8lld %8d %s\n",
 481             lm_optN, lm_optP, lm_optT,
 482             (lm_optM?b->ba_corrected.st_mean:b->ba_corrected.st_median),
 483             b->ba_batches, b->ba_errors, lm_optB,
 484             benchmark_result());
 485
 486         if (lm_optS) {
 487                 print_stats(b);
 488         }
 489
 490         /* just incase something goes awry */
 491         (void) fflush(stdout);
 492         (void) fflush(stderr);
 493
 494         /* cleanup by stages */
 495         (void) benchmark_finirun();
 496         (void) barrier_destroy(b);
 497         (void) benchmark_fini();
 498
 499         if (lm_optE) {
 500                 (void) fprintf(stderr, " for %12.5f seconds\n",
 501                     (double)(getnsecs() - startnsecs) /
 502                     1.e9);
 503                 (void) fflush(stderr);
 504         }
 505         return (0);
 506 }
 507
 508 void *
 509 worker_thread(void *arg)
 510 {
 511         result_t                r;
 512         long long               last_sleep = 0;
 513         long long               t;
 514
 515         r.re_errors = benchmark_initworker(arg);
 516
 517         while (lm_barrier->ba_flag) {
 518                 r.re_count = 0;
 519                 r.re_errors += benchmark_initbatch(arg);
 520
 521                 /* sync to clock */
 522
 523                 if (lm_optA && ((t = getnsecs()) - last_sleep) > 75000000LL) {
 524                         (void) poll(0, 0, 10);
 525                         last_sleep = t;
 526                 }
 527                 /* wait for it ... */
 528                 (void) barrier_queue(lm_barrier, NULL);
 529
 530                 /* time the test */
 531                 r.re_t0 = getnsecs();
 532                 (void) benchmark(arg, &r);
 533                 r.re_t1 = getnsecs();
 534
 535                 /* time to stop? */
 536                 if (r.re_t1 > lm_barrier->ba_deadline &&
 537                     (!lm_optC || lm_optC < lm_barrier->ba_batches)) {
 538                         lm_barrier->ba_flag = 0;
 539                 }
 540
 541                 /* record results and sync */
 542                 (void) barrier_queue(lm_barrier, &r);
 543
 544                 (void) benchmark_finibatch(arg);
 545
 546                 r.re_errors = 0;
 547         }
 548
 549         (void) benchmark_finiworker(arg);
 550
 551         return (0);
 552 }
 553
 554 void
 555 worker_process()
 556 {
 557         int                     i;
 558         void                    *tsd;
 559
 560         for (i = 1; i < lm_optT; i++) {
 561                 tsd = gettsd(pindex, i);
 562                 if (pthread_create(&tids[i], NULL, worker_thread, tsd) != 0) {
 563                         perror("pthread_create");
 564                         exit(1);
 565                 }
 566         }
 567
 568         tsd = gettsd(pindex, 0);
 569         (void) worker_thread(tsd);
 570
 571         for (i = 1; i < lm_optT; i++) {
 572                 (void) pthread_join(tids[i], NULL);
 573         }
 574 }
 575
 576 void
 577 usage()
 578 {
 579         (void) printf(
 580             "usage: %s\n"
 581             "       [-1] (single process; overrides -P > 1)\n"
 582             "       [-A] (align with clock)\n"
 583             "       [-B batch-size (default %d)]\n"
 584             "       [-C minimum number of samples (default 0)]\n"
 585             "       [-D duration in msecs (default %ds)]\n"
 586             "       [-E (echo name to stderr)]\n"
 587             "       [-H] (suppress headers)\n"
 588             "       [-I] nsecs per op (used to compute batch size)"
 589             "       [-L] (print argument line)\n"
 590             "       [-M] (reports mean rather than median)\n"
 591             "       [-N test-name (default '%s')]\n"
 592             "       [-P processes (default %d)]\n"
 593             "       [-S] (print detailed stats)\n"
 594             "       [-T threads (default %d)]\n"
 595             "       [-V] (print the libMicro version and exit)\n"
 596             "       [-W] (flag possible benchmark problems)\n"
 597             "%s\n",
 598             lm_procname,
 599             lm_defB, lm_defD, lm_procname, lm_defP, lm_defT,
 600             lm_usage);
 601 }
 602
 603 void
 604 print_warnings(barrier_t *b)
 605 {
 606         int head = 0;
 607         int increase;
 608
 609         if (b->ba_quant) {
 610                 if (!head++) {
 611                         (void) printf("#\n# WARNINGS\n");
 612                 }
 613                 increase = (int)(floor((nsecs_resolution * 100.0) /
 614                     ((double)lm_optB * b->ba_corrected.st_median * 1000.0)) +
 615                     1.0);
 616                 (void) printf("#     Quantization error likely;"
 617                     "increase batch size (-B option) %dX to avoid.\n",
 618                     increase);
 619         }
 620
 621         /*
 622          * XXX should warn on median != mean by a lot
 623          */
 624
 625         if (b->ba_errors) {
 626                 if (!head++) {
 627                         (void) printf("#\n# WARNINGS\n");
 628                 }
 629                 (void) printf("#     Errors occured during benchmark.\n");
 630         }
 631 }
 632
 633 void
 634 print_stats(barrier_t *b)
 635 {
 636         (void) printf("#\n");
 637         (void) printf("# STATISTICS         %12s          %12s\n",
 638             "usecs/call (raw)",
 639             "usecs/call (outliers removed)");
 640
 641         if (b->ba_count == 0) {
 642                 (void) printf("zero samples\n");
 643                 return;
 644         }
 645
 646         (void) printf("#                    min %12.5f            %12.5f\n",
 647             b->ba_raw.st_min,
 648             b->ba_corrected.st_min);
 649
 650         (void) printf("#                    max %12.5f            %12.5f\n",
 651             b->ba_raw.st_max,
 652             b->ba_corrected.st_max);
 653         (void) printf("#                   mean %12.5f            %12.5f\n",
 654             b->ba_raw.st_mean,
 655             b->ba_corrected.st_mean);
 656         (void) printf("#                 median %12.5f            %12.5f\n",
 657             b->ba_raw.st_median,
 658             b->ba_corrected.st_median);
 659         (void) printf("#                 stddev %12.5f            %12.5f\n",
 660             b->ba_raw.st_stddev,
 661             b->ba_corrected.st_stddev);
 662         (void) printf("#         standard error %12.5f            %12.5f\n",
 663             b->ba_raw.st_stderr,
 664             b->ba_corrected.st_stderr);
 665         (void) printf("#   99%% confidence level %12.5f            %12.5f\n",
 666             b->ba_raw.st_99confidence,
 667             b->ba_corrected.st_99confidence);
 668         (void) printf("#                   skew %12.5f            %12.5f\n",
 669             b->ba_raw.st_skew,
 670             b->ba_corrected.st_skew);
 671         (void) printf("#               kurtosis %12.5f            %12.5f\n",
 672             b->ba_raw.st_kurtosis,
 673             b->ba_corrected.st_kurtosis);
 674
 675         (void) printf("#       time correlation %12.5f            %12.5f\n",
 676             b->ba_raw.st_timecorr,
 677             b->ba_corrected.st_timecorr);
 678         (void) printf("#\n");
 679
 680         (void) printf("#           elasped time %12.5f\n", (b->ba_endtime -
 681             b->ba_starttime) / 1.0e9);
 682         (void) printf("#      number of samples %12d\n",   b->ba_batches);
 683         (void) printf("#     number of outliers %12d\n", b->ba_outliers);
 684         (void) printf("#      getnsecs overhead %12d\n", (int)nsecs_overhead);
 685
 686         (void) printf("#\n");
 687         (void) printf("# DISTRIBUTION\n");
 688
 689         print_histo(b);
 690
 691         if (lm_optW) {
 692                 print_warnings(b);
 693         }
 694 }
 695
 696 void
 697 update_stats(barrier_t *b, result_t *r)
 698 {
 699         double                  time;
 700         double                  nsecs_per_call;
 701
 702         if (b->ba_waiters == 0) {
 703                 /* first thread only */
 704                 b->ba_t0 = r->re_t0;
 705                 b->ba_t1 = r->re_t1;
 706                 b->ba_count0 = 0;
 707                 b->ba_errors0 = 0;
 708         } else {
 709                 /* all but first thread */
 710                 if (r->re_t0 < b->ba_t0) {
 711                         b->ba_t0 = r->re_t0;
 712                 }
 713                 if (r->re_t1 > b->ba_t1) {
 714                         b->ba_t1 = r->re_t1;
 715                 }
 716         }
 717
 718         b->ba_count0  += r->re_count;
 719         b->ba_errors0 += r->re_errors;
 720
 721         if (b->ba_waiters == b->ba_hwm - 1) {
 722                 /* last thread only */
 723
 724
 725                 time = (double)b->ba_t1 - (double)b->ba_t0 -
 726                     (double)nsecs_overhead;
 727
 728                 if (time < 100 * nsecs_resolution)
 729                         b->ba_quant++;
 730
 731                 /*
 732                  * normalize by procs * threads if not -U
 733                  */
 734
 735                 nsecs_per_call = time / (double)b->ba_count0 *
 736                     (double)(lm_optT * lm_optP);
 737
 738                 b->ba_count  += b->ba_count0;
 739                 b->ba_errors += b->ba_errors0;
 740
 741                 b->ba_data[b->ba_batches % b->ba_datasize] =
 742                     nsecs_per_call;
 743
 744                 b->ba_batches++;
 745         }
 746 }
 747
 748 #ifdef USE_SEMOP
 749 barrier_t *
 750 barrier_create(int hwm, int datasize)
 751 {
 752         struct sembuf           s[1];
 753         barrier_t               *b;
 754
 755         /*LINTED*/
 756         b = (barrier_t *)mmap(NULL,
 757             sizeof (barrier_t) + (datasize - 1) * sizeof (double),
 758             PROT_READ | PROT_WRITE,
 759             MAP_SHARED | MAP_ANON, -1, 0L);
 760         if (b == (barrier_t *)MAP_FAILED) {
 761                 return (NULL);
 762         }
 763         b->ba_datasize = datasize;
 764
 765         b->ba_flag  = 0;
 766         b->ba_hwm   = hwm;
 767         b->ba_semid = semget(IPC_PRIVATE, 3, 0600);
 768         if (b->ba_semid == -1) {
 769                 (void) munmap((void *)b, sizeof (barrier_t));
 770                 return (NULL);
 771         }
 772
 773         /* [hwm - 1, 0, 0] */
 774         s[0].sem_num = 0;
 775         s[0].sem_op  = hwm - 1;
 776         s[0].sem_flg = 0;
 777         if (semop(b->ba_semid, s, 1) == -1) {
 778                 perror("semop(1)");
 779                 (void) semctl(b->ba_semid, 0, IPC_RMID);
 780                 (void) munmap((void *)b, sizeof (barrier_t));
 781                 return (NULL);
 782         }
 783
 784         b->ba_waiters = 0;
 785         b->ba_phase = 0;
 786
 787         b->ba_count = 0;
 788         b->ba_errors = 0;
 789
 790         return (b);
 791 }
 792
 793 int
 794 barrier_destroy(barrier_t *b)
 795 {
 796         (void) semctl(b->ba_semid, 0, IPC_RMID);
 797         (void) munmap((void *)b, sizeof (barrier_t));
 798
 799         return (0);
 800 }
 801
 802 int
 803 barrier_queue(barrier_t *b, result_t *r)
 804 {
 805         struct sembuf           s[2];
 806
 807         /*
 808          * {s0(-(hwm-1))}
 809          * if ! nowait {s1(-(hwm-1))}
 810          *   (all other threads)
 811          *   update shared stats
 812          *   {s0(hwm-1), s1(1)}
 813          *   {s0(1), s2(-1)}
 814          * else
 815          *   (last thread)
 816          *   update shared stats
 817          *   {s2(hwm-1)}
 818          */
 819
 820         s[0].sem_num = 0;
 821         s[0].sem_op  = -(b->ba_hwm - 1);
 822         s[0].sem_flg = 0;
 823         if (semop(b->ba_semid, s, 1) == -1) {
 824                 perror("semop(2)");
 825                 return (-1);
 826         }
 827
 828         s[0].sem_num = 1;
 829         s[0].sem_op  = -(b->ba_hwm - 1);
 830         s[0].sem_flg = IPC_NOWAIT;
 831         if (semop(b->ba_semid, s, 1) == -1) {
 832                 if (errno != EAGAIN) {
 833                         perror("semop(3)");
 834                         return (-1);
 835                 }
 836
 837                 /* all but the last thread */
 838
 839                 if (r != NULL) {
 840                         update_stats(b, r);
 841                 }
 842
 843                 b->ba_waiters++;
 844
 845                 s[0].sem_num = 0;
 846                 s[0].sem_op  = b->ba_hwm - 1;
 847                 s[0].sem_flg = 0;
 848                 s[1].sem_num = 1;
 849                 s[1].sem_op  = 1;
 850                 s[1].sem_flg = 0;
 851                 if (semop(b->ba_semid, s, 2) == -1) {
 852                         perror("semop(4)");
 853                         return (-1);
 854                 }
 855
 856                 s[0].sem_num = 0;
 857                 s[0].sem_op  = 1;
 858                 s[0].sem_flg = 0;
 859                 s[1].sem_num = 2;
 860                 s[1].sem_op  = -1;
 861                 s[1].sem_flg = 0;
 862                 if (semop(b->ba_semid, s, 2) == -1) {
 863                         perror("semop(5)");
 864                         return (-1);
 865                 }
 866
 867         } else {
 868                 /* the last thread */
 869
 870                 if (r != NULL) {
 871                         update_stats(b, r);
 872                 }
 873
 874                 b->ba_waiters = 0;
 875                 b->ba_phase++;
 876
 877                 s[0].sem_num = 2;
 878                 s[0].sem_op  = b->ba_hwm - 1;
 879                 s[0].sem_flg = 0;
 880                 if (semop(b->ba_semid, s, 1) == -1) {
 881                         perror("semop(6)");
 882                         return (-1);
 883                 }
 884         }
 885
 886         return (0);
 887 }
 888
 889 #else /* USE_SEMOP */
 890
 891 barrier_t *
 892 barrier_create(int hwm, int datasize)
 893 {
 894         pthread_mutexattr_t     attr;
 895         pthread_condattr_t      cattr;
 896         barrier_t               *b;
 897
 898         /*LINTED*/
 899         b = (barrier_t *)mmap(NULL,
 900             sizeof (barrier_t) + (datasize - 1) * sizeof (double),
 901             PROT_READ | PROT_WRITE,
 902             MAP_SHARED | MAP_ANON, -1, 0L);
 903         if (b == (barrier_t *)MAP_FAILED) {
 904                 return (NULL);
 905         }
 906         b->ba_datasize = datasize;
 907
 908         b->ba_hwm = hwm;
 909         b->ba_flag  = 0;
 910
 911         (void) pthread_mutexattr_init(&attr);
 912         (void) pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
 913
 914         (void) pthread_condattr_init(&cattr);
 915         (void) pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED);
 916
 917         (void) pthread_mutex_init(&b->ba_lock, &attr);
 918         (void) pthread_cond_init(&b->ba_cv, &cattr);
 919
 920         b->ba_waiters = 0;
 921         b->ba_phase = 0;
 922
 923         b->ba_count = 0;
 924         b->ba_errors = 0;
 925
 926         return (b);
 927 }
 928
 929 int
 930 barrier_destroy(barrier_t *b)
 931 {
 932         (void) munmap((void *)b, sizeof (barrier_t));
 933
 934         return (0);
 935 }
 936
 937 int
 938 barrier_queue(barrier_t *b, result_t *r)
 939 {
 940         int                     phase;
 941
 942         (void) pthread_mutex_lock(&b->ba_lock);
 943
 944         if (r != NULL) {
 945                 update_stats(b, r);
 946         }
 947
 948         phase = b->ba_phase;
 949
 950         b->ba_waiters++;
 951         if (b->ba_hwm == b->ba_waiters) {
 952                 b->ba_waiters = 0;
 953                 b->ba_phase++;
 954                 (void) pthread_cond_broadcast(&b->ba_cv);
 955         }
 956
 957         while (b->ba_phase == phase) {
 958                 (void) pthread_cond_wait(&b->ba_cv, &b->ba_lock);
 959         }
 960
 961         (void) pthread_mutex_unlock(&b->ba_lock);
 962         return (0);
 963 }
 964 #endif /* USE_SEMOP */
 965
 966 int
 967 gettindex()
 968 {
 969         int                     i;
 970
 971         if (tids == NULL) {
 972                 return (-1);
 973         }
 974
 975         for (i = 1; i < lm_optT; i++) {
 976                 if (pthread_self() == tids[i]) {
 977                         return (i);
 978                 }
 979         }
 980
 981         return (0);
 982 }
 983
 984 int
 985 getpindex()
 986 {
 987         return (pindex);
 988 }
 989
 990 void *
 991 gettsd(int p, int t)
 992 {
 993         if ((p < 0) || (p >= lm_optP) || (t < 0) || (t >= lm_optT))
 994                 return (NULL);
 995
 996         return ((void *)((unsigned long)tsdseg +
 997             (((p * lm_optT) + t) * tsdsize)));
 998 }
 999
1000 #if defined(__APPLE__)
1001 int
1002 gettsdindex(void *arg){
1003         /*
1004          * gettindex() can race with pthread_create() filling in tids[].
1005          * This is an alternative approach to finding the calling thread's tsd in t
1006 sdseg
1007          */
1008         return tsdsize ? ((unsigned long)arg - (unsigned long)tsdseg)/tsdsize : 0;
1009 }
1010 #endif /* __APPLE__ */
1011
1012 #ifdef USE_GETHRTIME
/*
 * Current time reading: the value of gethrtime(), passed through as is.
 */
long long
getnsecs()
{
	long long		now = gethrtime();

	return (now);
}
1018
/*
 * Current time reading: the value of gethrtime() divided by 1000.
 */
long long
getusecs()
{
	long long		now = gethrtime();

	return (now / 1000);
}
1024
1025 #elif USE_RDTSC /* USE_GETHRTIME */
1026
/*
 * Read the x86 time-stamp counter.
 *
 * RDTSC returns the low 32 bits of the counter in EAX and the high 32 bits
 * in EDX.  The halves are captured through separate "a" and "d" outputs
 * and combined by hand: the "A" constraint only names the EDX:EAX pair on
 * 32-bit x86, while on x86-64 it selects a single register and would drop
 * half of the counter.
 */
__inline__ long long
rdtsc(void)
{
	unsigned int		lo;
	unsigned int		hi;

	/* 0x0f 0x31 is the RDTSC opcode */
	__asm__ volatile(".byte 0x0f, 0x31" : "=a" (lo), "=d" (hi));

	return ((long long)(((unsigned long long)hi << 32) | lo));
}
1034
1035 long long
1036 getusecs()
1037 {
1038         return (rdtsc() * 1000000 / lm_hz);
1039 }
1040
1041 long long
1042 getnsecs()
1043 {
1044         return (rdtsc() * 1000000000 / lm_hz);
1045 }
1046
1047 #else /* USE_GETHRTIME */
1048
/*
 * Wall-clock time from gettimeofday(), expressed in microseconds.
 */
long long
getusecs()
{
	struct timeval		now;
	long long		usecs;

	(void) gettimeofday(&now, NULL);

	usecs = (long long)now.tv_sec;
	usecs *= 1000000LL;
	usecs += (long long)now.tv_usec;

	return (usecs);
}
1058
/*
 * Wall-clock time from gettimeofday(), expressed in nanoseconds.  The
 * source only has microsecond fields, so the result is always a multiple
 * of 1000.
 */
long long
getnsecs()
{
	struct timeval		now;
	long long		nsecs;

	(void) gettimeofday(&now, NULL);

	nsecs = (long long)now.tv_sec * 1000000000LL;
	nsecs += (long long)now.tv_usec * 1000LL;

	return (nsecs);
}
1069
1070 #endif /* USE_GETHRTIME */
1071
/*
 * Make sure the soft RLIMIT_NOFILE limit is at least `limit'.
 *
 * Nothing is changed when the current soft limit already covers the
 * request or when `limit' is not positive.  Otherwise the soft limit is
 * raised to `limit', and the hard limit too if it is lower.
 *
 * Returns 0.  Exits with status 1 if getrlimit() fails and with status 3
 * if setrlimit() fails.
 */
int
setfdlimit(int limit)
{
	struct rlimit		rl;
	rlim_t			wanted;

	if (getrlimit(RLIMIT_NOFILE, &rl) < 0) {
		perror("getrlimit");
		exit(1);
	}

	/*
	 * rlim_t is unsigned: a negative request would otherwise turn into
	 * an enormous value and try to raise both limits.
	 */
	if (limit <= 0)
		return (0);

	wanted = (rlim_t)limit;

	if (rl.rlim_cur >= wanted)
		return (0); /* no worries */

	rl.rlim_cur = wanted;

	if (rl.rlim_max < wanted)
		rl.rlim_max = wanted;

	if (setrlimit(RLIMIT_NOFILE, &rl) < 0) {
		perror("setrlimit");
		exit(3);
	}

	return (0);
}
1097
1098
#define	KILOBYTE		1024
#define	MEGABYTE		(KILOBYTE * KILOBYTE)
#define	GIGABYTE		(KILOBYTE * MEGABYTE)

/*
 * Work out the multiplier implied by an optional size suffix.
 *
 * If the last character of arg is a letter it must be one of k/K, m/M or
 * g/G and every character before it must be a decimal digit.  Returns the
 * multiplier (1 when arg is empty or does not end in a letter), or -1 when
 * the suffix is unknown or a non-digit precedes it.
 */
static long long
size_multiplier(const char *arg)
{
	size_t			len = strlen(arg);
	size_t			i;
	long long		mult;

	/* <ctype.h> functions need an unsigned char value */
	if (len == 0 || !isalpha((unsigned char)arg[len - 1]))
		return (1);

	switch (arg[len - 1]) {
	case 'k':
	case 'K':
		mult = KILOBYTE;
		break;
	case 'm':
	case 'M':
		mult = MEGABYTE;
		break;
	case 'g':
	case 'G':
		mult = GIGABYTE;
		break;
	default:
		return (-1);
	}

	for (i = 0; i < len - 1; i++)
		if (!isdigit((unsigned char)arg[i]))
			return (-1);

	return (mult);
}

/*
 * Parse a size such as "512", "64k", "8M" or "2g" into a long long.
 *
 * Returns the number scaled by the suffix, or -1 if the suffix is not
 * recognised or the text before it is not purely digits.  A string
 * without a trailing letter is handed to strtoll() unchanged.
 */
long long
sizetoll(const char *arg)
{
	long long		mult = size_multiplier(arg);

	if (mult < 0)
		return (-1);

	return (mult * strtoll(arg, NULL, 10));
}

/*
 * Same as sizetoll() but for callers that need an int.  Returns -1 as
 * well when the scaled value does not fit in an int, rather than a
 * silently truncated number.
 */
int
sizetoint(const char *arg)
{
	long long		val = sizetoll(arg);

	if (val > INT_MAX || val < INT_MIN)
		return (-1);

	return ((int)val);
}
1170
/*
 * Print one histogram bar on stdout.
 *
 * The first column is '*' when count is non-zero and ' ' otherwise; it is
 * followed by stars up to (32 * count) / total columns and padded with
 * spaces to 32 columns.  A total of 0 yields no further stars instead of
 * a division by zero.
 */
static void
print_bar(long count, long total)
{
	long			stars;
	int			i;

	stars = (total != 0) ? (32 * count) / total : 0;

	(void) putchar_unlocked(count ? '*' : ' ');
	for (i = 1; i < stars; i++)
		(void) putchar_unlocked('*');
	for (; i < 32; i++)
		(void) putchar_unlocked(' ');
}
1182
/*
 * qsort() comparison callback for doubles: 1 when the first value is
 * greater, -1 when it is smaller, 0 otherwise.
 */
static int
doublecmp(const void *p1, const void *p2)
{
	const double		lhs = *(const double *)p1;
	const double		rhs = *(const double *)p2;

	return ((lhs > rhs) - (lhs < rhs));
}
1195
1196 static void
1197 print_histo(barrier_t *b)
1198 {
1199         int                     n;
1200         int                     i;
1201         int                     j;
1202         int                     last;
1203         long long               maxcount;
1204         double                  sum;
1205         long long               min;
1206         long long               scale;
1207         double                  x;
1208         long long               y;
1209         long long               count;
1210         int                     i95;
1211         double                  p95;
1212         double                  r95;
1213         double                  m95;
1214         histo_t                 *histo;
1215
1216         (void) printf("#        %12s %12s %32s %12s\n", "counts", "usecs/call",
1217             "", "means");
1218
1219         /* calculate how much data we've captured */
1220         n = b->ba_batches > b->ba_datasize ? b->ba_datasize : b->ba_batches;
1221
1222         /* find the 95th percentile - index, value and range */
1223         qsort((void *)b->ba_data, n, sizeof (double), doublecmp);
1224         min = b->ba_data[0] + 0.000001;
1225         i95 = n * 95 / 100;
1226         p95 = b->ba_data[i95];
1227         r95 = p95 - min + 1;
1228
1229         /* find a suitable min and scale */
1230         i = 0;
1231         x = r95 / (HISTOSIZE - 1);
1232         while (x >= 10.0) {
1233                 x /= 10.0;
1234                 i++;
1235         }
1236         y = x + 0.9999999999;
1237         while (i > 0) {
1238                 y *= 10;
1239                 i--;
1240         }
1241         min /= y;
1242         min *= y;
1243         scale = y * (HISTOSIZE - 1);
1244         if (scale < (HISTOSIZE - 1)) {
1245                 scale = (HISTOSIZE - 1);
1246         }
1247
1248         /* create and initialise the histogram */
1249         histo = malloc(HISTOSIZE * sizeof (histo_t));
1250         for (i = 0; i < HISTOSIZE; i++) {
1251                 histo[i].sum = 0.0;
1252                 histo[i].count = 0;
1253         }
1254
1255         /* populate the histogram */
1256         last = 0;
1257         sum = 0.0;
1258         count = 0;
1259         for (i = 0; i < i95; i++) {
1260                 j = (HISTOSIZE - 1) * (b->ba_data[i] - min) / scale;
1261
1262                 if (j >= HISTOSIZE) {
1263                         (void) printf("panic!\n");
1264                         j = HISTOSIZE - 1;
1265                 }
1266
1267                 histo[j].sum += b->ba_data[i];
1268                 histo[j].count++;
1269
1270                 sum += b->ba_data[i];
1271                 count++;
1272         }
1273         m95 = sum / count;
1274
1275         /* find the larges bucket */
1276         maxcount = 0;
1277         for (i = 0; i < HISTOSIZE; i++)
1278                 if (histo[i].count > 0) {
1279                         last = i;
1280                         if (histo[i].count > maxcount)
1281                                 maxcount = histo[i].count;
1282                 }
1283
1284         /* print the buckets */
1285         for (i = 0; i <= last; i++) {
1286                 (void) printf("#       %12lld %12.5f |", histo[i].count,
1287                     (min + scale * (double)i / (HISTOSIZE - 1)));
1288
1289                 print_bar(histo[i].count, maxcount);
1290
1291                 if (histo[i].count > 0)
1292                         (void) printf("%12.5f\n",
1293                             histo[i].sum / histo[i].count);
1294                 else
1295                         (void) printf("%12s\n", "-");
1296         }
1297
1298         /* find the mean of values beyond the 95th percentile */
1299         sum = 0.0;
1300         count = 0;
1301         for (i = i95; i < n; i++) {
1302                 sum += b->ba_data[i];
1303                 count++;
1304         }
1305
1306         /* print the >95% bucket summary */
1307         (void) printf("#\n");
1308         (void) printf("#       %12lld %12s |", count, "> 95%");
1309         print_bar(count, maxcount);
1310         if (count > 0)
1311                 (void) printf("%12.5f\n", sum / count);
1312         else
1313                 (void) printf("%12s\n", "-");
1314         (void) printf("#\n");
1315         (void) printf("#       %12s %12.5f\n", "mean of 95%", m95);
1316         (void) printf("#       %12s %12.5f\n", "95th %ile", p95);
1317
1318         /* quantify any buffer overflow */
1319         if (b->ba_batches > b->ba_datasize)
1320                 (void) printf("#       %12s %12d\n", "data dropped",
1321                     b->ba_batches - b->ba_datasize);
1322 }
1323
1324 static void
1325 compute_stats(barrier_t *b)
1326 {
1327         int i;
1328
1329         if (b->ba_batches > b->ba_datasize)
1330                 b->ba_batches = b->ba_datasize;
1331
1332         /*
1333          * convert to usecs/call
1334          */
1335
1336         for (i = 0; i < b->ba_batches; i++)
1337                 b->ba_data[i] /= 1000.0;
1338
1339         /*
1340          * do raw stats
1341          */
1342
1343         (void) crunch_stats(b->ba_data, b->ba_batches, &b->ba_raw);
1344
1345         /*
1346          * recursively apply 3 sigma rule to remove outliers
1347          */
1348
1349         b->ba_corrected = b->ba_raw;
1350         b->ba_outliers = 0;
1351
1352         if (b->ba_batches > 40) { /* remove outliers */
1353                 int removed;
1354
1355                 do {
1356                         removed = remove_outliers(b->ba_data, b->ba_batches,
1357                             &b->ba_corrected);
1358                         b->ba_outliers += removed;
1359                         b->ba_batches -= removed;
1360                         (void) crunch_stats(b->ba_data, b->ba_batches,
1361                             &b->ba_corrected);
1362                         } while (removed != 0 && b->ba_batches > 40);
1363         }
1364
1365 }
1366
1367 /*
1368  * routine to compute various statistics on array of doubles.
1369  */
1370
1371 static int
1372 crunch_stats(double *data, int count, stats_t *stats)
1373 {
1374         double a;
1375         double std;
1376         double diff;
1377         double sk;
1378         double ku;
1379         double mean;
1380         int i;
1381         int bytes;
1382         double *dupdata;
1383
1384         /*
1385          * first we need the mean
1386          */
1387
1388         mean = 0.0;
1389
1390         for (i = 0; i < count; i++) {
1391                 mean += data[i];
1392         }
1393
1394         mean /= count;
1395
1396         stats->st_mean = mean;
1397
1398         /*
1399          * malloc and sort so we can do median
1400          */
1401
1402         dupdata = malloc(bytes = sizeof (double) * count);
1403         (void) memcpy(dupdata, data, bytes);
1404         qsort((void *)dupdata, count, sizeof (double), doublecmp);
1405         stats->st_median   = dupdata[count/2];
1406
1407         /*
1408          * reuse dupdata to compute time correlation of data to
1409          * detect interesting time-based trends
1410          */
1411
1412         for (i = 0; i < count; i++)
1413                 dupdata[i] = (double)i;
1414
1415         (void) fit_line(dupdata, data, count, &a, &stats->st_timecorr);
1416         free(dupdata);
1417
1418         std = 0.0;
1419         sk  = 0.0;
1420         ku  = 0.0;
1421
1422         stats->st_max = -1;
1423         stats->st_min = 1.0e99; /* hard to find portable values */
1424
1425         for (i = 0; i < count; i++) {
1426                 if (data[i] > stats->st_max)
1427                         stats->st_max = data[i];
1428                 if (data[i] < stats->st_min)
1429                         stats->st_min = data[i];
1430
1431                 diff = data[i] - mean;
1432                 std += diff * diff;
1433                 sk  += diff * diff * diff;
1434                 ku  += diff * diff * diff * diff;
1435         }
1436
1437         stats->st_stddev   = std = sqrt(std/(double)(count - 1));
1438         stats->st_stderr   = std / sqrt(count);
1439         stats->st_99confidence = stats->st_stderr * 2.326;
1440         stats->st_skew     = sk / (std * std * std) / (double)(count);
1441         stats->st_kurtosis = ku / (std * std * std * std) /
1442             (double)(count) - 3;
1443
1444         return (0);
1445 }
1446
/*
 * Least squares fit of the line y = a + bx through `count' points.
 *
 * On success the intercept is stored in *a, the slope in *b and 0 is
 * returned.  When the denominator of the fit is exactly zero, -1 is
 * returned and neither *a nor *b is written.
 */

int
fit_line(double *x, double *y, int count, double *a, double *b)
{
	double sx = 0.0;
	double sy = 0.0;
	double sxy = 0.0;
	double sxx = 0.0;
	double det;
	int k;

	/* accumulate the four sums the closed-form solution needs */
	for (k = 0; k < count; k++) {
		const double xk = x[k];
		const double yk = y[k];

		sx  += xk;
		sxx += xk * xk;
		sy  += yk;
		sxy += xk * yk;
	}

	det = count * sxx - sx * sx;
	if (det == 0.0)
		return (-1);

	*a = (sy * sxx - sx * sxy) / det;
	*b = (count * sxy - sx * sy) / det;

	return (0);
}
1479
/*
 * Do-nothing function for measurement purposes; always returns 1.
 */

int
nop()
{
	return 1;
}
1489
1490 #define NSECITER 1000
1491
1492 static long long
1493 get_nsecs_overhead()
1494 {
1495         long long s;
1496
1497         double data[NSECITER];
1498         stats_t stats;
1499
1500         int i;
1501         int count;
1502         int outliers;
1503
1504         (void) getnsecs(); /* warmup */
1505         (void) getnsecs(); /* warmup */
1506         (void) getnsecs(); /* warmup */
1507
1508         i = 0;
1509
1510         count = NSECITER;
1511
1512         for (i = 0; i < count; i++) {
1513                 s = getnsecs();
1514                 data[i] = getnsecs() - s;
1515         }
1516
1517         (void) crunch_stats(data, count, &stats);
1518
1519         while ((outliers = remove_outliers(data, count, &stats)) != 0) {
1520                 count -= outliers;
1521                 (void) crunch_stats(data, count, &stats);
1522         }
1523
1524         return ((long long)stats.st_mean);
1525
1526 }
1527
/*
 * Estimate the resolution of getnsecs().
 *
 * First the smallest busy-loop length that makes two getnsecs() readings
 * differ is searched for.  Then 1000 intervals of linearly growing loop
 * length are timed, and the smallest positive difference between
 * consecutive interval lengths is returned.  If no positive difference is
 * seen the initial value 1<<30 is returned.
 *
 * Loop lengths and sample differences are kept in long long: nops * i can
 * exceed INT_MAX, and a difference must not be truncated before it is
 * compared.
 */
long long
get_nsecs_resolution()
{
	long long y[1000];

	int i, nops;
	long long j;
	long long res;
	long long start, stop;

	/*
	 * first, figure out how many nops to use
	 * to get any delta between time measurements.
	 * use a minimum of one.
	 */

	/*
	 * warm cache
	 */

	stop = start = getnsecs();

	for (i = 1; i < 10000000; i++) {
		start = getnsecs();
		for (j = i; j; j--)
			;
		stop = getnsecs();
		if (stop > start)
			break;
	}

	nops = i;

	/*
	 * now collect data at linearly varying intervals
	 */

	for (i = 0; i < 1000; i++) {
		start = getnsecs();
		for (j = (long long)nops * i; j; j--)
			;
		stop = getnsecs();
		y[i] = stop - start;
	}

	/*
	 * find smallest positive difference between samples;
	 * this is the timer resolution
	 */

	res = 1<<30;

	for (i = 1; i < 1000; i++) {
		long long diff = y[i] - y[i-1];

		if (diff > 0 && res > diff)
			res = diff;

	}

	return (res);
}
1588
1589 /*
1590  * remove any data points from the array more than 3 sigma out
1591  */
1592
1593 static int
1594 remove_outliers(double *data, int count, stats_t *stats)
1595 {
1596         double outmin = stats->st_mean - 3 * stats->st_stddev;
1597         double outmax = stats->st_mean + 3 * stats->st_stddev;
1598
1599         int i, j, outliers;
1600
1601         for (outliers = i = j = 0; i < count; i++)
1602                 if (data[i] > outmax || data[i] < outmin)
1603                         outliers++;
1604                 else
1605                         data[j++] = data[i];
1606
1607         return (outliers);
1608 }