tools/tests/libMicro/cachetocache.c (apple/xnu.git, tag xnu-3248.60.10)

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms
 * of the Common Development and Distribution License
 * (the "License"). You may not use this file except
 * in compliance with the License.
 *
 * You can obtain a copy of the license at
 * src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL
 * HEADER in each file and include the License file at
 * usr/src/OPENSOLARIS.LICENSE. If applicable,
 * add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your
 * own identifying information: Portions Copyright [yyyy]
 * [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Routine to benchmark cache-to-cache transfer times. Uses
 * Solaris features to find and bind to the CPUs in the current
 * processor set, so it is not likely to work elsewhere.
 */


#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <sys/processor.h>
#include <sys/types.h>
#include <errno.h>
#include <sys/pset.h>

#include "libmicro.h"

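/*
 * libMicro harness hooks implemented below: benchmark_init() declares
 * the option string and usage text, benchmark_optswitch() parses the
 * -s option, benchmark_initrun() and benchmark_initworker() set up
 * per-run and per-thread state, benchmark() does the timed work, and
 * benchmark_result() formats the per-run column.
 */
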
static long opts = 1024*512;

typedef struct {
	long **ts_data;			/* pointer-chain buffer */
	long ts_result;			/* accumulator, keeps the walk live */
	pthread_mutex_t ts_lock;	/* serializes access to the chain */
} tsd_t;

static unsigned int ncpu = 1024;	/* in: capacity of cpus[]; out: set by pset_info() */

static tsd_t *thread_data[1024];
static processorid_t cpus[1024];

int traverse_ptrchain(long **, int, int);

int
benchmark_init()
{
	lm_tsdsize = sizeof (tsd_t);

	(void) sprintf(lm_optstr, "s:");

	(void) sprintf(lm_usage,
	    " [-s size] size of access area in bytes"
	    " (default %ld)\n"
	    "notes: measures cache to cache transfer times on Solaris\n",
	    opts);

	(void) sprintf(lm_header, "%8s", "size");

	return (0);
}

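/*
 * Illustrative invocation (sizes hypothetical; -B and -T are the
 * standard libMicro batch-size and thread-count options):
 *
 *	cachetocache -s 512k -T 4 -B 100
 */
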
int
benchmark_optswitch(int opt, char *optarg)
{
	switch (opt) {
	case 's':
		/* parse a size like "512k" via libMicro's sizetoint() */
		opts = sizetoint(optarg);
		break;
	default:
		return (-1);
	}

	return (0);
}

int
benchmark_initrun()
{
	/* discover the CPUs in this process's processor set */
	if (pset_info(PS_MYID, NULL, &ncpu, cpus) < 0) {
		perror("pset_info");
		return (1);
	}

	return (0);
}

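/*
 * Each worker binds itself to one of the CPUs found above, chosen
 * round-robin by thread id, so each thread's pointer chain is built
 * and warmed in a known CPU's cache.
 */
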
int
benchmark_initworker(void *tsd)
{
	tsd_t *ts = (tsd_t *)tsd;
	int i, j;
	processorid_t cpu;

	ts->ts_data = malloc(opts);

	if (ts->ts_data == NULL) {
		return (1);
	}

	(void) pthread_mutex_init(&ts->ts_lock, NULL);

	/*
	 * Solaris thread ids start at 1, so this spreads the workers
	 * round-robin across the CPUs in the processor set.
	 */
	if (processor_bind(P_LWPID, P_MYID,
	    cpu = cpus[(pthread_self() - 1) % ncpu],
	    NULL) < 0) {
		perror("processor_bind");
		return (1);
	}

	(void) printf("# thread %d using processor %d\n",
	    (int)pthread_self(), cpu);

	/*
	 * use lmbench style backwards stride
	 */

	for (i = 0; i < opts / sizeof (long); i++) {
		j = i - 128;
		if (j < 0)
			j = j + opts / sizeof (long);
		ts->ts_data[i] = (long *)&(ts->ts_data[j]);
	}

	thread_data[pthread_self() - 1] = ts;

	return (0);
}

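/*
 * Sketch of the chain built above, for a buffer of N longs: entry i
 * points back at entry (i - 128) mod N, so a walk starting at entry 0
 * hops to N-128, N-256, ..., touching entries 128 longs apart; the
 * backwards stride helps defeat simple sequential prefetchers.
 */
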
/*
 * Here we go in order for each thread, causing inherent serialization.
 * This is normally not a good idea, but in this case we're trying to
 * measure cache-to-cache transfer times, and if we run the threads in
 * parallel we're likely to see saturation effects rather than
 * cache-to-cache transfers, esp. on wimpy memory platforms like the P4.
 */

/*ARGSUSED*/
int
benchmark(void *tsd, result_t *res)
{
	tsd_t *ts;
	int i, j;
	/* one traverse_ptrchain() call makes this many pointer hops */
	int count = opts / 128 / sizeof (long);

	for (j = 0; j < lm_optB; j++)
		for (i = 0; i < lm_optT; i++) {
			ts = thread_data[i];
			(void) pthread_mutex_lock(&ts->ts_lock);
			ts->ts_result += traverse_ptrchain(
			    (long **)ts->ts_data, count, 0);
			(void) pthread_mutex_unlock(&ts->ts_lock);
		}

	res->re_count = lm_optB * lm_optT * count;

	return (0);
}

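/*
 * Pointer-chase loop, manually unrolled 10x. Each step rewrites the
 * pointer it just loaded (*ptr is stored back unchanged when value is
 * 0), so every visited cache line is dirtied; when the next thread,
 * bound to another CPU, walks the same chain, the lines must migrate
 * out of the previous CPU's cache, which is the transfer being
 * measured.
 */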
int
traverse_ptrchain(long **ptr, int count, int value)
{
	int i;

	for (i = 0; i < count; i += 10) {
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
		ptr = (long **)*ptr;
		*ptr = *ptr + value;
	}
	return ((int)*ptr); /* bogus return */
}

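/*
 * The single per-run column is the -s buffer size, printed to line up
 * under the "size" header emitted by benchmark_init().
 */
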
char *
benchmark_result()
{
	static char result[256];

	(void) sprintf(result, "%8ld ", opts);

	return (result);
}