xnu-7195.101.1.tar.gz

[apple/xnu.git] / tools / tests / affinity / sets.c
diff --git a/tools/tests/affinity/sets.c b/tools/tests/affinity/sets.c

index a7115e56b7395823a5b7799ef0a540275dbe15f9..028244ea3ab392d2e1cb4887bb58bf10a82061be 100644 (file)
--- a/tools/tests/affinity/sets.c
+++ b/tools/tests/affinity/sets.c
@@ -1,7 +1,5 @@
  #include <AvailabilityMacros.h>
-#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
-#include </System/Library/Frameworks/System.framework/PrivateHeaders/mach/thread_policy.h>
-#endif
+#include <mach/thread_policy.h>
  #include <mach/mach.h>
  #include <mach/mach_error.h>
  #include <mach/mach_time.h>
@@ -13,20 +11,21 @@
  #include <unistd.h>
  #include <err.h>
  #include <errno.h>
+#include <sys/sysctl.h>
  
  /*
   * Sets is a multithreaded test/benchmarking program to evaluate
   * affinity set placement in Leopard.
   *
   * The picture here, for each set, is:
- *  
+ *
   *       free                   work
   *    -> queue --> producer --> queue --> consumer --
   *   |                                               |
   *    -----------------------------------------------
   *
   *       <------ "stage" -----> <------ "stage" ----->
-
+ *
   * We spin off sets of production line threads (2 sets by default).
   * All threads of each line set the same affinity tag (if affinity is enabled with -a).
   * By default there are 2 stage (worker) threads per production line.
@@ -47,95 +46,91 @@
   * use as input and output and what function to call for processing is
   * data-driven.
   */
-  
+
  pthread_mutex_t funnel;
-pthread_cond_t barrier;
+pthread_cond_t  barrier;
  
-uint64_t       timer;
-int            threads;
-int            threads_ready = 0;
+uint64_t        timer;
+int             threads;
+int             threads_ready = 0;
  
-int            iterations = 10000;
-boolean_t      affinity = FALSE;
-boolean_t      halting = FALSE;
-boolean_t      cache_config = FALSE;
-int            verbosity = 1;
+int             iterations = 10000;
+boolean_t       affinity = FALSE;
+boolean_t       halting = FALSE;
+boolean_t       cache_config = FALSE;
+int             verbosity = 1;
  
  typedef struct work {
-       TAILQ_ENTRY(work)       link;
-       int                     *data;
+       TAILQ_ENTRY(work)       link;
+       int                     *data;
  } work_t;
  
  /*
   * A work queue, complete with pthread objects for its management
   */
  typedef struct work_queue {
-       pthread_mutex_t         mtx;
-       pthread_cond_t          cnd;
-       TAILQ_HEAD(, work)      queue;
-       boolean_t               waiters;
+       pthread_mutex_t         mtx;
+       pthread_cond_t          cnd;
+       TAILQ_HEAD(, work)      queue;
+       boolean_t               waiters;
  } work_queue_t;
  
  /* Worker functions take an integer array and its size */
-typedef void (worker_fn_t)(int *, int); 
+typedef void (worker_fn_t)(int *, int);
  
  /* This struct controls the function of a thread */
  typedef struct {
-       int                     stagenum;
-       char                    *name;
-       worker_fn_t             *fn;
-       work_queue_t            *input;         
-       work_queue_t            *output;                
-       struct line_info        *set;
-       pthread_t               thread;
-       work_queue_t            bufq;
+       int                     stagenum;
+       char                    *name;
+       worker_fn_t             *fn;
+       work_queue_t            *input;
+       work_queue_t            *output;
+       struct line_info        *set;
+       pthread_t               thread;
+       work_queue_t            bufq;
  } stage_info_t;
  
  /* This defines a thread set */
  #define WORKERS_MAX 10
  typedef struct line_info {
-       int                     setnum;
-       int                     *data;
-       int                     isize;
-       stage_info_t            *stage[WORKERS_MAX];
+       int                     setnum;
+       int                     *data;
+       int                     isize;
+       stage_info_t            *stage[WORKERS_MAX];
  } line_info_t;
  
-#define DBG(x...) do {                         \
-       if (verbosity > 1) {                    \
-               pthread_mutex_lock(&funnel);    \
-               printf(x);                      \
-               pthread_mutex_unlock(&funnel);  \
-       }                                       \
+#define DBG(x...) do {                          \
+       if (verbosity > 1) {                    \
+               pthread_mutex_lock(&funnel);    \
+               printf(x);                      \
+               pthread_mutex_unlock(&funnel);  \
+       }                                       \
  } while (0)
  
-#define mutter(x...) do {                      \
-       if (verbosity > 0) {                    \
-               printf(x);                      \
-       }                                       \
+#define mutter(x...) do {                       \
+       if (verbosity > 0) {                    \
+               printf(x);                      \
+       }                                       \
  } while (0)
  
-#define s_if_plural(x) (((x) > 1) ? "s" : "")
+#define s_if_plural(x)  (((x) > 1) ? "s" : "")
  
  static void
  usage()
  {
         fprintf(stderr,
-#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
-               "usage: sets [-a]   Turn affinity on (off)\n"
-               "            [-b B] Number of buffers per set/line (2)\n"
-#else
-               "usage: sets [-b B] Number of buffers per set/line (2)\n"
-#endif
-               "            [-c]   Configure for max cache performance\n"
-               "            [-h]   Print this\n"
-               "            [-i I] Number of items/buffers to process (1000)\n"
-               "            [-s S] Number of stages per set/line (2)\n"
-               "            [-t]   Halt for keyboard input to start\n"
-               "            [-p P] Number of pages per buffer (256=1MB)]\n"
-               "            [-w]   Consumer writes data\n"
-               "            [-v V] Level of verbosity 0..2 (1)\n"
-               "            [N]    Number of sets/lines (2)\n"
-       );
+           "usage: sets [-a]   Turn affinity on (off)\n"
+           "            [-b B] Number of buffers per set/line (2)\n"
+           "            [-c]   Configure for max cache performance\n"
+           "            [-h]   Print this\n"
+           "            [-i I] Number of items/buffers to process (1000)\n"
+           "            [-s S] Number of stages per set/line (2)\n"
+           "            [-t]   Halt for keyboard input to start\n"
+           "            [-p P] Number of pages per buffer (256=1MB)]\n"
+           "            [-w]   Consumer writes data\n"
+           "            [-v V] Level of verbosity 0..2 (1)\n"
+           "            [N]    Number of sets/lines (2)\n"
+           );
         exit(1);
  }
  
@@ -143,7 +138,7 @@ usage()
  void
  writer_fn(int *data, int isize)
  {
-       int     i;
+       int     i;
  
         for (i = 0; i < isize; i++) {
                 data[i] = i;
@@ -154,8 +149,8 @@ writer_fn(int *data, int isize)
  void
  reader_fn(int *data, int isize)
  {
-       int     i;
-       int     datum;
+       int     i;
+       int     datum;
  
         for (i = 0; i < isize; i++) {
                 datum = data[i];
@@ -166,7 +161,7 @@ reader_fn(int *data, int isize)
  void
  reader_writer_fn(int *data, int isize)
  {
-       int     i;
+       int     i;
  
         for (i = 0; i < isize; i++) {
                 data[i] += 1;
@@ -180,37 +175,37 @@ reader_writer_fn(int *data, int isize)
  void *
  manager_fn(void *arg)
  {
-       stage_info_t                    *sp = (stage_info_t *) arg;
-       line_info_t                     *lp = sp->set;
-       kern_return_t                   ret;
-       long                            iteration = 0;
+       stage_info_t                    *sp = (stage_info_t *) arg;
+       line_info_t                     *lp = sp->set;
+       kern_return_t                   ret;
+       long                            iteration = 0;
  
         /*
          * If we're using affinity sets (off by default, enabled with -a)
          * set our tag to be our thread set number.
          */
-#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
-       thread_extended_policy_data_t   epolicy;
-       thread_affinity_policy_data_t   policy;
+       thread_extended_policy_data_t   epolicy;
+       thread_affinity_policy_data_t   policy;
  
         epolicy.timeshare = FALSE;
         ret = thread_policy_set(
-                       mach_thread_self(), THREAD_EXTENDED_POLICY,
-                       (thread_policy_t) &epolicy,
-                       THREAD_EXTENDED_POLICY_COUNT);
-       if (ret != KERN_SUCCESS)
+               mach_thread_self(), THREAD_EXTENDED_POLICY,
+               (thread_policy_t) &epolicy,
+               THREAD_EXTENDED_POLICY_COUNT);
+       if (ret != KERN_SUCCESS) {
                 printf("thread_policy_set(THREAD_EXTENDED_POLICY) returned %d\n", ret);
-       
+       }
+
         if (affinity) {
                 policy.affinity_tag = lp->setnum;
                 ret = thread_policy_set(
-                               mach_thread_self(), THREAD_AFFINITY_POLICY,
-                               (thread_policy_t) &policy,
-                               THREAD_AFFINITY_POLICY_COUNT);
-               if (ret != KERN_SUCCESS)
+                       mach_thread_self(), THREAD_AFFINITY_POLICY,
+                       (thread_policy_t) &policy,
+                       THREAD_AFFINITY_POLICY_COUNT);
+               if (ret != KERN_SUCCESS) {
                         printf("thread_policy_set(THREAD_AFFINITY_POLICY) returned %d\n", ret);
+               }
         }
-#endif
  
         DBG("Starting %s set: %d stage: %d\n", sp->name, lp->setnum, sp->stagenum);
  
@@ -224,7 +219,7 @@ manager_fn(void *arg)
                 pthread_mutex_unlock(&funnel);
                 if (halting) {
                         printf("  all threads ready for process %d, "
-                               "hit any key to start", getpid());
+                           "hit any key to start", getpid());
                         fflush(stdout);
                         (void) getchar();
                 }
@@ -236,8 +231,8 @@ manager_fn(void *arg)
         }
  
         do {
-               int             i;
-               work_t          *workp;
+               int             i;
+               work_t          *workp;
  
                 /*
                  * Get a buffer from the input queue.
@@ -246,10 +241,11 @@ manager_fn(void *arg)
                 pthread_mutex_lock(&sp->input->mtx);
                 while (1) {
                         workp = TAILQ_FIRST(&(sp->input->queue));
-                       if (workp != NULL)
+                       if (workp != NULL) {
                                 break;
+                       }
                         DBG("    %s[%d,%d] iteration %d waiting for buffer\n",
-                               sp->name, lp->setnum, sp->stagenum, iteration);
+                           sp->name, lp->setnum, sp->stagenum, iteration);
                         sp->input->waiters = TRUE;
                         pthread_cond_wait(&sp->input->cnd, &sp->input->mtx);
                         sp->input->waiters = FALSE;
@@ -258,7 +254,7 @@ manager_fn(void *arg)
                 pthread_mutex_unlock(&sp->input->mtx);
  
                 DBG("  %s[%d,%d] iteration %d work %p data %p\n",
-                       sp->name, lp->setnum, sp->stagenum, iteration, workp, workp->data);
+                   sp->name, lp->setnum, sp->stagenum, iteration, workp, workp->data);
  
                 /* Do our stuff with the buffer */
                 (void) sp->fn(workp->data, lp->isize);
@@ -271,7 +267,7 @@ manager_fn(void *arg)
                 TAILQ_INSERT_TAIL(&(sp->output->queue), workp, link);
                 if (sp->output->waiters) {
                         DBG("    %s[%d,%d] iteration %d signaling work\n",
-                               sp->name, lp->setnum, sp->stagenum, iteration);
+                           sp->name, lp->setnum, sp->stagenum, iteration);
                         pthread_cond_signal(&sp->output->cnd);
                 }
                 pthread_mutex_unlock(&sp->output->mtx);
@@ -282,44 +278,55 @@ manager_fn(void *arg)
         return (void *) iteration;
  }
  
+#define MAX_CACHE_DEPTH 10
  static void
  auto_config(int npages, int *nbufs, int *nsets)
  {
-       int     len;
-       int     ncpu;
-       int64_t cacheconfig[10];
-       int64_t cachesize[10];
+       size_t  len;
+       int     ncpu;
+       int     llc;
+       int64_t cacheconfig[MAX_CACHE_DEPTH];
+       int64_t cachesize[MAX_CACHE_DEPTH];
  
         mutter("Autoconfiguring...\n");
  
         len = sizeof(cacheconfig);
         if (sysctlbyname("hw.cacheconfig",
-                        &cacheconfig[0], &len, NULL, 0) != 0) {
+           &cacheconfig[0], &len, NULL, 0) != 0) {
                 printf("Unable to get hw.cacheconfig, %d\n", errno);
                 exit(1);
         }
         len = sizeof(cachesize);
         if (sysctlbyname("hw.cachesize",
-                        &cachesize[0],  &len, NULL, 0) != 0) {
+           &cachesize[0], &len, NULL, 0) != 0) {
                 printf("Unable to get hw.cachesize, %d\n", errno);
                 exit(1);
         }
  
+       /*
+        * Find LLC
+        */
+       for (llc = MAX_CACHE_DEPTH - 1; llc > 0; llc--) {
+               if (cacheconfig[llc] != 0) {
+                       break;
+               }
+       }
+
         /*
          * Calculate how many buffers of size npages*4096 bytes
          * fit into 90% of the last-level cache (L2 before this change).
          */
-       *nbufs = cachesize[2] * 9 / (npages * 4096 * 10);
-       mutter("  L2 cache %qd bytes: "
-               "using %d buffers of size %d bytes\n",
-               cachesize[2], *nbufs, (npages * 4096));
+       *nbufs = cachesize[llc] * 9 / (npages * 4096 * 10);
+       mutter("  L%d (LLC) cache %qd bytes: "
+           "using %d buffers of size %d bytes\n",
+           llc, cachesize[llc], *nbufs, (npages * 4096));
  
-       /* 
+       /*
          * Calculate how many sets:
          */
-       *nsets = cacheconfig[0]/cacheconfig[2];
-       mutter("  %qd cpus; %qd cpus per L2 cache: using %d sets\n",
-               cacheconfig[0], cacheconfig[2], *nsets);
+       *nsets = cacheconfig[0] / cacheconfig[llc];
+       mutter("  %qd cpus; %qd cpus per L%d cache: using %d sets\n",
+           cacheconfig[0], cacheconfig[llc], llc, *nsets);
  }
  
  void (*producer_fnp)(int *data, int isize) = &writer_fn;
@@ -328,30 +335,26 @@ void (*consumer_fnp)(int *data, int isize) = &reader_fn;
  int
  main(int argc, char *argv[])
  {
-       int                     i;
-       int                     j;
-       int                     pages = 256; /* 1MB */
-       int                     buffers = 2;
-       int                     sets = 2;
-       int                     stages = 2;
-       int                     *status;
-       line_info_t             *line_info;
-       line_info_t             *lp;
-       stage_info_t            *stage_info;
-       stage_info_t            *sp;
-       kern_return_t           ret;
-       int                     c;
+       int                     i;
+       int                     j;
+       int                     pages = 256; /* 1MB */
+       int                     buffers = 2;
+       int                     sets = 2;
+       int                     stages = 2;
+       int                     *status;
+       line_info_t             *line_info;
+       line_info_t             *lp;
+       stage_info_t            *stage_info;
+       stage_info_t            *sp;
+       kern_return_t           ret;
+       int                     c;
  
         /* Do switch parsing: */
-       while ((c = getopt (argc, argv, "ab:chi:p:s:twv:")) != -1) {
+       while ((c = getopt(argc, argv, "ab:chi:p:s:twv:")) != -1) {
                 switch (c) {
                 case 'a':
-#ifdef AVAILABLE_MAC_OS_X_VERSION_10_5_AND_LATER
                         affinity = !affinity;
                         break;
-#else
-                       usage();
-#endif
                 case 'b':
                         buffers = atoi(optarg);
                         break;
@@ -366,8 +369,9 @@ main(int argc, char *argv[])
                         break;
                 case 's':
                         stages = atoi(optarg);
-                       if (stages >= WORKERS_MAX)
+                       if (stages >= WORKERS_MAX) {
                                 usage();
+                       }
                         break;
                 case 't':
                         halting = TRUE;
@@ -385,39 +389,43 @@ main(int argc, char *argv[])
                 }
         }
         argc -= optind; argv += optind;
-       if (argc > 0)
+       if (argc > 0) {
                 sets = atoi(*argv);
+       }
  
-       if (cache_config)
+       if (cache_config) {
                 auto_config(pages, &buffers, &sets);
+       }
  
         pthread_mutex_init(&funnel, NULL);
         pthread_cond_init(&barrier, NULL);
  
         /*
-        * Fire up the worker threads.
+        * Fire up the worker threads.
          */
         threads = sets * stages;
         mutter("Launching %d set%s of %d threads with %saffinity, "
-                       "consumer reads%s data\n",
-               sets, s_if_plural(sets), stages, affinity? "": "no ",
-               (consumer_fnp == &reader_writer_fn)? " and writes" : "");
-       if (pages < 256)
+           "consumer reads%s data\n",
+           sets, s_if_plural(sets), stages, affinity? "": "no ",
+           (consumer_fnp == &reader_writer_fn)? " and writes" : "");
+       if (pages < 256) {
                 mutter("  %dkB bytes per buffer, ", pages * 4);
-       else
+       } else {
                 mutter("  %dMB bytes per buffer, ", pages / 256);
+       }
         mutter("%d buffer%s per set ",
-               buffers, s_if_plural(buffers));
-       if (buffers * pages < 256)
+           buffers, s_if_plural(buffers));
+       if (buffers * pages < 256) {
                 mutter("(total %dkB)\n", buffers * pages * 4);
-       else
+       } else {
                 mutter("(total %dMB)\n", buffers * pages / 256);
+       }
         mutter("  processing %d buffer%s...\n",
-               iterations, s_if_plural(iterations));
+           iterations, s_if_plural(iterations));
         line_info = (line_info_t *) malloc(sets * sizeof(line_info_t));
         stage_info = (stage_info_t *) malloc(sets * stages * sizeof(stage_info_t));
         for (i = 0; i < sets; i++) {
-               work_t  *work_array;
+               work_t  *work_array;
  
                 lp = &line_info[i];
  
@@ -427,7 +435,7 @@ main(int argc, char *argv[])
  
                 /* Set up the queue for the workers of this thread set: */
                 for (j = 0; j < stages; j++) {
-                       sp = &stage_info[(i*stages) + j];
+                       sp = &stage_info[(i * stages) + j];
                         sp->stagenum = j;
                         sp->set = lp;
                         lp->stage[j] = sp;
@@ -457,18 +465,19 @@ main(int argc, char *argv[])
                 /* Set up the buffers on the first worker of the set. */
                 work_array = (work_t *)  malloc(buffers * sizeof(work_t));
                 for (j = 0; j < buffers; j++) {
-                       work_array[j].data = lp->data + (lp->isize * j);        
+                       work_array[j].data = lp->data + (lp->isize * j);
                         TAILQ_INSERT_TAIL(&lp->stage[0]->bufq.queue, &work_array[j], link);
                         DBG("  empty work item %p for set %d data %p\n",
-                               &work_array[j], i, work_array[j].data);
+                           &work_array[j], i, work_array[j].data);
                 }
  
                 /* Create this set of threads */
                 for (j = 0; j < stages; j++) {
                         if (ret = pthread_create(&lp->stage[j]->thread, NULL,
-                                       &manager_fn,
-                                       (void *) lp->stage[j]))
-                       err(1, "pthread_create %d,%d", i, j);
+                           &manager_fn,
+                           (void *) lp->stage[j])) {
+                               err(1, "pthread_create %d,%d", i, j);
+                       }
                 }
         }
  
@@ -478,8 +487,9 @@ main(int argc, char *argv[])
         for (i = 0; i < sets; i++) {
                 lp = &line_info[i];
                 for (j = 0; j < stages; j++) {
-                       if(ret = pthread_join(lp->stage[j]->thread, (void **)&status))
-                           err(1, "pthread_join %d,%d", i, j);
+                       if (ret = pthread_join(lp->stage[j]->thread, (void **)&status)) {
+                               err(1, "pthread_join %d,%d", i, j);
+                       }
                         DBG("Thread %d,%d status %d\n", i, j, status);
                 }
         }
@@ -490,7 +500,7 @@ main(int argc, char *argv[])
         timer = mach_absolute_time() - timer;
         timer = timer / 1000000ULL;
         printf("%d.%03d seconds elapsed.\n",
-               (int) (timer/1000ULL), (int) (timer % 1000ULL));
+           (int) (timer / 1000ULL), (int) (timer % 1000ULL));
  
         return 0;
  }