/*
 * Test to validate that we can schedule threads on all hw.ncpus cores according to _os_cpu_number
 *
 * <rdar://problem/29545645>
 *
 * xcrun -sdk macosx.internal clang -o cpucount cpucount.c -ldarwintest -g -Weverything
 * xcrun -sdk iphoneos.internal clang -arch arm64 -o cpucount-ios cpucount.c -ldarwintest -g -Weverything
 */
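
/*
 * Test shape: the T_DECL at the bottom spawns one real-time worker thread per
 * hw.ncpu plus a couple of low-priority spinner threads, rendezvouses with all
 * of them via g_readysem/g_go_sem, then lets the workers repeatedly sample
 * _os_cpu_number() and record each CPU they observe in g_cpu_seen[]. The test
 * passes only if every CPU index in [0, hw.ncpu) was seen.
 */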

#include <darwintest.h>
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/sysctl.h>
#include <stdatomic.h>
#include <mach/mach.h>
#include <mach/mach_time.h>
#include <os/tsd.h> /* private header for _os_cpu_number */

/* const variables aren't constants, but enums are */
enum { max_threads = 40 };

#define CACHE_ALIGNED __attribute__((aligned(128)))

static _Atomic CACHE_ALIGNED uint64_t g_ready_threads = 0;

static _Atomic CACHE_ALIGNED bool g_cpu_seen[max_threads];

static _Atomic CACHE_ALIGNED bool g_bail = false;

static uint32_t g_threads; /* set by sysctl hw.ncpu */

static uint64_t g_spin_ms = 50; /* it takes ~50ms of spinning for CLPC to deign to give us all cores */

/*
 * sometimes pageout scan can eat all of CPU 0 long enough to fail the test,
 * so we run the test at RT priority
 */
static uint32_t g_thread_pri = 97;

/*
 * add in some extra low-pri threads to convince the amp scheduler to use E-cores consistently
 * works around <rdar://problem/29636191>
 */
static uint32_t g_spin_threads = 2;
static uint32_t g_spin_threads_pri = 20;

static semaphore_t g_readysem, g_go_sem;

static mach_timebase_info_data_t timebase_info;
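
/*
 * Convert a nanosecond count into mach absolute-time units using the cached
 * timebase ratio (abs = nanos * denom / numer), the inverse of the usual
 * abs-to-nanos conversion. The ratio is 1:1 on some hardware but not all, so
 * the conversion is required for the timeouts below to be correct everywhere.
 */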

static uint64_t
nanos_to_abs(uint64_t nanos)
{
    return nanos * timebase_info.denom / timebase_info.numer;
}
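
/*
 * Promote a thread to THREAD_TIME_CONSTRAINT_POLICY with a 1s period, 10ms of
 * computation, and a 100ms constraint (converted to absolute-time units), so
 * the workers aren't starved by lower-priority system activity while spinning.
 */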

static void
set_realtime(pthread_t thread)
{
    kern_return_t kr;
    thread_time_constraint_policy_data_t pol;

    mach_port_t target_thread = pthread_mach_thread_np(thread);
    T_QUIET; T_ASSERT_NOTNULL(target_thread, "pthread_mach_thread_np");

    /* period 1s, constraint 100ms, computation 10ms */
    pol.period      = (uint32_t)nanos_to_abs(1000000000);
    pol.constraint  = (uint32_t)nanos_to_abs(100000000);
    pol.computation = (uint32_t)nanos_to_abs(10000000);

    pol.preemptible = 0; /* Ignored by OS */
    kr = thread_policy_set(target_thread, THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&pol,
        THREAD_TIME_CONSTRAINT_POLICY_COUNT);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_policy_set(THREAD_TIME_CONSTRAINT_POLICY)");
}

static pthread_t
create_thread(void *(*start_routine)(void *), uint32_t priority)
{
    int rv;
    pthread_t new_thread;
    pthread_attr_t attr;

    struct sched_param param = { .sched_priority = (int)priority };

    rv = pthread_attr_init(&attr);
    T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_init");

    rv = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_setdetachstate");

    rv = pthread_attr_setschedparam(&attr, &param);
    T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_setschedparam");

    rv = pthread_create(&new_thread, &attr, start_routine, NULL);
    T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_create");

    if (priority == 97) {
        set_realtime(new_thread);
    }

    rv = pthread_attr_destroy(&attr);
    T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_destroy");

    return new_thread;
}

static void *
thread_fn(__unused void *arg)
{
    T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");

    kern_return_t kr;

    kr = semaphore_wait_signal(g_go_sem, g_readysem);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

    /* atomic inc to say hello */
    g_ready_threads++;

    uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

    /*
     * spin to force the other threads to spread out across the cores
     * may take some time if cores are masked and CLPC needs to warm up to unmask them
     */
    while (g_ready_threads < g_threads && mach_absolute_time() < timeout) {
        /* spin */
    }

    T_QUIET; T_ASSERT_GE(timeout, mach_absolute_time(), "waiting for all threads took too long");

    timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

    int iteration = 0;
    uint32_t cpunum = 0;

    /* search for new CPUs for the duration */
    while (mach_absolute_time() < timeout) {
        cpunum = _os_cpu_number();

        assert(cpunum < max_threads);

        g_cpu_seen[cpunum] = true;

        if (iteration++ % 10000) {
            uint32_t cpus_seen = 0;

            for (uint32_t i = 0; i < g_threads; i++) {
                if (g_cpu_seen[i]) {
                    cpus_seen++;
                }
            }

            /* bail out early if we saw all CPUs */
            if (cpus_seen == g_threads) {
                break;
            }
        }
    }

    /* tell the spinner threads they can stop */
    g_bail = true;

    printf("thread cpunum: %d\n", cpunum);

    kr = semaphore_wait_signal(g_go_sem, g_readysem);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

    return NULL;
}

static void *
spin_fn(__unused void *arg)
{
    T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");

    kern_return_t kr;

    kr = semaphore_wait_signal(g_go_sem, g_readysem);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

    uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC * 2) + mach_absolute_time();

    /*
     * run and sleep a bit to force some scheduler churn to get all the cores active
     * needed to work around bugs in the amp scheduler
     */
    while (mach_absolute_time() < timeout && g_bail == false) {
        usleep(500); /* sleep briefly, then spin for ~1ms */

        uint64_t inner_timeout = nanos_to_abs(1 * NSEC_PER_MSEC) + mach_absolute_time();

        while (mach_absolute_time() < inner_timeout && g_bail == false) {
            /* spin */
        }
    }

    kr = semaphore_wait_signal(g_go_sem, g_readysem);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

    return NULL;
}

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgnu-flexible-array-initializer"
T_DECL(count_cpus, "Tests we can schedule threads on all hw.ncpus cores according to _os_cpu_number",
    T_META_CHECK_LEAKS(false), T_META_ENABLED(false))
#pragma clang diagnostic pop
{
    setvbuf(stdout, NULL, _IONBF, 0);
    setvbuf(stderr, NULL, _IONBF, 0);

    int rv;
    kern_return_t kr;

    kr = mach_timebase_info(&timebase_info);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");

    kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");

    kr = semaphore_create(mach_task_self(), &g_go_sem, SYNC_POLICY_FIFO, 0);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");

    size_t ncpu_size = sizeof(g_threads);
    rv = sysctlbyname("hw.ncpu", &g_threads, &ncpu_size, NULL, 0);
    T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sysctlbyname(hw.ncpu)");

    printf("hw.ncpu: %2d\n", g_threads);

    assert(g_threads < max_threads);

    for (uint32_t i = 0; i < g_threads; i++) {
        create_thread(&thread_fn, g_thread_pri);
    }

    for (uint32_t i = 0; i < g_spin_threads; i++) {
        create_thread(&spin_fn, g_spin_threads_pri);
    }

    for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
        kr = semaphore_wait(g_readysem);
        T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
    }

    uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

    /* spin to warm up CLPC :) */
    while (mach_absolute_time() < timeout) {
        /* spin */
    }

    kr = semaphore_signal_all(g_go_sem);
    T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");

    for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
        kr = semaphore_wait(g_readysem);
        T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
    }

    uint32_t cpus_seen = 0;

    for (uint32_t i = 0; i < g_threads; i++) {
        if (g_cpu_seen[i]) {
            cpus_seen++;
        }

        printf("cpu %2d: %d\n", i, g_cpu_seen[i]);
    }

    T_ASSERT_EQ(cpus_seen, g_threads, "test should have run threads on all CPUS");
}