/*
 * Test to validate that we can schedule threads on all hw.ncpus cores according to _os_cpu_number
 *
 * <rdar://problem/29545645>
 *
 *  xcrun -sdk macosx.internal clang -o cpucount cpucount.c -ldarwintest -g -Weverything
 *  xcrun -sdk iphoneos.internal clang -arch arm64 -o cpucount-ios cpucount.c -ldarwintest -g -Weverything
 */
#include <darwintest.h>

#include <stdio.h>
#include <stdbool.h>
#include <assert.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/sysctl.h>
#include <stdatomic.h>

#include <mach/mach.h>
#include <mach/mach_time.h>

#include <os/tsd.h> /* private header for _os_cpu_number */
T_GLOBAL_META(T_META_RUN_CONCURRENTLY(true));

/* const variables aren't constants, but enums are */
enum { max_threads = 40 };
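/*
 * Illustration (not part of the original file): with
 *   static const uint32_t max_threads = 40;
 * `max_threads` would not be an integer constant expression in C, so it could
 * not size the fixed-length g_cpu_seen array declared below; an enumerator can.
 */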
#define CACHE_ALIGNED __attribute__((aligned(128)))

static _Atomic CACHE_ALIGNED uint64_t g_ready_threads = 0;

static _Atomic CACHE_ALIGNED bool g_cpu_seen[max_threads];

static _Atomic CACHE_ALIGNED bool g_bail = false;

static uint32_t g_threads; /* set by sysctl hw.ncpu */

static uint64_t g_spin_ms = 50; /* it takes ~50ms of spinning for CLPC to deign to give us all cores */

/*
 * sometimes pageout scan can eat all of CPU 0 long enough to fail the test,
 * so we run the test at RT priority
 */
static uint32_t g_thread_pri = 97;

/*
 * add in some extra low-pri threads to convince the amp scheduler to use E-cores consistently
 * works around <rdar://problem/29636191>
 */
static uint32_t g_spin_threads = 2;
static uint32_t g_spin_threads_pri = 20;

static semaphore_t g_readysem, g_go_sem;

static mach_timebase_info_data_t timebase_info;
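/*
 * Note (summary inferred from the code below, not an original comment):
 * each worker thread calls semaphore_wait_signal(g_go_sem, g_readysem), which
 * signals "ready" on g_readysem and blocks on g_go_sem; the test body collects
 * one g_readysem signal per thread, warms up CLPC, broadcasts g_go_sem with
 * semaphore_signal_all(), and then collects g_readysem again as each worker
 * finishes its search loop.
 */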
static uint64_t
nanos_to_abs(uint64_t nanos)
{
	return nanos * timebase_info.denom / timebase_info.numer;
}
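/*
 * Worked example (illustrative values, not from the original source): if
 * mach_timebase_info reports numer = 125, denom = 3 (a 24 MHz timebase), then
 * nanos_to_abs(50 * NSEC_PER_MSEC) = 50000000 * 3 / 125 = 1200000 ticks.
 * On hardware where numer == denom == 1 the conversion is the identity.
 */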
static void
set_realtime(pthread_t thread)
{
	kern_return_t kr;
	thread_time_constraint_policy_data_t pol;

	mach_port_t target_thread = pthread_mach_thread_np(thread);
	T_QUIET; T_ASSERT_NOTNULL(target_thread, "pthread_mach_thread_np");

	/* period 1s, constraint 100ms, computation 10ms */
	pol.period      = (uint32_t)nanos_to_abs(1000000000);
	pol.constraint  = (uint32_t)nanos_to_abs(100000000);
	pol.computation = (uint32_t)nanos_to_abs(10000000);

	pol.preemptible = 0; /* Ignored by OS */
	kr = thread_policy_set(target_thread, THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&pol,
	    THREAD_TIME_CONSTRAINT_POLICY_COUNT);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "thread_policy_set(THREAD_TIME_CONSTRAINT_POLICY)");
}
static pthread_t
create_thread(void *(*start_routine)(void *), uint32_t priority)
{
	int rv;
	pthread_t new_thread;
	pthread_attr_t attr;

	struct sched_param param = { .sched_priority = (int)priority };

	rv = pthread_attr_init(&attr);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_init");

	rv = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_setdetachstate");

	rv = pthread_attr_setschedparam(&attr, &param);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_setschedparam");

	rv = pthread_create(&new_thread, &attr, start_routine, NULL);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_create");

	if (priority == 97) {
		set_realtime(new_thread);
	}

	rv = pthread_attr_destroy(&attr);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "pthread_attr_destroy");

	return new_thread;
}
static void *
thread_fn(__unused void *arg)
{
	T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");

	kern_return_t kr;

	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	/* atomic inc to say hello */
	g_ready_threads++;

	uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

	/*
	 * spin to force the other threads to spread out across the cores
	 * may take some time if cores are masked and CLPC needs to warm up to unmask them
	 */
	while (g_ready_threads < g_threads && mach_absolute_time() < timeout) {
		;
	}

	T_QUIET; T_ASSERT_GE(timeout, mach_absolute_time(), "waiting for all threads took too long");

	timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

	int cpunum = 0;
	int iteration = 0;

	/* search for new CPUs for the duration */
	while (mach_absolute_time() < timeout) {
		cpunum = _os_cpu_number();

		assert(cpunum < max_threads);

		g_cpu_seen[cpunum] = true;

		if (iteration++ % 10000) {
			uint32_t cpus_seen = 0;

			for (uint32_t i = 0; i < g_threads; i++) {
				if (g_cpu_seen[i]) {
					cpus_seen++;
				}
			}

			/* bail out early if we saw all CPUs */
			if (cpus_seen == g_threads) {
				break;
			}
		}
	}

	/* tell the spinner threads they can stop too */
	g_bail = true;

	printf("thread cpunum: %d\n", cpunum);

	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	return NULL;
}
static void *
spin_fn(__unused void *arg)
{
	T_QUIET; T_EXPECT_TRUE(true, "initialize darwintest on this thread");

	kern_return_t kr;

	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC * 2) + mach_absolute_time();

	/*
	 * run and sleep a bit to force some scheduler churn to get all the cores active
	 * needed to work around bugs in the amp scheduler
	 */
	while (mach_absolute_time() < timeout && g_bail == false) {
		usleep(500); /* sleep duration is an assumption; the comment above only asks for a short sleep */

		uint64_t inner_timeout = nanos_to_abs(1 * NSEC_PER_MSEC) + mach_absolute_time();

		while (mach_absolute_time() < inner_timeout && g_bail == false) {
			;
		}
	}

	kr = semaphore_wait_signal(g_go_sem, g_readysem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait_signal");

	return NULL;
}
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgnu-flexible-array-initializer"
T_DECL(count_cpus, "Tests we can schedule threads on all hw.ncpus cores according to _os_cpu_number",
    T_META_CHECK_LEAKS(false), T_META_ENABLED(false))
#pragma clang diagnostic pop
{
	setvbuf(stdout, NULL, _IONBF, 0);
	setvbuf(stderr, NULL, _IONBF, 0);

	int rv;
	kern_return_t kr;

	kr = mach_timebase_info(&timebase_info);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "mach_timebase_info");

	kr = semaphore_create(mach_task_self(), &g_readysem, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");

	kr = semaphore_create(mach_task_self(), &g_go_sem, SYNC_POLICY_FIFO, 0);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_create");

	size_t ncpu_size = sizeof(g_threads);
	rv = sysctlbyname("hw.ncpu", &g_threads, &ncpu_size, NULL, 0);
	T_QUIET; T_ASSERT_POSIX_SUCCESS(rv, "sysctlbyname(hw.ncpu)");

	printf("hw.ncpu: %2d\n", g_threads);

	assert(g_threads < max_threads);

	for (uint32_t i = 0; i < g_threads; i++) {
		create_thread(&thread_fn, g_thread_pri);
	}

	for (uint32_t i = 0; i < g_spin_threads; i++) {
		create_thread(&spin_fn, g_spin_threads_pri);
	}

	for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
		kr = semaphore_wait(g_readysem);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
	}

	uint64_t timeout = nanos_to_abs(g_spin_ms * NSEC_PER_MSEC) + mach_absolute_time();

	/* spin to warm up CLPC :) */
	while (mach_absolute_time() < timeout) {
		;
	}

	kr = semaphore_signal_all(g_go_sem);
	T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_signal_all");

	for (uint32_t i = 0; i < g_threads + g_spin_threads; i++) {
		kr = semaphore_wait(g_readysem);
		T_QUIET; T_ASSERT_MACH_SUCCESS(kr, "semaphore_wait");
	}

	uint32_t cpus_seen = 0;

	for (uint32_t i = 0; i < g_threads; i++) {
		if (g_cpu_seen[i]) {
			cpus_seen++;
		}

		printf("cpu %2d: %d\n", i, g_cpu_seen[i]);
	}

	T_ASSERT_EQ(cpus_seen, g_threads, "test should have run threads on all CPUS");
}