xnu-3789.1.32.tar.gz

[apple/xnu.git] / osfmk / kern / sched_average.c
diff --git a/osfmk/kern/sched_average.c b/osfmk/kern/sched_average.c

index e20ddff7384db45c178617093605b20dfaad479e..cf95209153bbea6f9c216fd023e3091b88a13407 100644 (file)
--- a/osfmk/kern/sched_average.c
+++ b/osfmk/kern/sched_average.c
@@ -68,10 +68,18 @@
  #include <kern/assert.h>
  #include <kern/processor.h>
  #include <kern/thread.h>
-       
+#if CONFIG_TELEMETRY
+#include <kern/telemetry.h>
+#endif
+
+#include <sys/kdebug.h>
+
  uint32_t       avenrun[3] = {0, 0, 0};
  uint32_t       mach_factor[3] = {0, 0, 0};
  
+uint32_t       sched_load_average, sched_mach_factor;
+
+#if defined(CONFIG_SCHED_TIMESHARE_CORE)
  /*
   * Values are scaled by LOAD_SCALE, defined in processor_info.h
   */
@@ -87,98 +95,158 @@ static uint32_t            fract[3] = {
  #undef base
  #undef frac
  
+#endif /* CONFIG_SCHED_TIMESHARE_CORE */
+
  static unsigned int            sched_nrun;
  
  typedef void   (*sched_avg_comp_t)(
                                         void                    *param);
  
-#define SCHED_AVG_SECS(n)      ((n) << SCHED_TICK_SHIFT)
-
  static struct sched_average {
         sched_avg_comp_t        comp;
-       void                            *param;
-       int                                     period;
-       int                                     tick;                   
+       void                    *param;
+       int                     period; /* in seconds */
+       uint64_t                deadline;
  } sched_average[] = {
-       { compute_averunnable, &sched_nrun, SCHED_AVG_SECS(5), 0 },
-       { compute_stack_target, NULL, SCHED_AVG_SECS(5), 1 },
-       { compute_memory_pressure, NULL, SCHED_AVG_SECS(1), 0 },
+       { compute_averunnable, &sched_nrun, 5, 0 },
+       { compute_stack_target, NULL, 5, 1 },
+       { compute_memory_pressure, NULL, 1, 0 },
+       { compute_pageout_gc_throttle, NULL, 1, 0 },
+       { compute_pmap_gc_throttle, NULL, 60, 0 },
+#if CONFIG_TELEMETRY
+       { compute_telemetry, NULL, 1, 0 },
+#endif
         { NULL, NULL, 0, 0 }
  };
  
  typedef struct sched_average   *sched_average_t;
  
+uint32_t load_now[TH_BUCKET_MAX];
+
+/* The "stdelta" parameter represents the number of scheduler maintenance
+ * "ticks" that have elapsed since the last invocation, subject to
+ * integer division imprecision.
+ */
+
  void
-compute_averages(void)
+compute_averages(uint64_t stdelta)
  {
-       int                                     ncpus, nthreads, nshared;
-       uint32_t                        factor_now, average_now, load_now = 0;
-       sched_average_t         avg;
-
         /*
-        *      Retrieve counts, ignoring
-        *      the current thread.
+        * Retrieve a snapshot of the current run counts.
+        *
+        * Why not a bcopy()? Because we need atomic word-sized reads of sched_run_buckets,
+        * not byte-by-byte copy.
          */
-       ncpus = processor_avail_count;
-       nthreads = sched_run_count - 1;
-       nshared = sched_share_count;
+       uint32_t ncpus = processor_avail_count;
+
+       load_now[TH_BUCKET_RUN]      = sched_run_buckets[TH_BUCKET_RUN];
+       load_now[TH_BUCKET_FIXPRI]   = sched_run_buckets[TH_BUCKET_FIXPRI];
+       load_now[TH_BUCKET_SHARE_FG] = sched_run_buckets[TH_BUCKET_SHARE_FG];
+       load_now[TH_BUCKET_SHARE_UT] = sched_run_buckets[TH_BUCKET_SHARE_UT];
+       load_now[TH_BUCKET_SHARE_BG] = sched_run_buckets[TH_BUCKET_SHARE_BG];
+
+       assert(load_now[TH_BUCKET_RUN] >= 0);
+       assert(load_now[TH_BUCKET_FIXPRI] >= 0);
+
+       /* Ignore the current thread, which is a running fixpri thread */
+
+       uint32_t nthreads = load_now[TH_BUCKET_RUN] - 1;
+       uint32_t nfixpri  = load_now[TH_BUCKET_FIXPRI] - 1;
+
+       KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+               MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LOAD) | DBG_FUNC_NONE,
+               load_now[TH_BUCKET_FIXPRI] - 1, load_now[TH_BUCKET_SHARE_FG],
+               load_now[TH_BUCKET_SHARE_BG],   load_now[TH_BUCKET_SHARE_UT], 0);
  
         /*
-        *      Load average and mach factor calculations for
-        *      those which ask about these things.
+        * Compute the timeshare priority conversion factor based on loading.
+        * Because our counters may be incremented and accessed
+        * concurrently with respect to each other, we may have
+        * windows where the invariant (nthreads - nfixpri) == (fg + bg + ut)
+        * is broken, so truncate values in these cases.
          */
-       average_now = nthreads * LOAD_SCALE;
  
-       if (nthreads > ncpus)
-               factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1);
-       else
-               factor_now = (ncpus - nthreads) * LOAD_SCALE;
+       uint32_t timeshare_threads = (nthreads - nfixpri);
  
-       sched_mach_factor =     ((sched_mach_factor << 2) + factor_now) / 5;
-       sched_load_average = ((sched_load_average << 2) + average_now) / 5;
+       for (uint32_t i = TH_BUCKET_SHARE_FG; i <= TH_BUCKET_SHARE_BG ; i++) {
+               if (load_now[i] > timeshare_threads)
+                       load_now[i] = timeshare_threads;
+       }
  
         /*
-        *      Compute the timeshare priority
-        *      conversion factor based on loading.
+        * Utility threads contribute up to NCPUS of load to FG threads
          */
-       if (nshared > nthreads)
-               nshared = nthreads;
-
-       if (nshared > ncpus) {
-               if (ncpus > 1)
-                       load_now = nshared / ncpus;
-               else
-                       load_now = nshared;
+       if (load_now[TH_BUCKET_SHARE_UT] <= ncpus) {
+               load_now[TH_BUCKET_SHARE_FG] += load_now[TH_BUCKET_SHARE_UT];
+       } else {
+               load_now[TH_BUCKET_SHARE_FG] += ncpus;
+       }
  
-               if (load_now > NRQS - 1)
-                       load_now = NRQS - 1;
+       /*
+        * FG and UT should notice there's one thread of competition from BG,
+        * but no more.
+        */
+       if (load_now[TH_BUCKET_SHARE_BG] > 0) {
+               load_now[TH_BUCKET_SHARE_FG] += 1;
+               load_now[TH_BUCKET_SHARE_UT] += 1;
         }
  
         /*
-        *      The conversion factor consists of
-        *      two components: a fixed value based
-        *      on the absolute time unit, and a
-        *      dynamic portion based on loading.
+        * The conversion factor consists of two components:
+        * a fixed value based on the absolute time unit (sched_fixed_shift),
+        * and a dynamic portion based on load (sched_load_shifts).
          *
-        *      Zero loading results in a out of range
-        *      shift count.  Accumulated usage is ignored
-        *      during conversion and new usage deltas
-        *      are discarded.
+        * Zero load results in a out of range shift count.
          */
-       sched_pri_shift = sched_fixed_shift - sched_load_shifts[load_now];
+
+       for (uint32_t i = TH_BUCKET_SHARE_FG; i <= TH_BUCKET_SHARE_BG ; i++) {
+               uint32_t bucket_load = 0;
+
+               if (load_now[i] > ncpus) {
+                       if (ncpus > 1)
+                               bucket_load = load_now[i] / ncpus;
+                       else
+                               bucket_load = load_now[i];
+
+                       if (bucket_load > MAX_LOAD)
+                               bucket_load = MAX_LOAD;
+               }
+
+               sched_pri_shifts[i] = sched_fixed_shift - sched_load_shifts[bucket_load];
+       }
  
         /*
-        *      Sample total running threads.
+        * Sample total running threads for the load average calculation.
          */
         sched_nrun = nthreads;
  
         /*
-        * Compute old-style Mach load averages.
+        * Load average and mach factor calculations for
+        * those which ask about these things.
          */
-       {
-               register int            i;
+       uint32_t average_now = nthreads * LOAD_SCALE;
+       uint32_t factor_now;
  
-               for (i = 0; i < 3; i++) {
+       if (nthreads > ncpus)
+               factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1);
+       else
+               factor_now = (ncpus - nthreads) * LOAD_SCALE;
+
+       /*
+        * For those statistics that formerly relied on being recomputed
+        * on timer ticks, advance by the approximate number of corresponding
+        * elapsed intervals, thus compensating for potential idle intervals.
+        */
+       for (uint32_t index = 0; index < stdelta; index++) {
+               sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5;
+               sched_load_average = ((sched_load_average << 2) + average_now) / 5;
+       }
+
+       /*
+        * Compute old-style Mach load averages.
+        */
+       for (uint32_t index = 0; index < stdelta; index++) {
+               for (uint32_t i = 0; i < 3; i++) {
                         mach_factor[i] = ((mach_factor[i] * fract[i]) +
                                                 (factor_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;
  
@@ -188,12 +256,22 @@ compute_averages(void)
         }
  
         /*
-        *      Compute averages in other components.
+        * Compute averages in other components.
          */
-       for (avg = sched_average; avg->comp != NULL; ++avg) {
-               if (++avg->tick >= avg->period) {
-                       (*avg->comp)(avg->param);
-                       avg->tick = 0;
+       uint64_t abstime = mach_absolute_time();
+
+       for (sched_average_t avg = sched_average; avg->comp != NULL; ++avg) {
+               if (abstime >= avg->deadline) {
+                       uint64_t period_abs = (avg->period * sched_one_second_interval);
+                       uint64_t ninvokes = 1;
+
+                       ninvokes += (abstime - avg->deadline) / period_abs;
+                       ninvokes = MIN(ninvokes, SCHED_TICK_MAX_DELTA);
+
+                       for (uint32_t index = 0; index < ninvokes; index++) {
+                               (*avg->comp)(avg->param);
+                       }
+                       avg->deadline = abstime + period_abs;
                 }
         }
  }