uint32_t avenrun[3] = {0, 0, 0};
uint32_t mach_factor[3] = {0, 0, 0};
+uint32_t sched_load_average, sched_mach_factor;
+
#if defined(CONFIG_SCHED_TIMESHARE_CORE)
/*
* Values are scaled by LOAD_SCALE, defined in processor_info.h
*/
{ compute_averunnable, &sched_nrun, 5, 0 },
{ compute_stack_target, NULL, 5, 1 },
{ compute_memory_pressure, NULL, 1, 0 },
- { compute_zone_gc_throttle, NULL, 60, 0 },
{ compute_pageout_gc_throttle, NULL, 1, 0 },
{ compute_pmap_gc_throttle, NULL, 60, 0 },
#if CONFIG_TELEMETRY
typedef struct sched_average *sched_average_t;
/* The "stdelta" parameter represents the number of scheduler maintenance
* "ticks" that have elapsed since the last invocation, subject to
* integer division imprecision.
*/
void
compute_averages(uint64_t stdelta)
{
+ uint32_t load_now[TH_BUCKET_MAX];
+
- int ncpus, nthreads, nshared, nbackground, nshared_non_bg;
- uint32_t factor_now, average_now, load_now = 0, background_load_now = 0, combined_fgbg_load_now = 0;
- sched_average_t avg;
- uint64_t abstime, index;
-
/*
- * Retrieve counts, ignoring
- * the current thread.
+ * Retrieve a snapshot of the current run counts.
+ *
+ * Why not a bcopy()? Because we need atomic word-sized reads of sched_run_buckets,
+ * not a byte-by-byte copy.
*/
- ncpus = processor_avail_count;
- nthreads = sched_run_count - 1;
- nshared = sched_share_count;
- nbackground = sched_background_count;
+ uint32_t ncpus = processor_avail_count;
- /*
- * Load average and mach factor calculations for
- * those which ask about these things.
- */
- average_now = nthreads * LOAD_SCALE;
+ load_now[TH_BUCKET_RUN] = sched_run_buckets[TH_BUCKET_RUN];
+ load_now[TH_BUCKET_FIXPRI] = sched_run_buckets[TH_BUCKET_FIXPRI];
+ load_now[TH_BUCKET_SHARE_FG] = sched_run_buckets[TH_BUCKET_SHARE_FG];
+ load_now[TH_BUCKET_SHARE_UT] = sched_run_buckets[TH_BUCKET_SHARE_UT];
+ load_now[TH_BUCKET_SHARE_BG] = sched_run_buckets[TH_BUCKET_SHARE_BG];
- if (nthreads > ncpus)
- factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1);
- else
- factor_now = (ncpus - nthreads) * LOAD_SCALE;
+ assert(load_now[TH_BUCKET_RUN] >= 1);
+ assert(load_now[TH_BUCKET_FIXPRI] >= 1);
+
+ /* Ignore the current thread, which is a running fixpri thread */
+
+ uint32_t nthreads = load_now[TH_BUCKET_RUN] - 1;
+ uint32_t nfixpri = load_now[TH_BUCKET_FIXPRI] - 1;
+
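+ /* Report the per-bucket run counts (fixpri minus the current thread) via kdebug */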
+ KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+ MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LOAD) | DBG_FUNC_NONE,
+ load_now[TH_BUCKET_FIXPRI] - 1, load_now[TH_BUCKET_SHARE_FG],
+ load_now[TH_BUCKET_SHARE_BG], load_now[TH_BUCKET_SHARE_UT], 0);
- /* For those statistics that formerly relied on being recomputed
- * on timer ticks, advance by the approximate number of corresponding
- * elapsed intervals, thus compensating for potential idle intervals.
- */
- for (index = 0; index < stdelta; index++) {
- sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5;
- sched_load_average = ((sched_load_average << 2) + average_now) / 5;
- }
/*
* Compute the timeshare priority conversion factor based on loading.
* Because our counters may be incremented and accessed
* concurrently with respect to each other, we may have
- * windows where the invariant nthreads >= nshared >= nbackground
+ * windows where the invariant (nthreads - nfixpri) == (fg + bg + ut)
* is broken, so truncate values in these cases.
*/
- if (nshared > nthreads)
- nshared = nthreads;
-
- if (nbackground > nshared)
- nbackground = nshared;
+ uint32_t timeshare_threads = (nthreads - nfixpri);
- nshared_non_bg = nshared - nbackground;
+ for (uint32_t i = TH_BUCKET_SHARE_FG; i <= TH_BUCKET_SHARE_BG; i++) {
+ if (load_now[i] > timeshare_threads)
+ load_now[i] = timeshare_threads;
+ }
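+
+ /* Fixpri threads are excluded here: only the timeshare buckets drive the load-based pri_shifts computed below */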
- if (nshared_non_bg > ncpus) {
- if (ncpus > 1)
- load_now = nshared_non_bg / ncpus;
- else
- load_now = nshared_non_bg;
+ /*
+ * Utility threads contribute up to NCPUS of load to FG threads
+ */
+ if (load_now[TH_BUCKET_SHARE_UT] <= ncpus) {
+ load_now[TH_BUCKET_SHARE_FG] += load_now[TH_BUCKET_SHARE_UT];
+ } else {
+ load_now[TH_BUCKET_SHARE_FG] += ncpus;
+ }
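+ /* e.g. with ncpus == 4: 2 UT threads add 2 to the FG load, but 10 UT threads add only 4 */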
- if (load_now > NRQS - 1)
- load_now = NRQS - 1;
+ /*
+ * FG and UT should notice there's one thread of competition from BG,
+ * but no more.
+ */
+ if (load_now[TH_BUCKET_SHARE_BG] > 0) {
+ load_now[TH_BUCKET_SHARE_FG] += 1;
+ load_now[TH_BUCKET_SHARE_UT] += 1;
}
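+
+ /*
+ * The BG bucket itself never absorbs FG/UT load, and BG adds at most one
+ * thread of load to FG/UT, so a flood of background threads cannot
+ * depress foreground priorities.
+ */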
- if (nbackground > ncpus) {
- if (ncpus > 1)
- background_load_now = nbackground / ncpus;
- else
- background_load_now = nbackground;
+ /*
+ * The conversion factor consists of two components:
+ * a fixed value based on the absolute time unit (sched_fixed_shift),
+ * and a dynamic portion based on load (sched_load_shifts).
+ *
+ * Zero load results in an out-of-range shift count.
+ */
- if (background_load_now > NRQS - 1)
- background_load_now = NRQS - 1;
- }
+ for (uint32_t i = TH_BUCKET_SHARE_FG; i <= TH_BUCKET_SHARE_BG; i++) {
+ uint32_t bucket_load = 0;
- if (nshared > ncpus) {
- if (ncpus > 1)
- combined_fgbg_load_now = nshared / ncpus;
- else
- combined_fgbg_load_now = nshared;
+ if (load_now[i] > ncpus) {
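+ /* Normalize the load to number of CPUs */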
+ if (ncpus > 1)
+ bucket_load = load_now[i] / ncpus;
+ else
+ bucket_load = load_now[i];
- if (combined_fgbg_load_now > NRQS - 1)
- combined_fgbg_load_now = NRQS - 1;
- }
+ if (bucket_load > MAX_LOAD)
+ bucket_load = MAX_LOAD;
+ }
- KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
- MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LOAD) | DBG_FUNC_NONE,
- (nthreads - nshared), (nshared - nbackground), nbackground, 0, 0);
+ sched_pri_shifts[i] = sched_fixed_shift - sched_load_shifts[bucket_load];
+ }
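+
+ /*
+ * The resulting shift is later used to convert a thread's accumulated CPU
+ * usage into a priority decrement (roughly sched_pri = base_pri -
+ * (sched_usage >> pri_shift)), so heavier load produces a smaller shift
+ * and faster priority decay for that bucket's threads.
+ */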
/*
- * Sample total running threads.
+ * Sample total running threads for the load average calculation.
*/
sched_nrun = nthreads;
-
-#if defined(CONFIG_SCHED_TIMESHARE_CORE)
/*
- * The conversion factor consists of
- * two components: a fixed value based
- * on the absolute time unit, and a
- * dynamic portion based on loading.
- *
- * Zero loading results in a out of range
- * shift count. Accumulated usage is ignored
- * during conversion and new usage deltas
- * are discarded.
+ * Load average and mach factor calculations for
+ * consumers that query them (e.g. via host_statistics()).
*/
- sched_pri_shift = sched_fixed_shift - sched_load_shifts[load_now];
- sched_background_pri_shift = sched_fixed_shift - sched_load_shifts[background_load_now];
- sched_combined_fgbg_pri_shift = sched_fixed_shift - sched_load_shifts[combined_fgbg_load_now];
+ uint32_t average_now = nthreads * LOAD_SCALE;
+ uint32_t factor_now;
+
+ if (nthreads > ncpus)
+ factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1);
+ else
+ factor_now = (ncpus - nthreads) * LOAD_SCALE;
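+
+ /*
+ * e.g. with 4 CPUs: 2 runnable threads give factor_now = 2 * LOAD_SCALE
+ * (two idle CPUs of headroom), while 8 runnable threads give
+ * (4 * LOAD_SCALE) / 9.
+ */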
/*
- * Compute old-style Mach load averages.
+ * For those statistics that formerly relied on being recomputed
+ * on timer ticks, advance by the approximate number of corresponding
+ * elapsed intervals, thus compensating for potential idle intervals.
*/
+ for (uint32_t index = 0; index < stdelta; index++) {
+ sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5;
+ sched_load_average = ((sched_load_average << 2) + average_now) / 5;
+ }
- for (index = 0; index < stdelta; index++) {
- register int i;
-
- for (i = 0; i < 3; i++) {
+ /*
+ * Compute old-style Mach load averages.
+ */
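+ /*
+ * fract[] holds the per-window decay factors (scaled by LOAD_SCALE; see
+ * its definition above), making avenrun[] and mach_factor[] fixed-point
+ * exponentially weighted moving averages.
+ */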
+ for (uint32_t index = 0; index < stdelta; index++) {
+ for (uint32_t i = 0; i < 3; i++) {
mach_factor[i] = ((mach_factor[i] * fract[i]) +
(factor_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;

avenrun[i] = ((avenrun[i] * fract[i]) +
(average_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;
}
}
-#endif /* CONFIG_SCHED_TIMESHARE_CORE */
/*
- * Compute averages in other components.
+ * Compute averages in other components.
*/
- abstime = mach_absolute_time();
- for (avg = sched_average; avg->comp != NULL; ++avg) {
+ uint64_t abstime = mach_absolute_time();
+
+ for (sched_average_t avg = sched_average; avg->comp != NULL; ++avg) {
if (abstime >= avg->deadline) {
uint64_t period_abs = (avg->period * sched_one_second_interval);
uint64_t ninvokes = 1;
ninvokes += (abstime - avg->deadline) / period_abs;
ninvokes = MIN(ninvokes, SCHED_TICK_MAX_DELTA);
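+ /*
+ * ninvokes counts whole periods missed since the last invocation;
+ * clamping to SCHED_TICK_MAX_DELTA bounds the catch-up work after a
+ * long idle stretch.
+ */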
- for (index = 0; index < ninvokes; index++) {
+ for (uint32_t index = 0; index < ninvokes; index++) {
(*avg->comp)(avg->param);
}
avg->deadline = abstime + period_abs;