X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/490019cf9519204c5fb36b2fba54ceb983bb6b72..3903760236c30e3b5ace7a4eefac3a269d68957c:/osfmk/kern/sched_average.c?ds=sidebyside

diff --git a/osfmk/kern/sched_average.c b/osfmk/kern/sched_average.c
index 411dfb47c..cf9520915 100644
--- a/osfmk/kern/sched_average.c
+++ b/osfmk/kern/sched_average.c
@@ -77,6 +77,8 @@
 uint32_t	avenrun[3] = {0, 0, 0};
 uint32_t	mach_factor[3] = {0, 0, 0};
 
+uint32_t	sched_load_average, sched_mach_factor;
+
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 /*
  * Values are scaled by LOAD_SCALE, defined in processor_info.h
@@ -109,7 +111,6 @@ static struct sched_average {
 	{ compute_averunnable, &sched_nrun, 5, 0 },
 	{ compute_stack_target, NULL, 5, 1 },
 	{ compute_memory_pressure, NULL, 1, 0 },
-	{ compute_zone_gc_throttle, NULL, 60, 0 },
 	{ compute_pageout_gc_throttle, NULL, 1, 0 },
 	{ compute_pmap_gc_throttle, NULL, 60, 0 },
 #if CONFIG_TELEMETRY
@@ -120,6 +121,8 @@ static struct sched_average {
 
 typedef struct sched_average	*sched_average_t;
 
+uint32_t load_now[TH_BUCKET_MAX];
+
 /* The "stdelta" parameter represents the number of scheduler maintenance
  * "ticks" that have elapsed since the last invocation, subject to
  * integer division imprecision.
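A note on the "stdelta" catch-up pattern described above and used by the hunk that follows: the tick-based decaying averages are advanced once per elapsed maintenance tick, so a pass that wakes up late applies the same update stdelta times instead of drifting. A minimal user-space sketch of that pattern (hypothetical names, not kernel code; the 4/5 decay matches the ((avg << 2) + sample) / 5 form in the diff):

    #include <stdint.h>
    #include <stdio.h>

    /* One scheduler "tick" of decay: new = (4*old + sample) / 5. */
    static uint32_t
    decay_catch_up(uint32_t avg, uint32_t sample, uint64_t stdelta)
    {
        /* Apply the update once per elapsed tick, compensating for
         * intervals where the maintenance thread was idle. */
        for (uint64_t tick = 0; tick < stdelta; tick++)
            avg = ((avg << 2) + sample) / 5;
        return avg;
    }

    int
    main(void)
    {
        /* Three ticks elapsed since the last pass: fold the current
         * sample in three times. Prints 707. */
        printf("%u\n", decay_catch_up(1000, 400, 3));
        return 0;
    }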
@@ -128,119 +131,122 @@ typedef struct sched_average	*sched_average_t;
 void
 compute_averages(uint64_t stdelta)
 {
-	int		ncpus, nthreads, nshared, nbackground, nshared_non_bg;
-	uint32_t	factor_now, average_now, load_now = 0, background_load_now = 0, combined_fgbg_load_now = 0;
-	sched_average_t	avg;
-	uint64_t	abstime, index;
-
 	/*
-	 *	Retrieve counts, ignoring
-	 *	the current thread.
+	 * Retrieve a snapshot of the current run counts.
+	 *
+	 * Why not a bcopy()? Because we need atomic word-sized reads of
+	 * sched_run_buckets, not a byte-by-byte copy.
 	 */
-	ncpus = processor_avail_count;
-	nthreads = sched_run_count - 1;
-	nshared = sched_share_count;
-	nbackground = sched_background_count;
+	uint32_t ncpus = processor_avail_count;
 
-	/*
-	 *	Load average and mach factor calculations for
-	 *	those which ask about these things.
-	 */
-	average_now = nthreads * LOAD_SCALE;
+	load_now[TH_BUCKET_RUN]      = sched_run_buckets[TH_BUCKET_RUN];
+	load_now[TH_BUCKET_FIXPRI]   = sched_run_buckets[TH_BUCKET_FIXPRI];
+	load_now[TH_BUCKET_SHARE_FG] = sched_run_buckets[TH_BUCKET_SHARE_FG];
+	load_now[TH_BUCKET_SHARE_UT] = sched_run_buckets[TH_BUCKET_SHARE_UT];
+	load_now[TH_BUCKET_SHARE_BG] = sched_run_buckets[TH_BUCKET_SHARE_BG];
 
-	if (nthreads > ncpus)
-		factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1);
-	else
-		factor_now = (ncpus - nthreads) * LOAD_SCALE;
+	assert(load_now[TH_BUCKET_RUN] >= 1);
+	assert(load_now[TH_BUCKET_FIXPRI] >= 1);
+
+	/* Ignore the current thread, which is a running fixpri thread */
+
+	uint32_t nthreads = load_now[TH_BUCKET_RUN] - 1;
+	uint32_t nfixpri  = load_now[TH_BUCKET_FIXPRI] - 1;
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+		MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LOAD) | DBG_FUNC_NONE,
+		load_now[TH_BUCKET_FIXPRI] - 1, load_now[TH_BUCKET_SHARE_FG],
+		load_now[TH_BUCKET_SHARE_BG], load_now[TH_BUCKET_SHARE_UT], 0);
 
-	/* For those statistics that formerly relied on being recomputed
-	 * on timer ticks, advance by the approximate number of corresponding
-	 * elapsed intervals, thus compensating for potential idle intervals.
-	 */
-	for (index = 0; index < stdelta; index++) {
-		sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5;
-		sched_load_average = ((sched_load_average << 2) + average_now) / 5;
-	}
 	/*
 	 * Compute the timeshare priority conversion factor based on loading.
 	 * Because our counters may be incremented and accessed
 	 * concurrently with respect to each other, we may have
-	 * windows where the invariant nthreads >= nshared >= nbackground
+	 * windows where the invariant (nthreads - nfixpri) == (fg + bg + ut)
 	 * is broken, so truncate values in these cases.
 	 */
-	if (nshared > nthreads)
-		nshared = nthreads;
-
-	if (nbackground > nshared)
-		nbackground = nshared;
+	uint32_t timeshare_threads = (nthreads - nfixpri);
 
-	nshared_non_bg = nshared - nbackground;
+	for (uint32_t i = TH_BUCKET_SHARE_FG; i <= TH_BUCKET_SHARE_BG; i++) {
+		if (load_now[i] > timeshare_threads)
+			load_now[i] = timeshare_threads;
+	}
 
-	if (nshared_non_bg > ncpus) {
-		if (ncpus > 1)
-			load_now = nshared_non_bg / ncpus;
-		else
-			load_now = nshared_non_bg;
+	/*
+	 * Utility threads contribute up to NCPUS of load to FG threads
+	 */
+	if (load_now[TH_BUCKET_SHARE_UT] <= ncpus) {
+		load_now[TH_BUCKET_SHARE_FG] += load_now[TH_BUCKET_SHARE_UT];
+	} else {
+		load_now[TH_BUCKET_SHARE_FG] += ncpus;
+	}
 
-		if (load_now > NRQS - 1)
-			load_now = NRQS - 1;
+	/*
+	 * FG and UT should notice there's one thread of competition from BG,
+	 * but no more.
+	 */
+	if (load_now[TH_BUCKET_SHARE_BG] > 0) {
+		load_now[TH_BUCKET_SHARE_FG] += 1;
+		load_now[TH_BUCKET_SHARE_UT] += 1;
 	}
 
-	if (nbackground > ncpus) {
-		if (ncpus > 1)
-			background_load_now = nbackground / ncpus;
-		else
-			background_load_now = nbackground;
+	/*
+	 * The conversion factor consists of two components:
+	 * a fixed value based on the absolute time unit (sched_fixed_shift),
+	 * and a dynamic portion based on load (sched_load_shifts).
+	 *
+	 * Zero load results in an out-of-range shift count.
+	 */
 
-		if (background_load_now > NRQS - 1)
-			background_load_now = NRQS - 1;
-	}
+	for (uint32_t i = TH_BUCKET_SHARE_FG; i <= TH_BUCKET_SHARE_BG; i++) {
+		uint32_t bucket_load = 0;
 
-	if (nshared > ncpus) {
-		if (ncpus > 1)
-			combined_fgbg_load_now = nshared / ncpus;
-		else
-			combined_fgbg_load_now = nshared;
+		if (load_now[i] > ncpus) {
+			if (ncpus > 1)
+				bucket_load = load_now[i] / ncpus;
+			else
+				bucket_load = load_now[i];
 
-		if (combined_fgbg_load_now > NRQS - 1)
-			combined_fgbg_load_now = NRQS - 1;
-	}
+			if (bucket_load > MAX_LOAD)
+				bucket_load = MAX_LOAD;
+		}
 
-	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-		MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LOAD) | DBG_FUNC_NONE,
-		(nthreads - nshared), (nshared - nbackground), nbackground, 0, 0);
+		sched_pri_shifts[i] = sched_fixed_shift - sched_load_shifts[bucket_load];
+	}
 
 	/*
-	 *	Sample total running threads.
+	 * Sample total running threads for the load average calculation.
 	 */
 	sched_nrun = nthreads;
-
-#if defined(CONFIG_SCHED_TIMESHARE_CORE)
 	/*
-	 *	The conversion factor consists of
-	 *	two components: a fixed value based
-	 *	on the absolute time unit, and a
-	 *	dynamic portion based on loading.
-	 *
-	 *	Zero loading results in a out of range
-	 *	shift count.  Accumulated usage is ignored
-	 *	during conversion and new usage deltas
-	 *	are discarded.
+	 * Load average and mach factor calculations for
+	 * those which ask about these things.
 	 */
-	sched_pri_shift = sched_fixed_shift - sched_load_shifts[load_now];
-	sched_background_pri_shift = sched_fixed_shift - sched_load_shifts[background_load_now];
-	sched_combined_fgbg_pri_shift = sched_fixed_shift - sched_load_shifts[combined_fgbg_load_now];
+	uint32_t average_now = nthreads * LOAD_SCALE;
+	uint32_t factor_now;
+
+	if (nthreads > ncpus)
+		factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1);
+	else
+		factor_now = (ncpus - nthreads) * LOAD_SCALE;
 
 	/*
-	 *	Compute old-style Mach load averages.
+	 * For those statistics that formerly relied on being recomputed
+	 * on timer ticks, advance by the approximate number of corresponding
+	 * elapsed intervals, thus compensating for potential idle intervals.
 	 */
+	for (uint32_t index = 0; index < stdelta; index++) {
+		sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5;
+		sched_load_average = ((sched_load_average << 2) + average_now) / 5;
+	}
 
-	for (index = 0; index < stdelta; index++) {
-		register int i;
-
-		for (i = 0; i < 3; i++) {
+	/*
+	 * Compute old-style Mach load averages.
+	 */
+	for (uint32_t index = 0; index < stdelta; index++) {
+		for (uint32_t i = 0; i < 3; i++) {
 			mach_factor[i] = ((mach_factor[i] * fract[i]) +
 				(factor_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;
 
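The per-bucket loop above replaces the old sched_pri_shift / sched_background_pri_shift / sched_combined_fgbg_pri_shift globals with one sched_pri_shifts[] entry per timeshare bucket. A toy model of the load-to-shift conversion (the table values below are invented for illustration; in the kernel the table is sized by NRQS and grows roughly logarithmically with load):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_LOAD    7    /* stand-in for the kernel's NRQS - 1 */
    #define FIXED_SHIFT 18   /* stand-in for sched_fixed_shift */

    /* Hypothetical miniature of sched_load_shifts[]: higher load gives a
     * larger entry, hence a smaller resulting pri_shift. */
    static const int load_shifts[MAX_LOAD + 1] = {
        /* load:  0  1  2  3  4  5  6  7 */
              -128, 0, 1, 1, 2, 2, 2, 2
    };

    static int
    pri_shift_for_bucket(uint32_t runnable, uint32_t ncpus)
    {
        uint32_t bucket_load = 0;

        /* Only load beyond one runnable thread per CPU is contention. */
        if (runnable > ncpus) {
            bucket_load = (ncpus > 1) ? runnable / ncpus : runnable;
            if (bucket_load > MAX_LOAD)
                bucket_load = MAX_LOAD;
        }
        return FIXED_SHIFT - load_shifts[bucket_load];
    }

    int
    main(void)
    {
        /* Prints "light: 146, heavy: 16": a loaded bucket gets a much
         * smaller shift, so accumulated CPU usage depresses priority
         * faster for threads in that bucket. */
        printf("light: %d, heavy: %d\n",
            pri_shift_for_bucket(2, 4), pri_shift_for_bucket(32, 4));
        return 0;
    }

A thread's timeshare priority penalty is derived from sched_usage >> pri_shift, which is what the "zero load results in an out-of-range shift count" comment alludes to: under no contention the shift is so large that accumulated usage effectively stops mattering.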
@@ -248,13 +254,13 @@ compute_averages(uint64_t stdelta)
 			(average_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;
 		}
 	}
-#endif	/* CONFIG_SCHED_TIMESHARE_CORE */
 
 	/*
-	 *	Compute averages in other components.
+	 * Compute averages in other components.
 	 */
-	abstime = mach_absolute_time();
-	for (avg = sched_average; avg->comp != NULL; ++avg) {
+	uint64_t abstime = mach_absolute_time();
+
+	for (sched_average_t avg = sched_average; avg->comp != NULL; ++avg) {
 		if (abstime >= avg->deadline) {
 			uint64_t period_abs = (avg->period * sched_one_second_interval);
 			uint64_t ninvokes = 1;
@@ -262,7 +268,7 @@ compute_averages(uint64_t stdelta)
 			ninvokes += (abstime - avg->deadline) / period_abs;
 			ninvokes = MIN(ninvokes, SCHED_TICK_MAX_DELTA);
 
-			for (index = 0; index < ninvokes; index++) {
+			for (uint32_t index = 0; index < ninvokes; index++) {
 				(*avg->comp)(avg->param);
 			}
 			avg->deadline = abstime + period_abs;
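A final sketch, since the deadline handling in the last two hunks is easy to misread: a sched_average entry that is several periods overdue is invoked once per missed period, capped at SCHED_TICK_MAX_DELTA, and then re-armed relative to the current time rather than its old deadline. A self-contained model of that logic (hypothetical names, user-space C, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define TICK_MAX_DELTA 8  /* stand-in for SCHED_TICK_MAX_DELTA */

    struct periodic {
        void   (*comp)(void *param);
        void    *param;
        uint64_t period;    /* in abstract time units */
        uint64_t deadline;  /* next absolute time comp should run */
    };

    /* Mirror of the catch-up loop at the end of compute_averages(): one
     * invocation for the expired deadline, plus one per fully missed
     * period, clamped so a long idle stretch cannot run away. */
    static void
    run_expired(struct periodic *p, uint64_t now)
    {
        if (now < p->deadline)
            return;

        uint64_t ninvokes = 1 + (now - p->deadline) / p->period;
        if (ninvokes > TICK_MAX_DELTA)
            ninvokes = TICK_MAX_DELTA;

        for (uint64_t i = 0; i < ninvokes; i++)
            p->comp(p->param);

        p->deadline = now + p->period;  /* re-arm from "now", not the old deadline */
    }

    static void
    tick(void *param)
    {
        (void)param;
        puts("tick");
    }

    int
    main(void)
    {
        struct periodic p = { tick, NULL, 10, 100 };
        run_expired(&p, 125);  /* two periods overdue: prints "tick" 3 times */
        return 0;
    }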