X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/490019cf9519204c5fb36b2fba54ceb983bb6b72..3903760236c30e3b5ace7a4eefac3a269d68957c:/osfmk/kern/sched_average.c?ds=sidebyside

diff --git a/osfmk/kern/sched_average.c b/osfmk/kern/sched_average.c
index 411dfb47c..cf9520915 100644
--- a/osfmk/kern/sched_average.c
+++ b/osfmk/kern/sched_average.c
@@ -77,6 +77,8 @@
 uint32_t	avenrun[3] = {0, 0, 0};
 uint32_t	mach_factor[3] = {0, 0, 0};
 
+uint32_t	sched_load_average, sched_mach_factor;
+
 #if defined(CONFIG_SCHED_TIMESHARE_CORE)
 /*
  * Values are scaled by LOAD_SCALE, defined in processor_info.h
@@ -109,7 +111,6 @@ static struct sched_average {
 	{ compute_averunnable, &sched_nrun, 5, 0 },
 	{ compute_stack_target, NULL, 5, 1 },
 	{ compute_memory_pressure, NULL, 1, 0 },
-	{ compute_zone_gc_throttle, NULL, 60, 0 },
 	{ compute_pageout_gc_throttle, NULL, 1, 0 },
 	{ compute_pmap_gc_throttle, NULL, 60, 0 },
 #if CONFIG_TELEMETRY
@@ -120,6 +121,8 @@ static struct sched_average {
 
 typedef struct sched_average	*sched_average_t;
 
+uint32_t load_now[TH_BUCKET_MAX];
+
 /* The "stdelta" parameter represents the number of scheduler maintenance
  * "ticks" that have elapsed since the last invocation, subject to
  * integer division imprecision.
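A note on the "stdelta" catch-up pattern described above and used by the hunk that follows: the tick-based decaying averages are advanced once per elapsed maintenance tick, so a pass that wakes up late applies the same update stdelta times instead of drifting. A minimal user-space sketch of that pattern (hypothetical names, not kernel code; the 4/5 decay matches the ((avg << 2) + sample) / 5 form in the diff):

    #include <stdint.h>
    #include <stdio.h>

    /* One scheduler "tick" of decay: new = (4*old + sample) / 5. */
    static uint32_t
    decay_catch_up(uint32_t avg, uint32_t sample, uint64_t stdelta)
    {
        /* Apply the update once per elapsed tick, compensating for
         * intervals where the maintenance thread was idle. */
        for (uint64_t tick = 0; tick < stdelta; tick++)
            avg = ((avg << 2) + sample) / 5;
        return avg;
    }

    int
    main(void)
    {
        /* Three ticks elapsed since the last pass: fold the current
         * sample in three times. Prints 707. */
        printf("%u\n", decay_catch_up(1000, 400, 3));
        return 0;
    }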
@@ -128,119 +131,122 @@ typedef struct sched_average	*sched_average_t;
 void
 compute_averages(uint64_t stdelta)
 {
-	int		ncpus, nthreads, nshared, nbackground, nshared_non_bg;
-	uint32_t	factor_now, average_now, load_now = 0, background_load_now = 0, combined_fgbg_load_now = 0;
-	sched_average_t	avg;
-	uint64_t	abstime, index;
-
 	/*
-	 *	Retrieve counts, ignoring
-	 *	the current thread.
+	 * Retrieve a snapshot of the current run counts.
+	 *
+	 * Why not a bcopy()? Because we need atomic word-sized reads of
+	 * sched_run_buckets, not a byte-by-byte copy.
 	 */
-	ncpus = processor_avail_count;
-	nthreads = sched_run_count - 1;
-	nshared = sched_share_count;
-	nbackground = sched_background_count;
+	uint32_t ncpus = processor_avail_count;
 
-	/*
-	 *	Load average and mach factor calculations for
-	 *	those which ask about these things.
-	 */
-	average_now = nthreads * LOAD_SCALE;
+	load_now[TH_BUCKET_RUN]      = sched_run_buckets[TH_BUCKET_RUN];
+	load_now[TH_BUCKET_FIXPRI]   = sched_run_buckets[TH_BUCKET_FIXPRI];
+	load_now[TH_BUCKET_SHARE_FG] = sched_run_buckets[TH_BUCKET_SHARE_FG];
+	load_now[TH_BUCKET_SHARE_UT] = sched_run_buckets[TH_BUCKET_SHARE_UT];
+	load_now[TH_BUCKET_SHARE_BG] = sched_run_buckets[TH_BUCKET_SHARE_BG];
 
-	if (nthreads > ncpus)
-		factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1);
-	else
-		factor_now = (ncpus - nthreads) * LOAD_SCALE;
+	assert(load_now[TH_BUCKET_RUN] >= 1);
+	assert(load_now[TH_BUCKET_FIXPRI] >= 1);
+
+	/* Ignore the current thread, which is a running fixpri thread */
+
+	uint32_t nthreads = load_now[TH_BUCKET_RUN] - 1;
+	uint32_t nfixpri  = load_now[TH_BUCKET_FIXPRI] - 1;
+
+	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
+		MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LOAD) | DBG_FUNC_NONE,
+		load_now[TH_BUCKET_FIXPRI] - 1, load_now[TH_BUCKET_SHARE_FG],
+		load_now[TH_BUCKET_SHARE_BG], load_now[TH_BUCKET_SHARE_UT], 0);
 
-	/* For those statistics that formerly relied on being recomputed
-	 * on timer ticks, advance by the approximate number of corresponding
-	 * elapsed intervals, thus compensating for potential idle intervals.
-	 */
-	for (index = 0; index < stdelta; index++) {
-		sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5;
-		sched_load_average = ((sched_load_average << 2) + average_now) / 5;
-	}
 	/*
 	 * Compute the timeshare priority conversion factor based on loading.
 	 * Because our counters may be incremented and accessed
 	 * concurrently with respect to each other, we may have
-	 * windows where the invariant nthreads >= nshared >= nbackground
+	 * windows where the invariant (nthreads - nfixpri) == (fg + bg + ut)
 	 * is broken, so truncate values in these cases.
 	 */
-	if (nshared > nthreads)
-		nshared = nthreads;
-
-	if (nbackground > nshared)
-		nbackground = nshared;
+	uint32_t timeshare_threads = (nthreads - nfixpri);
 
-	nshared_non_bg = nshared - nbackground;
+	for (uint32_t i = TH_BUCKET_SHARE_FG; i <= TH_BUCKET_SHARE_BG; i++) {
+		if (load_now[i] > timeshare_threads)
+			load_now[i] = timeshare_threads;
+	}
 
-	if (nshared_non_bg > ncpus) {
-		if (ncpus > 1)
-			load_now = nshared_non_bg / ncpus;
-		else
-			load_now = nshared_non_bg;
+	/*
+	 * Utility threads contribute up to NCPUS of load to FG threads
+	 */
+	if (load_now[TH_BUCKET_SHARE_UT] <= ncpus) {
+		load_now[TH_BUCKET_SHARE_FG] += load_now[TH_BUCKET_SHARE_UT];
+	} else {
+		load_now[TH_BUCKET_SHARE_FG] += ncpus;
+	}
 
-		if (load_now > NRQS - 1)
-			load_now = NRQS - 1;
+	/*
+	 * FG and UT should notice there's one thread of competition from BG,
+	 * but no more.
+	 */
+	if (load_now[TH_BUCKET_SHARE_BG] > 0) {
+		load_now[TH_BUCKET_SHARE_FG] += 1;
+		load_now[TH_BUCKET_SHARE_UT] += 1;
 	}
 
-	if (nbackground > ncpus) {
-		if (ncpus > 1)
-			background_load_now = nbackground / ncpus;
-		else
-			background_load_now = nbackground;
+	/*
+	 * The conversion factor consists of two components:
+	 * a fixed value based on the absolute time unit (sched_fixed_shift),
+	 * and a dynamic portion based on load (sched_load_shifts).
+	 *
+	 * Zero load results in an out-of-range shift count.
+	 */
 
-		if (background_load_now > NRQS - 1)
-			background_load_now = NRQS - 1;
-	}
+	for (uint32_t i = TH_BUCKET_SHARE_FG; i <= TH_BUCKET_SHARE_BG; i++) {
+		uint32_t bucket_load = 0;
 
-	if (nshared > ncpus) {
-		if (ncpus > 1)
-			combined_fgbg_load_now = nshared / ncpus;
-		else
-			combined_fgbg_load_now = nshared;
+		if (load_now[i] > ncpus) {
+			if (ncpus > 1)
+				bucket_load = load_now[i] / ncpus;
+			else
+				bucket_load = load_now[i];
 
-		if (combined_fgbg_load_now > NRQS - 1)
-			combined_fgbg_load_now = NRQS - 1;
-	}
+			if (bucket_load > MAX_LOAD)
+				bucket_load = MAX_LOAD;
+		}
 
-	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
-		MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_LOAD) | DBG_FUNC_NONE,
-		(nthreads - nshared), (nshared - nbackground), nbackground, 0, 0);
+		sched_pri_shifts[i] = sched_fixed_shift - sched_load_shifts[bucket_load];
+	}
 
 	/*
-	 *	Sample total running threads.
+	 * Sample total running threads for the load average calculation.
 	 */
 	sched_nrun = nthreads;
-
-#if defined(CONFIG_SCHED_TIMESHARE_CORE)
 	/*
-	 *	The conversion factor consists of
-	 *	two components: a fixed value based
-	 *	on the absolute time unit, and a
-	 *	dynamic portion based on loading.
-	 *
-	 *	Zero loading results in a out of range
-	 *	shift count.  Accumulated usage is ignored
-	 *	during conversion and new usage deltas
-	 *	are discarded.
+	 * Load average and mach factor calculations for
+	 * those which ask about these things.
 	 */
-	sched_pri_shift = sched_fixed_shift - sched_load_shifts[load_now];
-	sched_background_pri_shift = sched_fixed_shift - sched_load_shifts[background_load_now];
-	sched_combined_fgbg_pri_shift = sched_fixed_shift - sched_load_shifts[combined_fgbg_load_now];
+	uint32_t average_now = nthreads * LOAD_SCALE;
+	uint32_t factor_now;
+
+	if (nthreads > ncpus)
+		factor_now = (ncpus * LOAD_SCALE) / (nthreads + 1);
+	else
+		factor_now = (ncpus - nthreads) * LOAD_SCALE;
 
 	/*
-	 *	Compute old-style Mach load averages.
+	 * For those statistics that formerly relied on being recomputed
+	 * on timer ticks, advance by the approximate number of corresponding
+	 * elapsed intervals, thus compensating for potential idle intervals.
 	 */
+	for (uint32_t index = 0; index < stdelta; index++) {
+		sched_mach_factor = ((sched_mach_factor << 2) + factor_now) / 5;
+		sched_load_average = ((sched_load_average << 2) + average_now) / 5;
+	}
 
-	for (index = 0; index < stdelta; index++) {
-		register int i;
-
-		for (i = 0; i < 3; i++) {
+	/*
+	 * Compute old-style Mach load averages.
+	 */
+	for (uint32_t index = 0; index < stdelta; index++) {
+		for (uint32_t i = 0; i < 3; i++) {
 			mach_factor[i] = ((mach_factor[i] * fract[i]) +
 				(factor_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;
 
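The per-bucket loop above replaces the old sched_pri_shift / sched_background_pri_shift / sched_combined_fgbg_pri_shift globals with one sched_pri_shifts[] entry per timeshare bucket. A toy model of the load-to-shift conversion (the table values below are invented for illustration; in the kernel the table is sized by NRQS and grows roughly logarithmically with load):

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_LOAD    7    /* stand-in for the kernel's NRQS - 1 */
    #define FIXED_SHIFT 18   /* stand-in for sched_fixed_shift */

    /* Hypothetical miniature of sched_load_shifts[]: higher load gives a
     * larger entry, hence a smaller resulting pri_shift. */
    static const int load_shifts[MAX_LOAD + 1] = {
        /* load:  0  1  2  3  4  5  6  7 */
              -128, 0, 1, 1, 2, 2, 2, 2
    };

    static int
    pri_shift_for_bucket(uint32_t runnable, uint32_t ncpus)
    {
        uint32_t bucket_load = 0;

        /* Only load beyond one runnable thread per CPU is contention. */
        if (runnable > ncpus) {
            bucket_load = (ncpus > 1) ? runnable / ncpus : runnable;
            if (bucket_load > MAX_LOAD)
                bucket_load = MAX_LOAD;
        }
        return FIXED_SHIFT - load_shifts[bucket_load];
    }

    int
    main(void)
    {
        /* Prints "light: 146, heavy: 16": a loaded bucket gets a much
         * smaller shift, so accumulated CPU usage depresses priority
         * faster for threads in that bucket. */
        printf("light: %d, heavy: %d\n",
            pri_shift_for_bucket(2, 4), pri_shift_for_bucket(32, 4));
        return 0;
    }

A thread's timeshare priority penalty is derived from sched_usage >> pri_shift, which is what the "zero load results in an out-of-range shift count" comment alludes to: under no contention the shift is so large that accumulated usage effectively stops mattering.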
@@ -248,13 +254,13 @@ compute_averages(uint64_t stdelta)
 			(average_now * (LOAD_SCALE - fract[i]))) / LOAD_SCALE;
 		}
 	}
-#endif	/* CONFIG_SCHED_TIMESHARE_CORE */
 
 	/*
-	 *	Compute averages in other components.
+	 * Compute averages in other components.
 	 */
-	abstime = mach_absolute_time();
-	for (avg = sched_average; avg->comp != NULL; ++avg) {
+	uint64_t abstime = mach_absolute_time();
+
+	for (sched_average_t avg = sched_average; avg->comp != NULL; ++avg) {
 		if (abstime >= avg->deadline) {
 			uint64_t period_abs = (avg->period * sched_one_second_interval);
 			uint64_t ninvokes = 1;
@@ -262,7 +268,7 @@ compute_averages(uint64_t stdelta)
 			ninvokes += (abstime - avg->deadline) / period_abs;
 			ninvokes = MIN(ninvokes, SCHED_TICK_MAX_DELTA);
 
-			for (index = 0; index < ninvokes; index++) {
+			for (uint32_t index = 0; index < ninvokes; index++) {
 				(*avg->comp)(avg->param);
 			}
 			avg->deadline = abstime + period_abs;
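A final sketch, since the deadline handling in the last two hunks is easy to misread: a sched_average entry that is several periods overdue is invoked once per missed period, capped at SCHED_TICK_MAX_DELTA, and then re-armed relative to the current time rather than its old deadline. A self-contained model of that logic (hypothetical names, user-space C, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define TICK_MAX_DELTA 8  /* stand-in for SCHED_TICK_MAX_DELTA */

    struct periodic {
        void   (*comp)(void *param);
        void    *param;
        uint64_t period;    /* in abstract time units */
        uint64_t deadline;  /* next absolute time comp should run */
    };

    /* Mirror of the catch-up loop at the end of compute_averages(): one
     * invocation for the expired deadline, plus one per fully missed
     * period, clamped so a long idle stretch cannot run away. */
    static void
    run_expired(struct periodic *p, uint64_t now)
    {
        if (now < p->deadline)
            return;

        uint64_t ninvokes = 1 + (now - p->deadline) / p->period;
        if (ninvokes > TICK_MAX_DELTA)
            ninvokes = TICK_MAX_DELTA;

        for (uint64_t i = 0; i < ninvokes; i++)
            p->comp(p->param);

        p->deadline = now + p->period;  /* re-arm from "now", not the old deadline */
    }

    static void
    tick(void *param)
    {
        (void)param;
        puts("tick");
    }

    int
    main(void)
    {
        struct periodic p = { tick, NULL, 10, 100 };
        run_expired(&p, 125);  /* two periods overdue: prints "tick" 3 times */
        return 0;
    }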