1/*
2 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30#include <kern/sched_prim.h>
31#include <kern/kalloc.h>
32#include <kern/assert.h>
33#include <kern/debug.h>
34#include <kern/locks.h>
35#include <kern/task.h>
36#include <kern/thread.h>
37#include <kern/host.h>
38#include <kern/policy_internal.h>
39#include <kern/thread_group.h>
40
41#include <IOKit/IOBSD.h>
42
43#include <libkern/libkern.h>
44#include <mach/coalition.h>
45#include <mach/mach_time.h>
46#include <mach/task.h>
47#include <mach/host_priv.h>
48#include <mach/mach_host.h>
49#include <os/log.h>
50#include <pexpert/pexpert.h>
51#include <sys/coalition.h>
52#include <sys/kern_event.h>
53#include <sys/proc.h>
54#include <sys/proc_info.h>
55#include <sys/reason.h>
56#include <sys/signal.h>
57#include <sys/signalvar.h>
58#include <sys/sysctl.h>
59#include <sys/sysproto.h>
60#include <sys/wait.h>
61#include <sys/tree.h>
62#include <sys/priv.h>
63#include <vm/vm_pageout.h>
64#include <vm/vm_protos.h>
65#include <mach/machine/sdt.h>
66#include <libkern/section_keywords.h>
67#include <stdatomic.h>
68
69#if CONFIG_FREEZE
70#include <vm/vm_map.h>
71#endif /* CONFIG_FREEZE */
72
73#include <sys/kern_memorystatus.h>
74#include <sys/kern_memorystatus_freeze.h>
75#include <sys/kern_memorystatus_notify.h>
76
77#if CONFIG_JETSAM
78
79extern unsigned int memorystatus_available_pages;
80extern unsigned int memorystatus_available_pages_pressure;
81extern unsigned int memorystatus_available_pages_critical;
82extern unsigned int memorystatus_available_pages_critical_base;
83extern unsigned int memorystatus_available_pages_critical_idle_offset;
84
85#else /* CONFIG_JETSAM */
86
87extern uint64_t memorystatus_available_pages;
88extern uint64_t memorystatus_available_pages_pressure;
89extern uint64_t memorystatus_available_pages_critical;
90
91#endif /* CONFIG_JETSAM */
92
93unsigned int memorystatus_frozen_count = 0;
94unsigned int memorystatus_suspended_count = 0;
95unsigned long freeze_threshold_percentage = 50;
96
97#if CONFIG_FREEZE
98
99lck_grp_attr_t *freezer_lck_grp_attr;
100lck_grp_t *freezer_lck_grp;
101static lck_mtx_t freezer_mutex;
102
103/* Thresholds */
104unsigned int memorystatus_freeze_threshold = 0;
105unsigned int memorystatus_freeze_pages_min = 0;
106unsigned int memorystatus_freeze_pages_max = 0;
107unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
108unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
109uint64_t memorystatus_freeze_budget_pages_remaining = 0; /* remaining # of pages that can be frozen to disk */
110boolean_t memorystatus_freeze_degradation = FALSE; /* protected by the freezer mutex; signals we are in a degraded freeze mode */
111
112unsigned int memorystatus_max_frozen_demotions_daily = 0;
113unsigned int memorystatus_thaw_count_demotion_threshold = 0;
114
115boolean_t memorystatus_freeze_enabled = FALSE;
116int memorystatus_freeze_wakeup = 0;
117int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
118
119#define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
120
121#ifdef XNU_KERNEL_PRIVATE
122
123unsigned int memorystatus_frozen_processes_max = 0;
124unsigned int memorystatus_frozen_shared_mb = 0;
125unsigned int memorystatus_frozen_shared_mb_max = 0;
126unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
127unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
128unsigned int memorystatus_thaw_count = 0;
129unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed, i.e. they have state both on disk and in memory */
130
131/* Freezer counters collected for telemetry */
132static struct memorystatus_freezer_stats_t {
133 /*
134 * # of processes that we've considered freezing.
135 * Used to normalize the error reasons below.
136 */
137 uint64_t mfs_process_considered_count;
138
139 /*
140 * The following counters track how many times we've failed to freeze
141 * a process because of a specific FREEZER_ERROR.
142 */
143 /* EXCESS_SHARED_MEMORY */
144 uint64_t mfs_error_excess_shared_memory_count;
145 /* LOW_PRIVATE_SHARED_RATIO */
146 uint64_t mfs_error_low_private_shared_ratio_count;
147 /* NO_COMPRESSOR_SPACE */
148 uint64_t mfs_error_no_compressor_space_count;
149 /* NO_SWAP_SPACE */
150 uint64_t mfs_error_no_swap_space_count;
151 /* pages < memorystatus_freeze_pages_min */
152 uint64_t mfs_error_below_min_pages_count;
153 /* dasd determined it was unlikely to be relaunched. */
154 uint64_t mfs_error_low_probability_of_use_count;
155 /* transient reasons (like inability to acquire a lock). */
156 uint64_t mfs_error_other_count;
157
158 /*
159 * # of times that we saw memorystatus_available_pages <= memorystatus_freeze_threshold.
160 * Used to normalize skipped_full_count and shared_mb_high_count.
161 */
162 uint64_t mfs_below_threshold_count;
163
164 /* Skipped running the freezer because we were out of slots */
165 uint64_t mfs_skipped_full_count;
166
167 /* Skipped running the freezer because we were over the shared mb limit*/
168 uint64_t mfs_skipped_shared_mb_high_count;
169
170 /*
171 * How many pages have not been sent to swap because they were in a shared object?
172	 * This is being used to gather telemetry so we can understand the impact we'd have
173 * on our NAND budget if we did swap out these pages.
174 */
175 uint64_t mfs_shared_pages_skipped;
176} memorystatus_freezer_stats = {0};
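/*
 * Illustrative reading of these counters (numbers below are assumed for the
 * example): each error count is meaningful as a fraction of
 * mfs_process_considered_count. For instance, if 400 processes were considered
 * and mfs_error_below_min_pages_count is 100, then 25% of freeze candidates
 * were rejected for being below memorystatus_freeze_pages_min.
 */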
177
178#endif /* XNU_KERNEL_PRIVATE */
179
180static inline boolean_t memorystatus_can_freeze_processes(void);
181static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
182static boolean_t memorystatus_is_process_eligible_for_freeze(proc_t p);
183static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
184
185void memorystatus_disable_freeze(void);
186
187/* Stats */
188static uint64_t memorystatus_freeze_pageouts = 0;
189
190/* Throttling */
191#define DEGRADED_WINDOW_MINS (30)
192#define NORMAL_WINDOW_MINS (24 * 60)
193
194/* Protected by the freezer_mutex */
195static throttle_interval_t throttle_intervals[] = {
196 { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
197 { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
198};
199throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
200throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
201
202extern uint64_t vm_swap_get_free_space(void);
203extern boolean_t vm_swap_max_budget(uint64_t *);
204extern int i_coal_jetsam_get_taskrole(coalition_t coal, task_t task);
205
206static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
207static void memorystatus_demote_frozen_processes(boolean_t force_one);
208
209static uint64_t memorystatus_freezer_thread_next_run_ts = 0;
210
211/* Sysctls needed for aggd stats */
212
213SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
214SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
215SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
216SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
217SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
218SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
219SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
220SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
221SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
222SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
223SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
224SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
225SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
226SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
227SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
228SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
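/*
 * These stats are read-only and can be sampled from user space, e.g.
 * (illustrative):
 *
 *   sysctl kern.memorystatus_freeze_count kern.memorystatus_freeze_pageouts
 */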
229
230/*
231 * Calculates the hit rate for the freezer.
232 * The hit rate is defined as the percentage of procs that are currently in the
233 * freezer which we have thawed.
234 * A low hit rate means we're freezing bad candidates since they're not re-used.
235 */
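/*
 * Worked example (illustrative numbers): with 12 frozen processes, 3 of which
 * have a non-zero p_memstat_thaw_count, the handler below reports
 * 100 * 3 / 12 = 25 (percent).
 */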
236static int sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
237{
238#pragma unused(arg1, arg2)
239 size_t thaw_count = 0, frozen_count = 0;
240 int thaw_percentage = 100;
241 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
242 proc_t p = PROC_NULL;
243 proc_list_lock();
244
245 p = memorystatus_get_first_proc_locked(&band, FALSE);
246
247 while (p) {
248 if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
249 if (p->p_memstat_thaw_count > 0) {
250 thaw_count++;
251 }
252 frozen_count++;
253 }
254 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
255 }
256 proc_list_unlock();
257 if (frozen_count > 0) {
258 thaw_percentage = 100 * thaw_count / frozen_count;
259 }
260 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
261}
262SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
263
264
265#if DEVELOPMENT || DEBUG
266
267SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_jetsam_band, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_jetsam_band, 0, "");
268SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
269SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
270SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
271SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
272SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");
273SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
274SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_processes_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_processes_max, 0, "");
275
276/*
277 * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
278 * "0" means no limit.
279 * Default is 10% of system-wide task limit.
280 */
281
282SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
283SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
284
285SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
286SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_private_shared_pages_ratio, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_private_shared_pages_ratio, 0, "");
287
288SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");
289
290/*
291 * max. # of frozen process demotions we will allow in our daily cycle.
292 */
293SYSCTL_UINT(_kern, OID_AUTO, memorystatus_max_freeze_demotions_daily, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_max_frozen_demotions_daily, 0, "");
294/*
295 * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
296 */
297SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count_demotion_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_thaw_count_demotion_threshold, 0, "");
298
299boolean_t memorystatus_freeze_throttle_enabled = TRUE;
300SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
301
302/*
303 * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
304 * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
305 */
306boolean_t memorystatus_freeze_to_memory = FALSE;
307SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
308
309#define VM_PAGES_FOR_ALL_PROCS (2)
310/*
311 * Manual trigger of freeze and thaw for dev / debug kernels only.
312 */
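/*
 * Example usage from user space (dev / debug kernels only; illustrative):
 *
 *   sysctl -w kern.memorystatus_freeze=<pid>   # force-freeze a specific process
 *   sysctl -w kern.memorystatus_freeze=2       # VM_PAGES_FOR_ALL_PROCS: compress
 *                                              # anonymous pages system-wide via
 *                                              # vm_pageout_anonymous_pages()
 */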
313static int
314sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
315{
316#pragma unused(arg1, arg2)
317 int error, pid = 0;
318 proc_t p;
319 int freezer_error_code = 0;
320 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
321 int ntasks = 0;
322 coalition_t coal = COALITION_NULL;
323
324 if (memorystatus_freeze_enabled == FALSE) {
325 printf("sysctl_freeze: Freeze is DISABLED\n");
326 return ENOTSUP;
327 }
328
329 error = sysctl_handle_int(oidp, &pid, 0, req);
330 if (error || !req->newptr) {
331 return error;
332 }
333
334 if (pid == VM_PAGES_FOR_ALL_PROCS) {
335 vm_pageout_anonymous_pages();
336
337 return 0;
338 }
339
340 lck_mtx_lock(&freezer_mutex);
341
342again:
343 p = proc_find(pid);
344 if (p != NULL) {
345 memorystatus_freezer_stats.mfs_process_considered_count++;
346 uint32_t purgeable, wired, clean, dirty, shared;
347 uint32_t max_pages = 0, state = 0;
348
349 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
350 /*
351 * Freezer backed by the compressor and swap file(s)
352 * will hold compressed data.
353 *
354 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
355 * being swapped out to disk. Note that this disables freezer swap support globally,
356 * not just for the process being frozen.
357 *
358 *
359 * We don't care about the global freezer budget or the process's (min/max) budget here.
360 * The freeze sysctl is meant to force-freeze a process.
361 *
362 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
363 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
364 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
365 */
366 max_pages = memorystatus_freeze_pages_max;
367 } else {
368 /*
369 * We only have the compressor without any swap.
370 */
371 max_pages = UINT32_MAX - 1;
372 }
373
374 proc_list_lock();
375 state = p->p_memstat_state;
376 proc_list_unlock();
377
378 /*
379 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
380 * We simply ensure that jetsam is not already working on the process and that the process has not
381 * explicitly disabled freezing.
382 */
383 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
384 printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
385 (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
386 (state & P_MEMSTAT_LOCKED) ? " locked" : "",
387 (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
388
389 proc_rele(p);
390 lck_mtx_unlock(&freezer_mutex);
391 return EPERM;
392 }
393
394 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
395 if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
396 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
397 }
398
399 if (error) {
400			char reason[128] = "unknown error"; /* default, in case freezer_error_code is not one of the cases below */
401 if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
402 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
403 strlcpy(reason, "too much shared memory", 128);
404 }
405
406 if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
407 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
408 strlcpy(reason, "low private-shared pages ratio", 128);
409 }
410
411 if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
412 memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
413 strlcpy(reason, "no compressor space", 128);
414 }
415
416 if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
417 strlcpy(reason, "no swap space", 128);
418 }
419
420 printf("sysctl_freeze: task_freeze failed: %s\n", reason);
421
422 if (error == KERN_NO_SPACE) {
423 /* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
424 error = ENOSPC;
425 } else {
426 error = EIO;
427 }
428 } else {
429 proc_list_lock();
430 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
431 p->p_memstat_state |= P_MEMSTAT_FROZEN;
432 memorystatus_frozen_count++;
433 }
434 p->p_memstat_frozen_count++;
435
436
437 proc_list_unlock();
438
439 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
440 /*
441 * We elevate only if we are going to swap out the data.
442 */
443 error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
444 memorystatus_freeze_jetsam_band, TRUE);
445
446 if (error) {
447 printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
448 }
449 }
450 }
451
452 if ((error == 0) && (coal == NULL)) {
453 /*
454 * We froze a process and so we check to see if it was
455 * a coalition leader and if it has XPC services that
456 * might need freezing.
457 * Only one leader can be frozen at a time and so we shouldn't
458 * enter this block more than once per call. Hence the
459 * check that 'coal' has to be NULL. We should make this an
460 * assert() or panic() once we have a much more concrete way
461 * to detect an app vs a daemon.
462 */
463
464 task_t curr_task = NULL;
465
466 curr_task = proc_task(p);
467 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
468 if (coalition_is_leader(curr_task, coal)) {
469 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
470 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
471
472 if (ntasks > MAX_XPC_SERVICE_PIDS) {
473 ntasks = MAX_XPC_SERVICE_PIDS;
474 }
475 }
476 }
477
478 proc_rele(p);
479
480 while (ntasks) {
481 pid = pid_list[--ntasks];
482 goto again;
483 }
484
485 lck_mtx_unlock(&freezer_mutex);
486 return error;
487 } else {
488 printf("sysctl_freeze: Invalid process\n");
489 }
490
491
492 lck_mtx_unlock(&freezer_mutex);
493 return EINVAL;
494}
495
496SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
497 0, 0, &sysctl_memorystatus_freeze, "I", "");
498
499/*
500 * Manual trigger of aggressive frozen demotion for dev / debug kernels only.
501 */
502static int
503sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
504{
505#pragma unused(arg1, arg2, oidp, req)
506 int error, val;
507 /*
508 * Only demote on write to prevent demoting during `sysctl -a`.
509 * The actual value written doesn't matter.
510 */
511 error = sysctl_handle_int(oidp, &val, 0, req);
512 if (error || !req->newptr) {
513 return error;
514 }
515 memorystatus_demote_frozen_processes(false);
516 return 0;
517}
518
519SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
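/*
 * Example usage (dev / debug kernels only; illustrative): writing any integer
 * triggers one non-forced demotion pass, e.g.
 *
 *   sysctl -w kern.memorystatus_demote_frozen_processes=1
 */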
520
521static int
522sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
523{
524#pragma unused(arg1, arg2)
525
526 int error, pid = 0;
527 proc_t p;
528
529 if (memorystatus_freeze_enabled == FALSE) {
530 return ENOTSUP;
531 }
532
533 error = sysctl_handle_int(oidp, &pid, 0, req);
534 if (error || !req->newptr) {
535 return error;
536 }
537
538 if (pid == VM_PAGES_FOR_ALL_PROCS) {
539 do_fastwake_warmup_all();
540 return 0;
541 } else {
542 p = proc_find(pid);
543 if (p != NULL) {
544 error = task_thaw(p->task);
545
546 if (error) {
547 error = EIO;
548 } else {
549 /*
550 * task_thaw() succeeded.
551 *
552				 * We increment memorystatus_frozen_count on the sysctl freeze path,
553				 * so we leave the P_MEMSTAT_FROZEN bit set here; it is needed to
554				 * decrement the frozen count when this process exits.
555 *
556 * proc_list_lock();
557 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
558 * proc_list_unlock();
559 */
560 }
561 proc_rele(p);
562 return error;
563 }
564 }
565
566 return EINVAL;
567}
568
569SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
570 0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
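/*
 * Example usage (dev / debug kernels only; illustrative):
 *
 *   sysctl -w kern.memorystatus_thaw=<pid>     # thaw a specific frozen process
 *   sysctl -w kern.memorystatus_thaw=2         # VM_PAGES_FOR_ALL_PROCS: run
 *                                              # do_fastwake_warmup_all()
 */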
571
572
573typedef struct _global_freezable_status {
574 boolean_t freeze_pages_threshold_crossed;
575 boolean_t freeze_eligible_procs_available;
576 boolean_t freeze_scheduled_in_future;
577} global_freezable_status_t;
578
579typedef struct _proc_freezable_status {
580 boolean_t freeze_has_memstat_state;
581 boolean_t freeze_has_pages_min;
582 int freeze_has_probability;
583 int freeze_leader_eligible;
584 boolean_t freeze_attempted;
585 uint32_t p_memstat_state;
586 uint32_t p_pages;
587 int p_freeze_error_code;
588 int p_pid;
589 int p_leader_pid;
590 char p_name[MAXCOMLEN + 1];
591} proc_freezable_status_t;
592
593#define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */
594
595/*
596 * For coalition based freezing evaluations, we proceed as follows:
597 * - detect that the process is a coalition member and a XPC service
598 * - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
599 * - continue its freezability evaluation assuming its leader will be freezable too
600 *
601 * Once we are done evaluating all processes, we do a quick run through all
602 * processes and for a coalition member XPC service we look up the 'freezable'
603 * status of its leader and iff:
604 * - the xpc service is freezable i.e. its individual freeze evaluation worked
605 * - and, its leader is also marked freezable
606 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
607 */
608
609#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN (-1)
610#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS (1)
611#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE (2)
612
613static int
614memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
615{
616 uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
617 global_freezable_status_t *list_head;
618 proc_freezable_status_t *list_entry, *list_entry_start;
619 size_t list_size = 0;
620 proc_t p, leader_proc;
621 memstat_bucket_t *bucket;
622 uint32_t state = 0, pages = 0, entry_count = 0;
623 boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
624 int error = 0, probability_of_use = 0;
625 pid_t leader_pid = 0;
626
627
628 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
629 return ENOTSUP;
630 }
631
632 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
633
634 if (buffer_size < list_size) {
635 return EINVAL;
636 }
637
638 list_head = (global_freezable_status_t*)kalloc(list_size);
639 if (list_head == NULL) {
640 return ENOMEM;
641 }
642
643 memset(list_head, 0, list_size);
644
645 list_size = sizeof(global_freezable_status_t);
646
647 proc_list_lock();
648
649 uint64_t curr_time = mach_absolute_time();
650
651 list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
652 list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
653 list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
654
655 list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
656 list_entry = list_entry_start;
657
658 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
659
660 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
661
662 p = memorystatus_get_first_proc_locked(&band, FALSE);
663 proc_count++;
664
665 while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
666 (p) &&
667 (list_size < buffer_size)) {
668 if (isSysProc(p)) {
669 /*
670 * Daemon:- We will consider freezing it iff:
671 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
672 * - its role in the coalition is XPC service.
673 *
674 * We skip memory size requirements in this case.
675 */
676
677 coalition_t coal = COALITION_NULL;
678 task_t leader_task = NULL, curr_task = NULL;
679 int task_role_in_coalition = 0;
680
681 curr_task = proc_task(p);
682 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
683
684 if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
685 /*
686 * By default, XPC services without an app
687 * will be the leader of their own single-member
688 * coalition.
689 */
690 goto skip_ineligible_xpc;
691 }
692
693 leader_task = coalition_get_leader(coal);
694 if (leader_task == TASK_NULL) {
695 /*
696 * This jetsam coalition is currently leader-less.
697 * This could happen if the app died, but XPC services
698 * have not yet exited.
699 */
700 goto skip_ineligible_xpc;
701 }
702
703 leader_proc = (proc_t)get_bsdtask_info(leader_task);
704 task_deallocate(leader_task);
705
706 if (leader_proc == PROC_NULL) {
707 /* leader task is exiting */
708 goto skip_ineligible_xpc;
709 }
710
711 task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
712
713 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
714 xpc_skip_size_probability_check = TRUE;
715 leader_pid = leader_proc->p_pid;
716 goto continue_eval;
717 }
718
719skip_ineligible_xpc:
720 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
721 proc_count++;
722 continue;
723 }
724
725continue_eval:
726 strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
727
728 list_entry->p_pid = p->p_pid;
729
730 state = p->p_memstat_state;
731
732 if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
733 !(state & P_MEMSTAT_SUSPENDED)) {
734 try_freeze = list_entry->freeze_has_memstat_state = FALSE;
735 } else {
736 try_freeze = list_entry->freeze_has_memstat_state = TRUE;
737 }
738
739 list_entry->p_memstat_state = state;
740
741 if (xpc_skip_size_probability_check == TRUE) {
742 /*
743			 * Assuming the coalition leader is freezable,
744			 * we don't care about minimum pages and probability
745			 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
746			 * XPC services have to be explicitly opted out of the disabled
747			 * state, and we checked that state above.
748 */
749 list_entry->freeze_has_pages_min = TRUE;
750 list_entry->p_pages = -1;
751 list_entry->freeze_has_probability = -1;
752
753 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
754 list_entry->p_leader_pid = leader_pid;
755
756 xpc_skip_size_probability_check = FALSE;
757 } else {
758 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
759 list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
760
761 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
762 if (pages < memorystatus_freeze_pages_min) {
763 try_freeze = list_entry->freeze_has_pages_min = FALSE;
764 } else {
765 list_entry->freeze_has_pages_min = TRUE;
766 }
767
768 list_entry->p_pages = pages;
769
770 if (entry_count) {
771 uint32_t j = 0;
772 for (j = 0; j < entry_count; j++) {
773 if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
774 p->p_name,
775 MAXCOMLEN + 1) == 0) {
776 probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
777 break;
778 }
779 }
780
781 list_entry->freeze_has_probability = probability_of_use;
782
783 try_freeze = ((probability_of_use > 0) && try_freeze);
784 } else {
785 list_entry->freeze_has_probability = -1;
786 }
787 }
788
789 if (try_freeze) {
790 uint32_t purgeable, wired, clean, dirty, shared;
791 uint32_t max_pages = 0;
792 int freezer_error_code = 0;
793
794 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
795
796 if (error) {
797 list_entry->p_freeze_error_code = freezer_error_code;
798 }
799
800 list_entry->freeze_attempted = TRUE;
801 }
802
803 list_entry++;
804 freeze_eligible_proc_considered++;
805
806 list_size += sizeof(proc_freezable_status_t);
807
808 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
809 proc_count++;
810 }
811
812 proc_list_unlock();
813
814 list_entry = list_entry_start;
815
816 for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
817 if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
818 leader_pid = list_entry[xpc_index].p_leader_pid;
819
820 leader_proc = proc_find(leader_pid);
821
822 if (leader_proc) {
823 if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
824 /*
825 * Leader has already been frozen.
826 */
827 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
828 proc_rele(leader_proc);
829 continue;
830 }
831 proc_rele(leader_proc);
832 }
833
834 for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
835 if (list_entry[leader_index].p_pid == leader_pid) {
836 if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
837 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
838 } else {
839 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
840 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
841 }
842 break;
843 }
844 }
845
846 /*
847			 * Didn't find the leader entry. This is most likely because
848			 * the leader never made it down to band 0.
849 */
850 if (leader_index == freeze_eligible_proc_considered) {
851 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
852 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
853 }
854 }
855 }
856
857 buffer_size = list_size;
858
859 error = copyout(list_head, buffer, buffer_size);
860 if (error == 0) {
861 *retval = buffer_size;
862 } else {
863 *retval = 0;
864 }
865
866 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
867 kfree(list_head, list_size);
868
869	MEMORYSTATUS_DEBUG(1, "memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);
870
871 return error;
872}
873
874int
875memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
876{
877 int err = ENOTSUP;
878
879 if (flags == FREEZER_CONTROL_GET_STATUS) {
880 err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
881 }
882
883 return err;
884}
885
886#endif /* DEVELOPMENT || DEBUG */
887
888extern void vm_swap_consider_defragmenting(int);
889extern boolean_t memorystatus_kill_elevated_process(uint32_t, os_reason_t, unsigned int, int, uint32_t *, uint64_t *);
890
891/*
892 * This routine will _jetsam_ all frozen processes
893 * and reclaim the swap space immediately.
894 *
895 * So freeze has to be DISABLED when we call this routine.
896 */
897
898void
899memorystatus_disable_freeze(void)
900{
901 memstat_bucket_t *bucket;
902 int bucket_count = 0, retries = 0;
903 boolean_t retval = FALSE, killed = FALSE;
904 uint32_t errors = 0, errors_over_prev_iteration = 0;
905 os_reason_t jetsam_reason = 0;
906 unsigned int band = 0;
907 proc_t p = PROC_NULL, next_p = PROC_NULL;
908 uint64_t memory_reclaimed = 0, footprint = 0;
909
910 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
911 memorystatus_available_pages, 0, 0, 0, 0);
912
913 assert(memorystatus_freeze_enabled == FALSE);
914
915 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
916 if (jetsam_reason == OS_REASON_NULL) {
917 printf("memorystatus_disable_freeze: failed to allocate jetsam reason\n");
918 }
919
920 /*
921 * Let's relocate all frozen processes into band 8. Demoted frozen processes
922 * are sitting in band 0 currently and it's possible to have a frozen process
923 * in the FG band being actively used. We don't reset its frozen state when
924 * it is resumed because it has state on disk.
925 *
926 * We choose to do this relocation rather than implement a new 'kill frozen'
927 * process function for these reasons:
928 * - duplication of code: too many kill functions exist and we need to rework them better.
929 * - disk-space-shortage kills are rare
930	 * - not having the 'real' jetsam band at the time of this frozen kill won't preclude us
931	 *   from answering any important questions regarding jetsam policy/effectiveness.
932 *
933 * This is essentially what memorystatus_update_inactive_jetsam_priority_band() does while
934 * avoiding the application of memory limits.
935 */
936
937again:
938 proc_list_lock();
939
940 band = JETSAM_PRIORITY_IDLE;
941 p = PROC_NULL;
942 next_p = PROC_NULL;
943
944 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
945 while (next_p) {
946 p = next_p;
947 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
948
949 if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
950 break;
951 }
952
953 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
954 continue;
955 }
956
957 if (p->p_memstat_state & P_MEMSTAT_ERROR) {
958 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
959 }
960
961 if (p->p_memstat_effectivepriority == memorystatus_freeze_jetsam_band) {
962 continue;
963 }
964
965 /*
966 * We explicitly add this flag here so the process looks like a normal
967 * frozen process i.e. P_MEMSTAT_FROZEN and P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND.
968 * We don't bother with assigning the 'active' memory
969 * limits at this point because we are going to be killing it soon below.
970 */
971 p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
972 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
973
974 memorystatus_update_priority_locked(p, memorystatus_freeze_jetsam_band, FALSE, TRUE);
975 }
976
977 bucket = &memstat_bucket[memorystatus_freeze_jetsam_band];
978 bucket_count = bucket->count;
979 proc_list_unlock();
980
981 /*
982 * Bucket count is already stale at this point. But, we don't expect
983 * freezing to continue since we have already disabled the freeze functionality.
984 * However, an existing freeze might be in progress. So we might miss that process
985 * in the first go-around. We hope to catch it in the next.
986 */
987
988 errors_over_prev_iteration = 0;
989 while (bucket_count) {
990 bucket_count--;
991
992 /*
993 * memorystatus_kill_elevated_process() drops a reference,
994 * so take another one so we can continue to use this exit reason
995 * even after it returns.
996 */
997
998 os_reason_ref(jetsam_reason);
999 retval = memorystatus_kill_elevated_process(
1000 kMemorystatusKilledDiskSpaceShortage,
1001 jetsam_reason,
1002 memorystatus_freeze_jetsam_band,
1003 0, /* the iteration of aggressive jetsam..ignored here */
1004 &errors,
1005 &footprint);
1006
1007 if (errors > 0) {
1008 printf("memorystatus_disable_freeze: memorystatus_kill_elevated_process returned %d error(s)\n", errors);
1009 errors_over_prev_iteration += errors;
1010 errors = 0;
1011 }
1012
1013 if (retval == 0) {
1014 /*
1015 * No frozen processes left to kill.
1016 */
1017 break;
1018 }
1019
1020 killed = TRUE;
1021 memory_reclaimed += footprint;
1022 }
1023
1024 proc_list_lock();
1025
1026 if (memorystatus_frozen_count) {
1027 /*
1028 * A frozen process snuck in and so
1029 * go back around to kill it. That
1030 * process may have been resumed and
1031 * put into the FG band too. So we
1032 * have to do the relocation again.
1033 */
1034 assert(memorystatus_freeze_enabled == FALSE);
1035
1036 retries++;
1037 if (retries < 3) {
1038 proc_list_unlock();
1039 goto again;
1040 }
1041#if DEVELOPMENT || DEBUG
1042 panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d, errors = %d",
1043 memorystatus_frozen_count, errors_over_prev_iteration);
1044#endif /* DEVELOPMENT || DEBUG */
1045 }
1046 proc_list_unlock();
1047
1048 os_reason_free(jetsam_reason);
1049
1050 if (killed) {
1051 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1052
1053 proc_list_lock();
1054 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1055 sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1056 uint64_t timestamp_now = mach_absolute_time();
1057 memorystatus_jetsam_snapshot->notification_time = timestamp_now;
1058 memorystatus_jetsam_snapshot->js_gencount++;
1059 if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
1060 timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
1061 proc_list_unlock();
1062 int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1063 if (!ret) {
1064 proc_list_lock();
1065 memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
1066 proc_list_unlock();
1067 }
1068 } else {
1069 proc_list_unlock();
1070 }
1071 }
1072
1073 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1074 memorystatus_available_pages, memory_reclaimed, 0, 0, 0);
1075
1076 return;
1077}
1078
1079__private_extern__ void
1080memorystatus_freeze_init(void)
1081{
1082 kern_return_t result;
1083 thread_t thread;
1084
1085 freezer_lck_grp_attr = lck_grp_attr_alloc_init();
1086 freezer_lck_grp = lck_grp_alloc_init("freezer", freezer_lck_grp_attr);
1087
1088 lck_mtx_init(&freezer_mutex, freezer_lck_grp, NULL);
1089
1090 /*
1091 * This is just the default value if the underlying
1092 * storage device doesn't have any specific budget.
1093 * We check with the storage layer in memorystatus_freeze_update_throttle()
1094	 * before we start freezing for the first time.
1095 */
1096 memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
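	/*
	 * Illustrative arithmetic (assumed values): with a 1024 MB daily budget
	 * and 16 KB pages, this works out to (1024 * 1024 * 1024) / 16384 = 65536
	 * pages of freeze budget. The actual default budget and page size are
	 * platform-dependent.
	 */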
1097
1098 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1099 if (result == KERN_SUCCESS) {
1100 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1101 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1102 thread_set_thread_name(thread, "VM_freezer");
1103
1104 thread_deallocate(thread);
1105 } else {
1106 panic("Could not create memorystatus_freeze_thread");
1107 }
1108}
1109
1110static boolean_t
1111memorystatus_is_process_eligible_for_freeze(proc_t p)
1112{
1113 /*
1114 * Called with proc_list_lock held.
1115 */
1116
1117 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
1118
1119 boolean_t should_freeze = FALSE;
1120 uint32_t state = 0, entry_count = 0, pages = 0, i = 0;
1121 int probability_of_use = 0;
1122
1123 state = p->p_memstat_state;
1124
1125 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
1126 goto out;
1127 }
1128
1129 if (isSysProc(p)) {
1130 /*
1131 * Daemon:- We consider freezing it if:
1132 * - it belongs to a coalition and the leader is frozen, and,
1133 * - its role in the coalition is XPC service.
1134 *
1135 * We skip memory size requirements in this case.
1136 */
1137
1138 coalition_t coal = COALITION_NULL;
1139 task_t leader_task = NULL, curr_task = NULL;
1140 proc_t leader_proc = NULL;
1141 int task_role_in_coalition = 0;
1142
1143 curr_task = proc_task(p);
1144 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1145
1146 if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1147 /*
1148 * By default, XPC services without an app
1149 * will be the leader of their own single-member
1150 * coalition.
1151 */
1152 goto out;
1153 }
1154
1155 leader_task = coalition_get_leader(coal);
1156 if (leader_task == TASK_NULL) {
1157 /*
1158 * This jetsam coalition is currently leader-less.
1159 * This could happen if the app died, but XPC services
1160 * have not yet exited.
1161 */
1162 goto out;
1163 }
1164
1165 leader_proc = (proc_t)get_bsdtask_info(leader_task);
1166 task_deallocate(leader_task);
1167
1168 if (leader_proc == PROC_NULL) {
1169 /* leader task is exiting */
1170 goto out;
1171 }
1172
1173 if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
1174 goto out;
1175 }
1176
1177 task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
1178
1179 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
1180 should_freeze = TRUE;
1181 }
1182
1183 goto out;
1184 } else {
1185 /*
1186 * Application. In addition to the above states we need to make
1187 * sure we only consider suspended applications for freezing.
1188 */
1189 if (!(state & P_MEMSTAT_SUSPENDED)) {
1190 goto out;
1191 }
1192 }
1193
1194 /*
1195 * This proc is a suspended application.
1196 * We're interested in tracking what percentage of these
1197 * actually get frozen.
1198 */
1199 memorystatus_freezer_stats.mfs_process_considered_count++;
1200
1201 /* Only freeze applications meeting our minimum resident page criteria */
1202 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
1203 if (pages < memorystatus_freeze_pages_min) {
1204 memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
1205 goto out;
1206 }
1207
1208	/* Don't freeze a process that is already exiting on core. It may have started exiting
1209	 * after we chose it for freeze, but before we obtained the proc_list_lock.
1210 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
1211 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
1212 */
1213 if ((p->p_listflag & P_LIST_EXITED) != 0) {
1214 memorystatus_freezer_stats.mfs_error_other_count++;
1215 goto out;
1216 }
1217
1218 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1219
1220 if (entry_count) {
1221 for (i = 0; i < entry_count; i++) {
1222 if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1223 p->p_name,
1224 MAXCOMLEN + 1) == 0) {
1225 probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1226 break;
1227 }
1228 }
1229
1230 if (probability_of_use == 0) {
1231 memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
1232 goto out;
1233 }
1234 }
1235
1236 should_freeze = TRUE;
1237out:
1238 return should_freeze;
1239}
1240
1241/*
1242 * Synchronously freeze the passed proc. Called with a reference to the proc held.
1243 *
1244 * Doesn't deal with:
1245 * - re-freezing because this is called on a specific process and
1246 * not by the freezer thread. If that changes, we'll have to teach it about
1247 * refreezing a frozen process.
1248 *
1249 * - grouped/coalition freezing because we are hoping to deprecate this
1250 * interface as it was used by user-space to freeze particular processes. But
1251 * we have moved away from that approach to having the kernel choose the optimal
1252 * candidates to be frozen.
1253 *
1254 * Returns EINVAL or the value returned by task_freeze().
1255 */
1256int
1257memorystatus_freeze_process_sync(proc_t p)
1258{
1259 int ret = EINVAL;
1260 pid_t aPid = 0;
1261 boolean_t memorystatus_freeze_swap_low = FALSE;
1262 int freezer_error_code = 0;
1263
1264 lck_mtx_lock(&freezer_mutex);
1265
1266 if (p == NULL) {
1267 printf("memorystatus_freeze_process_sync: Invalid process\n");
1268 goto exit;
1269 }
1270
1271 if (memorystatus_freeze_enabled == FALSE) {
1272 printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
1273 goto exit;
1274 }
1275
1276 if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
1277 printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
1278 goto exit;
1279 }
1280
1281 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1282 if (!memorystatus_freeze_budget_pages_remaining) {
1283 printf("memorystatus_freeze_process_sync: exit with NO available budget\n");
1284 goto exit;
1285 }
1286
1287 proc_list_lock();
1288
1289 if (p != NULL) {
1290 uint32_t purgeable, wired, clean, dirty, shared;
1291 uint32_t max_pages, i;
1292
1293 aPid = p->p_pid;
1294
1295 /* Ensure the process is eligible for freezing */
1296 if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1297 proc_list_unlock();
1298 goto exit;
1299 }
1300
1301 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1302 max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1303 } else {
1304 /*
1305 * We only have the compressor without any swap.
1306 */
1307 max_pages = UINT32_MAX - 1;
1308 }
1309
1310 /* Mark as locked temporarily to avoid kill */
1311 p->p_memstat_state |= P_MEMSTAT_LOCKED;
1312 proc_list_unlock();
1313
1314 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1315 memorystatus_available_pages, 0, 0, 0, 0);
1316
1317 ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1318 if (ret == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1319 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1320 }
1321
1322 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1323 memorystatus_available_pages, aPid, 0, 0, 0);
1324
1325 DTRACE_MEMORYSTATUS6(memorystatus_freeze, proc_t, p, unsigned int, memorystatus_available_pages, boolean_t, purgeable, unsigned int, wired, uint32_t, clean, uint32_t, dirty);
1326
1327 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - "
1328 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1329 (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1330 memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1331
1332 proc_list_lock();
1333
1334 if (ret == KERN_SUCCESS) {
1335 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1336
1337 p->p_memstat_freeze_sharedanon_pages += shared;
1338
1339 memorystatus_frozen_shared_mb += shared;
1340
1341 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1342 p->p_memstat_state |= P_MEMSTAT_FROZEN;
1343 memorystatus_frozen_count++;
1344 }
1345
1346 p->p_memstat_frozen_count++;
1347
1348 /*
1349 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1350 * to its higher jetsam band.
1351 */
1352 proc_list_unlock();
1353
1354 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1355
1356 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1357 ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
1358 memorystatus_freeze_jetsam_band, TRUE);
1359
1360 if (ret) {
1361 printf("Elevating the frozen process failed with %d\n", ret);
1362 /* not fatal */
1363 ret = 0;
1364 }
1365
1366
1367 /* Update stats */
1368 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1369 throttle_intervals[i].pageouts += dirty;
1370 }
1371 }
1372 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1373 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages",
1374 aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty);
1375
1376 proc_list_lock();
1377
1378 memorystatus_freeze_pageouts += dirty;
1379
1380 if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1381 /*
1382 * Add some eviction logic here? At some point should we
1383 * jetsam a process to get back its swap space so that we
1384 * can freeze a more eligible process at this moment in time?
1385 */
1386 }
1387 } else {
1388			char reason[128] = "unknown error"; /* default, in case freezer_error_code is not one of the cases below */
1389 if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
1390 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
1391 strlcpy(reason, "too much shared memory", 128);
1392 }
1393
1394 if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1395 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
1396 strlcpy(reason, "low private-shared pages ratio", 128);
1397 }
1398
1399 if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
1400 memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
1401 strlcpy(reason, "no compressor space", 128);
1402 }
1403
1404 if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
1405 memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
1406 strlcpy(reason, "no swap space", 128);
1407 }
1408
1409 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s]...skipped (%s)",
1410 aPid, ((p && *p->p_name) ? p->p_name : "unknown"), reason);
1411 p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
1412 }
1413
1414 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1415 wakeup(&p->p_memstat_state);
1416 proc_list_unlock();
1417 }
1418
1419exit:
1420 lck_mtx_unlock(&freezer_mutex);
1421
1422 return ret;
1423}
1424
1425/*
1426 * Caller must hold the freezer_mutex and it will be locked on return.
1427 */
1428static int
1429memorystatus_freeze_top_process(void)
1430{
1431 pid_t aPid = 0, coal_xpc_pid = 0;
1432 int ret = -1;
1433 proc_t p = PROC_NULL, next_p = PROC_NULL;
1434 unsigned int i = 0;
1435 unsigned int band = JETSAM_PRIORITY_IDLE;
1436 boolean_t refreeze_processes = FALSE;
1437 task_t curr_task = NULL;
1438 coalition_t coal = COALITION_NULL;
1439 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
1440 unsigned int ntasks = 0;
1441 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1442
1443 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);
1444
1445 proc_list_lock();
1446
1447 if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
1448 /*
1449 * Freezer is already full but we are here and so let's
1450 * try to refreeze any processes we might have thawed
1451 * in the past and push out their compressed state out.
1452 */
1453 refreeze_processes = TRUE;
1454 band = (unsigned int) memorystatus_freeze_jetsam_band;
1455 }
1456
1457freeze_process:
1458
1459 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
1460 while (next_p) {
1461 kern_return_t kr;
1462 uint32_t purgeable, wired, clean, dirty, shared;
1463 uint32_t max_pages = 0;
1464 int freezer_error_code = 0;
1465
1466 p = next_p;
1467
1468 if (coal == NULL) {
1469 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1470 } else {
1471 /*
1472 * We have frozen a coalition leader and now are
1473 * dealing with its XPC services. We get our
1474 * next_p for each XPC service from the pid_list
1475 * acquired after a successful task_freeze call
1476 * on the coalition leader.
1477 */
1478
1479 if (ntasks > 0) {
1480 coal_xpc_pid = pid_list[--ntasks];
1481 next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */);
1482 /*
1483 * We grab a reference when we are about to freeze the process. So, drop
1484 * the reference that proc_findinternal() grabbed for us.
1485 * We also have the proc_list_lock and so this process is stable.
1486 */
1487 if (next_p) {
1488 proc_rele_locked(next_p);
1489 }
1490 } else {
1491 next_p = NULL;
1492 }
1493 }
1494
1495 aPid = p->p_pid;
1496
1497 if (p->p_memstat_effectivepriority != (int32_t) band) {
1498 /*
1499 * We shouldn't be freezing processes outside the
1500 * prescribed band.
1501 */
1502 break;
1503 }
1504
1505 /* Ensure the process is eligible for (re-)freezing */
1506 if (refreeze_processes) {
1507 /*
1508 * Has to have been frozen once before.
1509 */
1510 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1511 continue;
1512 }
1513
1514 /*
1515 * Has to have been resumed once before.
1516 */
1517 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == FALSE) {
1518 continue;
1519 }
1520
1521 /*
1522			 * Must not currently be locked, i.e. being examined by some other memorystatus operation.
1523 */
1524 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1525 continue;
1526 }
1527
1528 /*
1529 * We are going to try and refreeze and so re-evaluate
1530 * the process. We don't want to double count the shared
1531 * memory. So deduct the old snapshot here.
1532 */
1533 memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
1534 p->p_memstat_freeze_sharedanon_pages = 0;
1535
1536 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1537 memorystatus_refreeze_eligible_count--;
1538 } else {
1539 if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1540 continue; // with lock held
1541 }
1542 }
1543
1544 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1545 /*
1546 * Freezer backed by the compressor and swap file(s)
1547 * will hold compressed data.
1548 */
1549
1550 max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1551 } else {
1552 /*
1553 * We only have the compressor pool.
1554 */
1555 max_pages = UINT32_MAX - 1;
1556 }
1557
1558 /* Mark as locked temporarily to avoid kill */
1559 p->p_memstat_state |= P_MEMSTAT_LOCKED;
1560
1561 p = proc_ref_locked(p);
1562 if (!p) {
1563 memorystatus_freezer_stats.mfs_error_other_count++;
1564 break;
1565 }
1566
1567 proc_list_unlock();
1568
1569 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1570 memorystatus_available_pages, 0, 0, 0, 0);
1571
1572 kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1573 if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1574 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1575 }
1576
1577 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1578 memorystatus_available_pages, aPid, 0, 0, 0);
1579
1580 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
1581 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1582 (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1583 memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1584
1585 proc_list_lock();
1586
1587 /* Success? */
1588 if (KERN_SUCCESS == kr) {
1589 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1590
1591 p->p_memstat_freeze_sharedanon_pages += shared;
1592
1593 memorystatus_frozen_shared_mb += shared;
1594
1595 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1596 p->p_memstat_state |= P_MEMSTAT_FROZEN;
1597 memorystatus_frozen_count++;
1598 }
1599
1600 p->p_memstat_frozen_count++;
1601
1602 /*
1603 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1604 * to its higher jetsam band.
1605 */
1606 proc_list_unlock();
1607
1608 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1609
1610 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1611 ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);
1612
1613 if (ret) {
1614 printf("Elevating the frozen process failed with %d\n", ret);
1615 /* not fatal */
1616 ret = 0;
1617 }
1618
1619 /* Update stats */
1620 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1621 throttle_intervals[i].pageouts += dirty;
1622 }
1623 }
1624 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1625 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
1626 refreeze_processes? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, refreeze_processes? "Re" : "", dirty);
1627
1628 proc_list_lock();
1629
1630 memorystatus_freeze_pageouts += dirty;
1631
1632 if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1633 /*
1634 * Add some eviction logic here? At some point should we
1635 * jetsam a process to get back its swap space so that we
1636 * can freeze a more eligible process at this moment in time?
1637 */
1638 }
1639
1640 /* Return KERN_SUCCESS */
1641 ret = kr;
1642
1643 /*
1644 * We froze a process successfully. If this process isn't
1645 * part of a coalition, we can stop now and see if that
1646 * helped.
1647 *
1648 * Else:
1649 * - if it is a coalition leader, get the list of XPC
1650 * services that need to be frozen.
1651 * - if it is an XPC service whose leader was frozen
1652 * here, continue on to the next XPC service in the list.
1653 */
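/*
 * Sketch of the coalition walk implemented below: after the leader is frozen,
 * coalition_get_pid_list(..., COALITION_ROLEMASK_XPC, ...) fills pid_list[];
 * each subsequent pass of the loop then takes its next_p from pid_list[]
 * until ntasks drops to zero.
 */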
1654
1655 if (coal == NULL) {
1656 curr_task = proc_task(p);
1657 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1658 if (coalition_is_leader(curr_task, coal)) {
1659 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
1660 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
1661
1662 if (ntasks > MAX_XPC_SERVICE_PIDS) {
1663 ntasks = MAX_XPC_SERVICE_PIDS;
1664 }
1665 }
1666
1667 next_p = NULL;
1668
1669 if (ntasks > 0) {
1670 /*
1671 * Start off with our first next_p in this list.
1672 */
1673 coal_xpc_pid = pid_list[--ntasks];
1674 next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */);
1675
1676 /*
1677 * We grab a reference when we are about to freeze the process. So drop
1678 * the reference that proc_findinternal() grabbed for us.
1679 * We also have the proc_list_lock and so this process is stable.
1680 */
1681 if (next_p) {
1682 proc_rele_locked(next_p);
1683 }
1684 }
1685 }
1686
1687 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1688 wakeup(&p->p_memstat_state);
1689 proc_rele_locked(p);
1690
1691 if (coal && next_p) {
1692 continue;
1693 }
1694
1695 /*
1696 * No coalition leader was frozen. So we don't
1697 * need to evaluate any XPC services.
1698 *
1699 * OR
1700 *
1701 * We have frozen all eligible XPC services for
1702 * the current coalition leader.
1703 *
1704 * Either way, we can break here and see if freezing
1705 * helped.
1706 */
1707
1708 break;
1709 } else {
1710 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1711 wakeup(&p->p_memstat_state);
1712
1713 if (refreeze_processes == TRUE) {
1714 if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
1715 (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
1716 /*
1717 * Keeping this prior-frozen process in this high band when
1718 * we failed to re-freeze it due to bad shared memory usage
1719 * could cause excessive pressure on the lower bands.
1720 * We need to demote it for now. It'll get re-evaluated next
1721 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
1722 * bit.
1723 */
1724
1725 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1726 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1727 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
1728 }
1729 } else {
1730 p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
1731 }
1732
1733 char reason[128] = "unknown error"; /* default, in case task_freeze did not set a specific freezer_error_code */
1734 if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
1735 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
1736 strlcpy(reason, "too much shared memory", 128);
1737 }
1738
1739 if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1740 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
1741 strlcpy(reason, "low private-shared pages ratio", 128);
1742 }
1743
1744 if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
1745 memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
1746 strlcpy(reason, "no compressor space", 128);
1747 }
1748
1749 if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
1750 memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
1751 strlcpy(reason, "no swap space", 128);
1752 }
1753
1754 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (%s) pid %d [%s]...skipped (%s)\n",
1755 (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), reason);
1756
1757 proc_rele_locked(p);
1758
1759 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
1760 break;
1761 }
1762 }
1763 }
1764
1765 if ((ret == -1) &&
1766 (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD) &&
1767 (refreeze_processes == FALSE)) {
1768 /*
1769 * We failed to freeze a process from the IDLE
1770 * band AND we have some thawed processes
1771 * AND haven't tried refreezing yet.
1772 * Try to re-freeze processes in the frozen band
1773 * that have been resumed in the past and so have
1774 * brought state back in from disk.
1775 */
1776
1777 band = (unsigned int) memorystatus_freeze_jetsam_band;
1778
1779 refreeze_processes = TRUE;
1780
1781 goto freeze_process;
1782 }
1783
1784 proc_list_unlock();
1785
1786 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, aPid, 0, 0, 0);
1787
1788 return ret;
1789}
1790
1791static inline boolean_t
1792memorystatus_can_freeze_processes(void)
1793{
1794 boolean_t ret;
1795
1796 proc_list_lock();
1797
1798 if (memorystatus_suspended_count) {
1799 memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
1800
1801 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
1802 ret = TRUE;
1803 } else {
1804 ret = FALSE;
1805 }
1806 } else {
1807 ret = FALSE;
1808 }
1809
1810 proc_list_unlock();
1811
1812 return ret;
1813}
1814
1815static boolean_t
1816memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
1817{
1818 boolean_t can_freeze = TRUE;
1819
1820 /* Only freeze if we're sufficiently low on memory; this holds off freeze right
1821 * after boot, and is generally a no-op once we've reached steady state. */
1822 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
1823 return FALSE;
1824 }
1825
1826 /* Check minimum suspended process threshold. */
1827 if (!memorystatus_can_freeze_processes()) {
1828 return FALSE;
1829 }
1830 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
1831
1832 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1833 /*
1834 * In-core compressor used for freezing WITHOUT on-disk swap support.
1835 */
1836 if (vm_compressor_low_on_space()) {
1837 if (memorystatus_freeze_swap_low) {
1838 *memorystatus_freeze_swap_low = TRUE;
1839 }
1840
1841 can_freeze = FALSE;
1842 } else {
1843 if (memorystatus_freeze_swap_low) {
1844 *memorystatus_freeze_swap_low = FALSE;
1845 }
1846
1847 can_freeze = TRUE;
1848 }
1849 } else {
1850 /*
1851 * Freezing WITH on-disk swap support.
1852 *
1853 * In-core compressor fronts the swap.
1854 */
1855 if (vm_swap_low_on_space()) {
1856 if (memorystatus_freeze_swap_low) {
1857 *memorystatus_freeze_swap_low = TRUE;
1858 }
1859
1860 can_freeze = FALSE;
1861 }
1862 }
1863
1864 return can_freeze;
1865}
1866
1867/*
1868 * This function evaluates if the currently frozen processes deserve
1869 * to stay in the higher jetsam band. There are 2 modes:
1870 * - 'force_one == TRUE': (urgent mode)
1871 * We are out of budget and can't refreeze a process. The process's
1872 * state, if it was resumed, will stay in compressed memory. If we let it
1873 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
1874 * the lower bands. So we force-demote the least-recently-used-and-thawed
1875 * process.
1876 *
1877 * - 'force_one == FALSE': (normal mode)
1878 * If the # of thaws of a process is below our threshold, then we
1879 * will demote that process into the IDLE band.
1880 * We don't immediately kill the process here because it already has
1881 * state on disk and so it might be worth giving it another shot at
1882 * getting thawed/resumed and used.
1883 */
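/*
 * Within this file, normal mode is invoked from memorystatus_freeze_update_throttle()
 * whenever a new normal throttle window begins, and urgent mode is invoked from
 * memorystatus_freeze_thread() when the pageout budget has been exhausted.
 */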
1884static void
1885memorystatus_demote_frozen_processes(boolean_t force_one)
1886{
1887 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
1888 unsigned int demoted_proc_count = 0;
1889 proc_t p = PROC_NULL, next_p = PROC_NULL;
1890 /* We demote to IDLE unless someone has asserted a higher priority on this process. */
1891 int maxpriority = JETSAM_PRIORITY_IDLE;
1892
1893 proc_list_lock();
1894
1895 if (memorystatus_freeze_enabled == FALSE) {
1896 /*
1897 * Freeze has been disabled likely to
1898 * reclaim swap space. So don't change
1899 * any state on the frozen processes.
1900 */
1901 proc_list_unlock();
1902 return;
1903 }
1904
1905 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
1906 while (next_p) {
1907 p = next_p;
1908 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1909
1910 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1911 continue;
1912 }
1913
1914 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1915 continue;
1916 }
1917
1918 if (force_one == TRUE) {
1919 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) {
1920 /*
1921 * This process hasn't been thawed recently, so most of
1922 * its state sits on NAND; skip it -- jetsamming it
1923 * won't help with memory pressure.
1924 */
1925 continue;
1926 }
1927 } else {
1928 if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
1929 /*
1930 * This process has met / exceeded our thaw count demotion threshold
1931 * and so we let it live in the higher bands.
1932 */
1933 continue;
1934 }
1935 }
1936
1937 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1938 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1939
1940 maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
1941 memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
1942#if DEVELOPMENT || DEBUG
1943 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_demote_frozen_process(%s) pid %d [%s]",
1944 (force_one ? "urgent" : "normal"), (p ? p->p_pid : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
1945#endif /* DEVELOPMENT || DEBUG */
1946
1947 /*
1948 * The freezer thread will consider this a normal app to be frozen
1949 * because it is in the IDLE band. So we don't need the
1950 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
1951 * we'll correctly count it as eligible for re-freeze again.
1952 *
1953 * We don't drop the frozen count because this process still has
1954 * state on disk. So there's a chance it gets resumed and then it
1955 * should land in the higher jetsam band. For that it needs to
1956 * remain marked frozen.
1957 */
1958 if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
1959 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1960 memorystatus_refreeze_eligible_count--;
1961 }
1962
1963 demoted_proc_count++;
1964
1965 if ((force_one == TRUE) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
1966 break;
1967 }
1968 }
1969
1970 if (force_one == FALSE) {
1971 /*
1972 * We use these counters to track daily hit rates.
1973 * So we only reset them to 0 under the normal
1974 * mode.
1975 */
1976 memorystatus_thaw_count = 0;
1977 }
1978
1979 proc_list_unlock();
1980}
1981
1982/*
1983 * Calculate a new freezer budget.
1984 * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
1985 * @param burst_multiple The burst_multiple for the new period
1986 * @param interval_duration_min How many minutes will the new interval be?
1987 * @param rollover The amount to rollover from the previous budget.
1988 *
1989 * @return A budget for the new interval.
1990 */
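/*
 * Worked example (the numbers are purely illustrative, not defaults): with a
 * 16KB page size and memorystatus_freeze_daily_mb_max of 1024MB,
 * freeze_daily_pageouts_max comes to 65536 pages. For a 24-hour interval
 * (interval_duration_min == 1440) with burst_multiple == 1, the base budget is
 * 1 * ((1440 * 65536) / 1440) == 65536 pages. If the previous interval expired
 * 12 hours before this one starts, days_missed == 50 (0.50 days in hundredths),
 * budget_missed == (50 * 65536) / 100 == 32768 pages, and the returned budget
 * is rollover + 65536 + 32768.
 */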
1991static uint32_t
1992memorystatus_freeze_calculate_new_budget(
1993 unsigned int time_since_last_interval_expired_sec,
1994 unsigned int burst_multiple,
1995 unsigned int interval_duration_min,
1996 uint32_t rollover)
1997{
1998 uint64_t freeze_daily_budget = 0;
1999 unsigned int daily_budget_pageouts = 0;
2000 unsigned int freeze_daily_pageouts_max = 0;
2001 const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2002 /* Precision factor for days_missed. 2 decimal points. */
2003 const static unsigned int kFixedPointFactor = 100;
2004 unsigned int days_missed, budget_missed;
2005
2006 /* Get the daily budget from the storage layer */
2007 if (vm_swap_max_budget(&freeze_daily_budget)) {
2008 memorystatus_freeze_daily_mb_max = (freeze_daily_budget / (1024 * 1024));
2009 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2010 }
2011 /* Calculate the daily pageout budget */
2012 freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2013
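/*
 * Scale the daily pageout budget to this interval's length:
 * (interval_duration_min / minutes-per-day) of freeze_daily_pageouts_max,
 * then multiply by the interval's burst_multiple.
 */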
2014 daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2015
2016 /*
2017 * Add additional budget for time since the interval expired.
2018 * For example, if the interval expired n days ago, we should get an additional n days
2019 * of budget since we didn't use any budget during those n days.
2020 */
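/*
 * days_missed is kept in hundredths of a day (kFixedPointFactor == 100).
 * For example, if the interval expired 36 hours ago, days_missed == 150 and
 * budget_missed works out to 1.5x freeze_daily_pageouts_max.
 */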
2021 days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2022 budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2023 return rollover + daily_budget_pageouts + budget_missed;
2024}
2025
2026#if DEVELOPMENT || DEBUG
2027
2028static int
2029sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
2030{
2031#pragma unused(arg1, arg2)
2032 int error = 0;
2033 unsigned int time_since_last_interval_expired_sec = 0;
2034 unsigned int new_budget;
2035
2036 error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
2037 if (error || !req->newptr) {
2038 return error;
2039 }
2040 new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
2041 return copyout(&new_budget, req->oldptr, MIN(req->oldlen, sizeof(new_budget)));
2042}
2043
2044SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2045 0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
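/*
 * Illustrative user-space usage of this DEVELOPMENT/DEBUG-only sysctl (a sketch,
 * not part of the kernel build):
 *
 *   unsigned int secs_since_expiry = 3600;  // input: seconds since the last interval expired
 *   unsigned int new_budget = 0;            // output: budget, in pages
 *   size_t len = sizeof(new_budget);
 *   sysctlbyname("vm.memorystatus_freeze_calculate_new_budget",
 *       &new_budget, &len, &secs_since_expiry, sizeof(secs_since_expiry));
 */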
2046
2047#endif /* DEVELOPMENT || DEBUG */
2048
2049/*
2050 * This function will do 4 things:
2051 *
2052 * 1) check to see if we are currently in a degraded freezer mode, and if so:
2053 * - check to see if our window has expired and we should exit this mode, OR,
2054 * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2055 *
2056 * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2057 *
2058 * 3) check what the current normal window allows for a budget.
2059 *
2060 * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2061 * what we would normally expect, then we are running low on our daily budget and need to enter
2062 * degraded perf. mode.
2063 *
2064 * Caller must hold the freezer mutex
2065 * Caller must not hold the proc_list lock
2066 */
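/*
 * Two throttle windows are consulted below: normal_throttle_window tracks the
 * regular interval's budget, while degraded_throttle_window is switched in when
 * we detect that the budget is being consumed faster than expected.
 */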
2067
2068static void
2069memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2070{
2071 clock_sec_t sec;
2072 clock_nsec_t nsec;
2073 mach_timespec_t now_ts;
2074 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2075 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2076
2077 unsigned int freeze_daily_pageouts_max = 0;
2078
2079#if DEVELOPMENT || DEBUG
2080 if (!memorystatus_freeze_throttle_enabled) {
2081 /*
2082 * No throttling...we can use the full budget every time.
2083 */
2084 *budget_pages_allowed = UINT64_MAX;
2085 return;
2086 }
2087#endif
2088
2089 clock_get_system_nanotime(&sec, &nsec);
2090 now_ts.tv_sec = sec;
2091 now_ts.tv_nsec = nsec;
2092
2093 struct throttle_interval_t *interval = NULL;
2094
2095 if (memorystatus_freeze_degradation == TRUE) {
2096 interval = degraded_throttle_window;
2097
2098 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2099 memorystatus_freeze_degradation = FALSE;
2100 interval->pageouts = 0;
2101 interval->max_pageouts = 0;
2102 } else {
2103 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2104 }
2105 }
2106
2107 interval = normal_throttle_window;
2108
2109 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2110 /* How long has it been since the previous interval expired? */
2111 mach_timespec_t expiration_period_ts = now_ts;
2112 SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
2113
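/* Any budget left unused in the expiring window rolls over into the new one. */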
2114 interval->max_pageouts = memorystatus_freeze_calculate_new_budget(
2115 expiration_period_ts.tv_sec, interval->burst_multiple,
2116 interval->mins, interval->max_pageouts - interval->pageouts);
2117 interval->ts.tv_sec = interval->mins * 60;
2118 interval->ts.tv_nsec = 0;
2119 ADD_MACH_TIMESPEC(&interval->ts, &now_ts);
2120 /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2121 if (interval->pageouts > interval->max_pageouts) {
2122 interval->pageouts -= interval->max_pageouts;
2123 } else {
2124 interval->pageouts = 0;
2125 }
2126 *budget_pages_allowed = interval->max_pageouts;
2127 memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2128
2129 memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */
2130 } else {
2131 /*
2132 * Current throttle window.
2133 * Deny freezing if we have no budget left.
2134 * Try graceful degradation if:
2135 * - the budget left is within 25% of the daily budget, and
2136 * - the budget left, spread over the time remaining, is below our normal budget expectations.
2137 */
2138
	/* Re-derive the daily pageout budget for the degradation checks below. */
	freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);

2139#if DEVELOPMENT || DEBUG
2140 /*
2141 * This can only happen in the INTERNAL configs because we allow modifying the daily budget for testing.
2142 */
2143
2144 if (freeze_daily_pageouts_max > interval->max_pageouts) {
2145 /*
2146 * We just bumped the daily budget. Re-evaluate our normal window params.
2147 */
2148 interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / NORMAL_WINDOW_MINS));
2149 memorystatus_freeze_degradation = FALSE; //we'll re-evaluate this below...
2150 }
2151#endif /* DEVELOPMENT || DEBUG */
2152
2153 if (memorystatus_freeze_degradation == FALSE) {
2154 if (interval->pageouts >= interval->max_pageouts) {
2155 *budget_pages_allowed = 0;
2156 } else {
2157 int budget_left = interval->max_pageouts - interval->pageouts;
2158 int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2159
2160 mach_timespec_t time_left = {0, 0};
2161
2162 time_left.tv_sec = interval->ts.tv_sec;
2163 time_left.tv_nsec = 0;
2164
2165 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2166
2167 if (budget_left <= budget_threshold) {
2168 /*
2169 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2170 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2171 * daily pageout budget.
2172 */
2173
2174 unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2175 unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2176
2177 /*
2178 * The current rate of pageouts is below what we would expect for
2179 * the normal rate i.e. we have below normal budget left and so...
2180 */
2181
2182 if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2183 memorystatus_freeze_degradation = TRUE;
2184 degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2185 degraded_throttle_window->pageouts = 0;
2186
2187 /*
2188 * Switch over to the degraded throttle window so the budget
2189 * doled out is based on that window.
2190 */
2191 interval = degraded_throttle_window;
2192 }
2193 }
2194
2195 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2196 }
2197 }
2198 }
2199
2200 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
2201 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60,
2202 interval->throttle ? "on" : "off");
2203}
2204
2205static void
2206memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2207{
2208 static boolean_t memorystatus_freeze_swap_low = FALSE;
2209
2210 lck_mtx_lock(&freezer_mutex);
2211
2212 if (memorystatus_freeze_enabled) {
2213 if ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2214 (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD)) {
2215 if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2216 /* Only freeze if we've not exceeded our pageout budgets.*/
2217 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2218
2219 if (memorystatus_freeze_budget_pages_remaining) {
2220 memorystatus_freeze_top_process();
2221 } else {
2222 memorystatus_demote_frozen_processes(TRUE); /* urgent mode..force one demotion */
2223 }
2224 }
2225 }
2226 }
2227
2228 /*
2229 * We use memorystatus_apps_idle_delay_time because if/when we adopt aging for applications,
2230 * it'll tie neatly into running the freezer once we age an application.
2231 *
2232 * Till then, it serves as a good interval that can be tuned via a sysctl too.
2233 */
2234 memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time;
2235
2236 assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2237 lck_mtx_unlock(&freezer_mutex);
2238
2239 thread_block((thread_continue_t) memorystatus_freeze_thread);
2240}
2241
2242boolean_t
2243memorystatus_freeze_thread_should_run(void)
2244{
2245 /*
2246 * No freezer_mutex held here...see why near call-site
2247 * within memorystatus_pages_update().
2248 */
2249
2250 boolean_t should_run = FALSE;
2251
2252 if (memorystatus_freeze_enabled == FALSE) {
2253 goto out;
2254 }
2255
2256 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2257 goto out;
2258 }
2259
2260 memorystatus_freezer_stats.mfs_below_threshold_count++;
2261
2262 if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
2263 /*
2264 * Consider this as a skip even if we wake up to refreeze because
2265 * we won't freeze any new procs.
2266 */
2267 memorystatus_freezer_stats.mfs_skipped_full_count++;
2268 if (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD) {
2269 goto out;
2270 }
2271 }
2272
2273 if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
2274 memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
2275 goto out;
2276 }
2277
2278 uint64_t curr_time = mach_absolute_time();
2279
2280 if (curr_time < memorystatus_freezer_thread_next_run_ts) {
2281 goto out;
2282 }
2283
2284 should_run = TRUE;
2285
2286out:
2287 return should_run;
2288}
2289
2290int
2291memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2292{
2293 proc_t p = PROC_NULL;
2294
2295 if (pid == 0) {
2296 return EINVAL;
2297 }
2298
2299 p = proc_find(pid);
2300 if (!p) {
2301 return ESRCH;
2302 }
2303
2304 /*
2305 * Only allow this on the current proc for now.
2306 * We can check for privileges and allow targeting another process in the future.
2307 */
2308 if (p != current_proc()) {
2309 proc_rele(p);
2310 return EPERM;
2311 }
2312
2313 proc_list_lock();
2314 *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
2315 proc_rele_locked(p);
2316 proc_list_unlock();
2317
2318 return 0;
2319}
2320
2321int
2322memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
2323{
2324 proc_t p = PROC_NULL;
2325
2326 if (pid == 0) {
2327 return EINVAL;
2328 }
2329
2330 /*
2331 * To enable freezable status, you need to be root or hold the MEMORYSTATUS_ENTITLEMENT entitlement.
2332 */
2333 if (is_freezable &&
2334 !kauth_cred_issuser(kauth_cred_get()) &&
2335 !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
2336 return EPERM;
2337 }
2338
2339 p = proc_find(pid);
2340 if (!p) {
2341 return ESRCH;
2342 }
2343
2344 /*
2345 * A process can change its own status. A coalition leader can
2346 * change the status of coalition members.
2347 */
2348 if (p != current_proc()) {
2349 coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
2350 if (!coalition_is_leader(proc_task(current_proc()), coal)) {
2351 proc_rele(p);
2352 return EPERM;
2353 }
2354 }
2355
2356 proc_list_lock();
2357 if (is_freezable == FALSE) {
2358 /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
2359 p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
2360 printf("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
2361 p->p_pid, (*p->p_name ? p->p_name : "unknown"));
2362 } else {
2363 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
2364 printf("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
2365 p->p_pid, (*p->p_name ? p->p_name : "unknown"));
2366 }
2367 proc_rele_locked(p);
2368 proc_list_unlock();
2369
2370 return 0;
2371}
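/*
 * Illustrative user-space sketch (an assumption about the call path, not verified
 * here): these entry points are reached through the memorystatus_control() syscall
 * using the MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE /
 * MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE commands, e.g.:
 *
 *   // mark the calling process as not freezable (flags carry the boolean)
 *   memorystatus_control(MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE, getpid(), 0, NULL, 0);
 *   // query it back for the calling process
 *   int freezable = memorystatus_control(MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE, getpid(), 0, NULL, 0);
 */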
2372
2373static int
2374sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS
2375{
2376#pragma unused(oidp, arg1, arg2)
2377
2378 if (!req->newptr) {
2379 return EINVAL;
2380 }
2381
2382 /* Need to be root or have entitlement */
2383 if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
2384 return EPERM;
2385 }
2386
2387 if (memorystatus_freeze_enabled == FALSE) {
2388 return ENOTSUP;
2389 }
2390
2391 do_fastwake_warmup_all();
2392
2393 return 0;
2394}
2395
2396SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
2397 0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
2398
2399#endif /* CONFIG_FREEZE */