apple/xnu.git (xnu-7195.101.1) - bsd/kern/kern_memorystatus_freeze.c
1 /*
2 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_group.h>
40
41 #include <libkern/libkern.h>
42 #include <mach/coalition.h>
43 #include <mach/mach_time.h>
44 #include <mach/task.h>
45 #include <mach/host_priv.h>
46 #include <mach/mach_host.h>
47 #include <os/log.h>
48 #include <pexpert/pexpert.h>
49 #include <sys/coalition.h>
50 #include <sys/kern_event.h>
51 #include <sys/proc.h>
52 #include <sys/proc_info.h>
53 #include <sys/reason.h>
54 #include <sys/signal.h>
55 #include <sys/signalvar.h>
56 #include <sys/sysctl.h>
57 #include <sys/sysproto.h>
58 #include <sys/wait.h>
59 #include <sys/tree.h>
60 #include <sys/priv.h>
61 #include <vm/vm_pageout.h>
62 #include <vm/vm_protos.h>
63 #include <mach/machine/sdt.h>
64 #include <libkern/section_keywords.h>
65 #include <stdatomic.h>
66
67 #include <IOKit/IOBSD.h>
68
69 #if CONFIG_FREEZE
70 #include <vm/vm_map.h>
71 #endif /* CONFIG_FREEZE */
72
73 #include <sys/kern_memorystatus.h>
74 #include <sys/kern_memorystatus_freeze.h>
75 #include <sys/kern_memorystatus_notify.h>
76
77 #if CONFIG_JETSAM
78
79 extern unsigned int memorystatus_available_pages;
80 extern unsigned int memorystatus_available_pages_pressure;
81 extern unsigned int memorystatus_available_pages_critical;
82 extern unsigned int memorystatus_available_pages_critical_base;
83 extern unsigned int memorystatus_available_pages_critical_idle_offset;
84
85 #else /* CONFIG_JETSAM */
86
87 extern uint64_t memorystatus_available_pages;
88 extern uint64_t memorystatus_available_pages_pressure;
89 extern uint64_t memorystatus_available_pages_critical;
90
91 #endif /* CONFIG_JETSAM */
92
93 unsigned int memorystatus_frozen_count = 0;
94 unsigned int memorystatus_suspended_count = 0;
95 unsigned long freeze_threshold_percentage = 50;
96
97 #if CONFIG_FREEZE
98
99 static LCK_GRP_DECLARE(freezer_lck_grp, "freezer");
100 static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp);
101
102 /* Thresholds */
103 unsigned int memorystatus_freeze_threshold = 0;
104 unsigned int memorystatus_freeze_pages_min = 0;
105 unsigned int memorystatus_freeze_pages_max = 0;
106 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
107 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
108 uint64_t memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
109 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
110
111 unsigned int memorystatus_max_frozen_demotions_daily = 0;
112 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
113
114 boolean_t memorystatus_freeze_enabled = FALSE;
115 int memorystatus_freeze_wakeup = 0;
116 int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
117
118 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
119
120 #ifdef XNU_KERNEL_PRIVATE
121
122 unsigned int memorystatus_frozen_processes_max = 0;
123 unsigned int memorystatus_frozen_shared_mb = 0;
124 unsigned int memorystatus_frozen_shared_mb_max = 0;
125 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
126 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
127 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
128 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
129 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
130
131 struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0};
132
133 #endif /* XNU_KERNEL_PRIVATE */
134
135 static inline boolean_t memorystatus_can_freeze_processes(void);
136 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
137 static boolean_t memorystatus_is_process_eligible_for_freeze(proc_t p);
138 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
139 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
140
141 void memorystatus_disable_freeze(void);
142
143 /* Stats */
144 static uint64_t memorystatus_freeze_pageouts = 0;
145
146 /* Throttling */
147 #define DEGRADED_WINDOW_MINS (30)
148 #define NORMAL_WINDOW_MINS (24 * 60)
149
150 /* Protected by the freezer_mutex */
151 static throttle_interval_t throttle_intervals[] = {
152 { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
153 { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
154 };
155 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
156 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
157 uint32_t memorystatus_freeze_current_interval = 0;
158
159 extern uint64_t vm_swap_get_free_space(void);
160 extern boolean_t vm_swap_max_budget(uint64_t *);
161 extern int i_coal_jetsam_get_taskrole(coalition_t coal, task_t task);
162
163 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
164 static void memorystatus_demote_frozen_processes(boolean_t force_one);
165
166 static void memorystatus_freeze_handle_error(proc_t p, const int freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
167 static void memorystatus_freeze_out_of_slots(void);
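/* Absolute-time (mach_absolute_time) deadline for the freezer thread's next scheduled run; used below to report whether a freeze is scheduled in the future. */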
168 static uint64_t memorystatus_freezer_thread_next_run_ts = 0;
169
170 /* Sysctls needed for aggd stats */
171
172 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
173 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
174 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
175 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
176 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, "");
177 #if DEVELOPMENT || DEBUG
178 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
179 {
180 #pragma unused(arg1, arg2, oidp)
181 int error, changed;
182 uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
183 mach_timespec_t now_ts;
184 clock_sec_t sec;
185 clock_nsec_t nsec;
186
187 lck_mtx_lock(&freezer_mutex);
188
189 error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
190 if (changed) {
191 /* Start a new interval with this budget. */
192 clock_get_system_nanotime(&sec, &nsec);
193 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
194 now_ts.tv_nsec = nsec;
195 memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
196 /* Don't carry over any excess pageouts since we're forcing a new budget */
197 normal_throttle_window->pageouts = 0;
198 memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
199 }
200
201 lck_mtx_unlock(&freezer_mutex);
202 return error;
203 }
204
205 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
206 #else /* DEVELOPMENT || DEBUG */
207 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
208 #endif /* DEVELOPMENT || DEBUG */
209 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
210 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
211 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
212 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
213 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
214 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
215 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
216 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
217 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
218 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
219 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
220 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
221 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
222 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
223
224 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
225
226
227 /*
228 * Calculates the hit rate for the freezer.
229 * The hit rate is defined as the percentage of procs currently in the
230 * freezer that we have thawed.
231 * A low hit rate means we're freezing bad candidates since they're not re-used.
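 * (For example, a thaw count of 10 against a frozen count of 40 reports 25.)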
232 */
233 static int sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
234 {
235 #pragma unused(arg1, arg2)
236 uint64_t thaw_count = 0, frozen_count = 0;
237 int thaw_percentage = 100;
238 frozen_count = os_atomic_load(&(memorystatus_freezer_stats.mfs_processes_frozen), relaxed);
239 thaw_count = os_atomic_load(&(memorystatus_freezer_stats.mfs_processes_thawed), relaxed);
240
241 if (frozen_count > 0) {
242 if (thaw_count > frozen_count) {
243 /*
244 * Both counts are using relaxed atomics & could be out of sync
245 * causing us to see thaw_percentage > 100.
246 */
247 thaw_percentage = 100;
248 } else {
249 thaw_percentage = (int)(100 * thaw_count / frozen_count);
250 }
251 }
252 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
253 }
254 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
255
256 #define FREEZER_ERROR_STRING_LENGTH 128
257
258 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, "");
259 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, "");
260 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, "");
261 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_MAX - 1, "");
262 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, "");
263 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, "");
264 /*
265 * max. # of frozen process demotions we will allow in our daily cycle.
266 */
267 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, "");
268
269 /*
270 * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
271 */
272 EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, "");
273
274 #if DEVELOPMENT || DEBUG
275
276 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
277 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
278 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
279 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
280
281 /*
282 * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
283 * "0" means no limit.
284 * Default is 10% of system-wide task limit.
285 */
286
287 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
288 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
289
290 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
291
292 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
293 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
294
295 /*
296 * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
297 * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
298 */
299 boolean_t memorystatus_freeze_to_memory = FALSE;
300 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
301
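/* Sentinel pid: writing this value to the freeze/thaw sysctls below operates on all processes rather than a single pid. */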
302 #define VM_PAGES_FOR_ALL_PROCS (2)
303
304 /*
305 * Manual trigger of freeze and thaw for dev / debug kernels only.
306 */
307 static int
308 sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
309 {
310 #pragma unused(arg1, arg2)
311 int error, pid = 0;
312 proc_t p;
313 int freezer_error_code = 0;
314 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
315 int ntasks = 0;
316 coalition_t coal = COALITION_NULL;
317
318 if (memorystatus_freeze_enabled == FALSE) {
319 printf("sysctl_freeze: Freeze is DISABLED\n");
320 return ENOTSUP;
321 }
322
323 error = sysctl_handle_int(oidp, &pid, 0, req);
324 if (error || !req->newptr) {
325 return error;
326 }
327
328 if (pid == VM_PAGES_FOR_ALL_PROCS) {
329 vm_pageout_anonymous_pages();
330
331 return 0;
332 }
333
334 lck_mtx_lock(&freezer_mutex);
335
336 again:
337 p = proc_find(pid);
338 if (p != NULL) {
339 memorystatus_freezer_stats.mfs_process_considered_count++;
340 uint32_t purgeable, wired, clean, dirty, shared;
341 uint32_t max_pages = 0, state = 0;
342
343 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
344 /*
345 * Freezer backed by the compressor and swap file(s)
346 * will hold compressed data.
347 *
348 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
349 * being swapped out to disk. Note that this disables freezer swap support globally,
350 * not just for the process being frozen.
351 *
352 *
353 * We don't care about the global freezer budget or the process's (min/max) budget here.
354 * The freeze sysctl is meant to force-freeze a process.
355 *
356 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
357 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
358 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
359 */
360 max_pages = memorystatus_freeze_pages_max;
361 } else {
362 /*
363 * We only have the compressor without any swap.
364 */
365 max_pages = UINT32_MAX - 1;
366 }
367
368 proc_list_lock();
369 state = p->p_memstat_state;
370 proc_list_unlock();
371
372 /*
373 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
374 * We simply ensure that jetsam is not already working on the process and that the process has not
375 * explicitly disabled freezing.
376 */
377 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
378 printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
379 (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
380 (state & P_MEMSTAT_LOCKED) ? " locked" : "",
381 (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
382
383 proc_rele(p);
384 lck_mtx_unlock(&freezer_mutex);
385 return EPERM;
386 }
387
388 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
389 if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
390 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
391 }
392
393 if (error) {
394 memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
395 if (error == KERN_NO_SPACE) {
396 /* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
397 error = ENOSPC;
398 } else {
399 error = EIO;
400 }
401 } else {
402 proc_list_lock();
403 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
404 p->p_memstat_state |= P_MEMSTAT_FROZEN;
405 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
406 memorystatus_frozen_count++;
407 os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
408 if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
409 memorystatus_freeze_out_of_slots();
410 }
411 } else {
412 // This was a re-freeze
413 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
414 memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
415 memorystatus_freezer_stats.mfs_refreeze_count++;
416 }
417 }
418 p->p_memstat_frozen_count++;
419
420
421 proc_list_unlock();
422
423 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
424 /*
425 * We elevate only if we are going to swap out the data.
426 */
427 error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
428 memorystatus_freeze_jetsam_band, TRUE);
429
430 if (error) {
431 printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
432 }
433 }
434 }
435
436 if ((error == 0) && (coal == NULL)) {
437 /*
438 * We froze a process and so we check to see if it was
439 * a coalition leader and if it has XPC services that
440 * might need freezing.
441 * Only one leader can be frozen at a time and so we shouldn't
442 * enter this block more than once per call. Hence the
443 * check that 'coal' has to be NULL. We should make this an
444 * assert() or panic() once we have a much more concrete way
445 * to detect an app vs a daemon.
446 */
447
448 task_t curr_task = NULL;
449
450 curr_task = proc_task(p);
451 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
452 if (coalition_is_leader(curr_task, coal)) {
453 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
454 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
455
456 if (ntasks > MAX_XPC_SERVICE_PIDS) {
457 ntasks = MAX_XPC_SERVICE_PIDS;
458 }
459 }
460 }
461
462 proc_rele(p);
463
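/* Walk the XPC service pid list gathered above; each pid jumps back to 'again' so the coalition's services are frozen under the same freezer_mutex hold. */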
464 while (ntasks) {
465 pid = pid_list[--ntasks];
466 goto again;
467 }
468
469 lck_mtx_unlock(&freezer_mutex);
470 return error;
471 } else {
472 printf("sysctl_freeze: Invalid process\n");
473 }
474
475
476 lck_mtx_unlock(&freezer_mutex);
477 return EINVAL;
478 }
479
480 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
481 0, 0, &sysctl_memorystatus_freeze, "I", "");
482
483 /*
484 * Manual trigger of aggressive frozen-process demotion for dev / debug kernels only.
485 */
486 static int
487 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
488 {
489 #pragma unused(arg1, arg2, oidp, req)
490 int error, val;
491 /*
492 * Only demote on write to prevent demoting during `sysctl -a`.
493 * The actual value written doesn't matter.
494 */
495 error = sysctl_handle_int(oidp, &val, 0, req);
496 if (error || !req->newptr) {
497 return error;
498 }
499 memorystatus_demote_frozen_processes(false);
500 return 0;
501 }
502
503 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
504
505 static int
506 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
507 {
508 #pragma unused(arg1, arg2)
509
510 int error, pid = 0;
511 proc_t p;
512
513 if (memorystatus_freeze_enabled == FALSE) {
514 return ENOTSUP;
515 }
516
517 error = sysctl_handle_int(oidp, &pid, 0, req);
518 if (error || !req->newptr) {
519 return error;
520 }
521
522 if (pid == VM_PAGES_FOR_ALL_PROCS) {
523 do_fastwake_warmup_all();
524 return 0;
525 } else {
526 p = proc_find(pid);
527 if (p != NULL) {
528 error = task_thaw(p->task);
529
530 if (error) {
531 error = EIO;
532 } else {
533 /*
534 * task_thaw() succeeded.
535 *
536 * We increment memorystatus_frozen_count on the sysctl freeze path.
537 * And so we need the P_MEMSTAT_FROZEN bit to stay set so that the frozen
538 * count is decremented when this process exits.
539 *
540 * proc_list_lock();
541 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
542 * proc_list_unlock();
543 */
544 }
545 proc_rele(p);
546 return error;
547 }
548 }
549
550 return EINVAL;
551 }
552
553 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
554 0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
555
556
557 typedef struct _global_freezable_status {
558 boolean_t freeze_pages_threshold_crossed;
559 boolean_t freeze_eligible_procs_available;
560 boolean_t freeze_scheduled_in_future;
561 }global_freezable_status_t;
562
563 typedef struct _proc_freezable_status {
564 boolean_t freeze_has_memstat_state;
565 boolean_t freeze_has_pages_min;
566 int freeze_has_probability;
567 int freeze_leader_eligible;
568 boolean_t freeze_attempted;
569 uint32_t p_memstat_state;
570 uint32_t p_pages;
571 int p_freeze_error_code;
572 int p_pid;
573 int p_leader_pid;
574 char p_name[MAXCOMLEN + 1];
575 }proc_freezable_status_t;
576
577 #define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */
578
579 /*
580 * For coalition based freezing evaluations, we proceed as follows:
581 * - detect that the process is a coalition member and a XPC service
582 * - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
583 * - continue its freezability evaluation assuming its leader will be freezable too
584 *
585 * Once we are done evaluating all processes, we do a quick run through all
586 * processes and for a coalition member XPC service we look up the 'freezable'
587 * status of its leader and iff:
588 * - the xpc service is freezable i.e. its individual freeze evaluation worked
589 * - and, its leader is also marked freezable
590 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
591 */
592
593 #define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN (-1)
594 #define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS (1)
595 #define FREEZE_PROC_LEADER_FREEZABLE_FAILURE (2)
596
597 static int
598 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
599 {
600 uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
601 global_freezable_status_t *list_head;
602 proc_freezable_status_t *list_entry, *list_entry_start;
603 size_t list_size = 0, entry_count = 0;
604 proc_t p, leader_proc;
605 memstat_bucket_t *bucket;
606 uint32_t state = 0, pages = 0;
607 boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
608 int error = 0, probability_of_use = 0;
609 pid_t leader_pid = 0;
610
611
612 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
613 return ENOTSUP;
614 }
615
616 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
617
618 if (buffer_size < list_size) {
619 return EINVAL;
620 }
621
622 list_head = kheap_alloc(KHEAP_TEMP, list_size, Z_WAITOK | Z_ZERO);
623 if (list_head == NULL) {
624 return ENOMEM;
625 }
626
627 list_size = sizeof(global_freezable_status_t);
628
629 proc_list_lock();
630
631 uint64_t curr_time = mach_absolute_time();
632
633 list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
634 list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
635 list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
636
637 list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
638 list_entry = list_entry_start;
639
640 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
641
642 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
643
644 p = memorystatus_get_first_proc_locked(&band, FALSE);
645 proc_count++;
646
647 while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
648 (p) &&
649 (list_size < buffer_size)) {
650 if (isSysProc(p)) {
651 /*
652 * Daemon:- We will consider freezing it iff:
653 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
654 * - its role in the coalition is XPC service.
655 *
656 * We skip memory size requirements in this case.
657 */
658
659 coalition_t coal = COALITION_NULL;
660 task_t leader_task = NULL, curr_task = NULL;
661 int task_role_in_coalition = 0;
662
663 curr_task = proc_task(p);
664 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
665
666 if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
667 /*
668 * By default, XPC services without an app
669 * will be the leader of their own single-member
670 * coalition.
671 */
672 goto skip_ineligible_xpc;
673 }
674
675 leader_task = coalition_get_leader(coal);
676 if (leader_task == TASK_NULL) {
677 /*
678 * This jetsam coalition is currently leader-less.
679 * This could happen if the app died, but XPC services
680 * have not yet exited.
681 */
682 goto skip_ineligible_xpc;
683 }
684
685 leader_proc = (proc_t)get_bsdtask_info(leader_task);
686 task_deallocate(leader_task);
687
688 if (leader_proc == PROC_NULL) {
689 /* leader task is exiting */
690 goto skip_ineligible_xpc;
691 }
692
693 task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
694
695 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
696 xpc_skip_size_probability_check = TRUE;
697 leader_pid = leader_proc->p_pid;
698 goto continue_eval;
699 }
700
701 skip_ineligible_xpc:
702 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
703 proc_count++;
704 continue;
705 }
706
707 continue_eval:
708 strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
709
710 list_entry->p_pid = p->p_pid;
711
712 state = p->p_memstat_state;
713
714 if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
715 !(state & P_MEMSTAT_SUSPENDED)) {
716 try_freeze = list_entry->freeze_has_memstat_state = FALSE;
717 } else {
718 try_freeze = list_entry->freeze_has_memstat_state = TRUE;
719 }
720
721 list_entry->p_memstat_state = state;
722
723 if (xpc_skip_size_probability_check == TRUE) {
724 /*
725 * Assuming the coalition leader is freezable
726 * we don't care re. minimum pages and probability
727 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
728 * XPC services have to be explicitly opted out of the disabled
729 * state. And we checked that state above.
730 */
731 list_entry->freeze_has_pages_min = TRUE;
732 list_entry->p_pages = -1;
733 list_entry->freeze_has_probability = -1;
734
735 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
736 list_entry->p_leader_pid = leader_pid;
737
738 xpc_skip_size_probability_check = FALSE;
739 } else {
740 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
741 list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
742
743 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
744 if (pages < memorystatus_freeze_pages_min) {
745 try_freeze = list_entry->freeze_has_pages_min = FALSE;
746 } else {
747 list_entry->freeze_has_pages_min = TRUE;
748 }
749
750 list_entry->p_pages = pages;
751
752 if (entry_count) {
753 uint32_t j = 0;
754 for (j = 0; j < entry_count; j++) {
755 if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
756 p->p_name,
757 MAXCOMLEN) == 0) {
758 probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
759 break;
760 }
761 }
762
763 list_entry->freeze_has_probability = probability_of_use;
764
765 try_freeze = ((probability_of_use > 0) && try_freeze);
766 } else {
767 list_entry->freeze_has_probability = -1;
768 }
769 }
770
771 if (try_freeze) {
772 uint32_t purgeable, wired, clean, dirty, shared;
773 uint32_t max_pages = 0;
774 int freezer_error_code = 0;
775
776 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
777
778 if (error) {
779 list_entry->p_freeze_error_code = freezer_error_code;
780 }
781
782 list_entry->freeze_attempted = TRUE;
783 }
784
785 list_entry++;
786 freeze_eligible_proc_considered++;
787
788 list_size += sizeof(proc_freezable_status_t);
789
790 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
791 proc_count++;
792 }
793
794 proc_list_unlock();
795
796 list_entry = list_entry_start;
797
798 for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
799 if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
800 leader_pid = list_entry[xpc_index].p_leader_pid;
801
802 leader_proc = proc_find(leader_pid);
803
804 if (leader_proc) {
805 if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
806 /*
807 * Leader has already been frozen.
808 */
809 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
810 proc_rele(leader_proc);
811 continue;
812 }
813 proc_rele(leader_proc);
814 }
815
816 for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
817 if (list_entry[leader_index].p_pid == leader_pid) {
818 if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
819 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
820 } else {
821 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
822 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
823 }
824 break;
825 }
826 }
827
828 /*
829 * Didn't find the leader entry. This is most likely because
830 * the leader never made it down to band 0.
831 */
832 if (leader_index == freeze_eligible_proc_considered) {
833 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
834 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
835 }
836 }
837 }
838
839 buffer_size = MIN(list_size, INT32_MAX);
840
841 error = copyout(list_head, buffer, buffer_size);
842 if (error == 0) {
843 *retval = (int32_t) buffer_size;
844 } else {
845 *retval = 0;
846 }
847
848 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
849 kheap_free(KHEAP_TEMP, list_head, list_size);
850
851 MEMORYSTATUS_DEBUG(1, "memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)list_size);
852
853 return error;
854 }
855
856 #endif /* DEVELOPMENT || DEBUG */
857
858 /*
859 * Get a list of all processes in the freezer band which are currently frozen.
860 * Used by powerlog to collect analytics on frozen processes.
861 */
862 static int
863 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
864 {
865 global_frozen_procs_t *frozen_procs = NULL;
866 uint32_t band = memorystatus_freeze_jetsam_band;
867 proc_t p;
868 uint32_t state;
869 int error;
870 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
871 return ENOTSUP;
872 }
873 if (buffer_size < sizeof(global_frozen_procs_t)) {
874 return EINVAL;
875 }
876 frozen_procs = kheap_alloc(KHEAP_TEMP, sizeof(global_frozen_procs_t),
877 Z_WAITOK | Z_ZERO);
878 if (frozen_procs == NULL) {
879 return ENOMEM;
880 }
881
882 proc_list_lock();
883 p = memorystatus_get_first_proc_locked(&band, FALSE);
884 while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
885 state = p->p_memstat_state;
886 if (state & P_MEMSTAT_FROZEN) {
887 frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = p->p_pid;
888 strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
889 p->p_name, sizeof(proc_name_t));
890 frozen_procs->gfp_num_frozen++;
891 }
892 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
893 }
894 proc_list_unlock();
895
896 buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
897 error = copyout(frozen_procs, buffer, buffer_size);
898 if (error == 0) {
899 *retval = (int32_t) buffer_size;
900 } else {
901 *retval = 0;
902 }
903 kheap_free(KHEAP_TEMP, frozen_procs, sizeof(global_frozen_procs_t));
904
905 return error;
906 }
907
908 int
909 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
910 {
911 int err = ENOTSUP;
912
913 #if DEVELOPMENT || DEBUG
914 if (flags == FREEZER_CONTROL_GET_STATUS) {
915 err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
916 }
917 #endif /* DEVELOPMENT || DEBUG */
918 if (flags == FREEZER_CONTROL_GET_PROCS) {
919 err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
920 }
921
922 return err;
923 }
924
925 extern void vm_swap_consider_defragmenting(int);
926 extern boolean_t memorystatus_kill_elevated_process(uint32_t, os_reason_t, unsigned int, int, uint32_t *, uint64_t *);
927
928 /*
929 * This routine will _jetsam_ all frozen processes
930 * and reclaim the swap space immediately.
931 *
932 * So freeze has to be DISABLED when we call this routine.
933 */
934
935 void
936 memorystatus_disable_freeze(void)
937 {
938 memstat_bucket_t *bucket;
939 int bucket_count = 0, retries = 0;
940 boolean_t retval = FALSE, killed = FALSE;
941 uint32_t errors = 0, errors_over_prev_iteration = 0;
942 os_reason_t jetsam_reason = 0;
943 unsigned int band = 0;
944 proc_t p = PROC_NULL, next_p = PROC_NULL;
945 uint64_t memory_reclaimed = 0, footprint = 0;
946
947 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
948 memorystatus_available_pages, 0, 0, 0, 0);
949
950 assert(memorystatus_freeze_enabled == FALSE);
951
952 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
953 if (jetsam_reason == OS_REASON_NULL) {
954 printf("memorystatus_disable_freeze: failed to allocate jetsam reason\n");
955 }
956
957 /*
958 * Let's relocate all frozen processes into band 8. Demoted frozen processes
959 * are sitting in band 0 currently and it's possible to have a frozen process
960 * in the FG band being actively used. We don't reset its frozen state when
961 * it is resumed because it has state on disk.
962 *
963 * We choose to do this relocation rather than implement a new 'kill frozen'
964 * process function for these reasons:
965 * - duplication of code: too many kill functions exist and we need to rework them better.
966 * - disk-space-shortage kills are rare
967 * - not having the 'real' jetsam band at the time of this frozen kill won't preclude us
968 * from answering any important questions re. jetsam policy/effectiveness.
969 *
970 * This is essentially what memorystatus_update_inactive_jetsam_priority_band() does while
971 * avoiding the application of memory limits.
972 */
973
974 again:
975 proc_list_lock();
976
977 band = JETSAM_PRIORITY_IDLE;
978 p = PROC_NULL;
979 next_p = PROC_NULL;
980
981 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
982 while (next_p) {
983 p = next_p;
984 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
985
986 if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
987 break;
988 }
989
990 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
991 continue;
992 }
993
994 if (p->p_memstat_state & P_MEMSTAT_ERROR) {
995 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
996 }
997
998 if (p->p_memstat_effectivepriority == memorystatus_freeze_jetsam_band) {
999 continue;
1000 }
1001
1002 /*
1003 * We explicitly add this flag here so the process looks like a normal
1004 * frozen process i.e. P_MEMSTAT_FROZEN and P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND.
1005 * We don't bother with assigning the 'active' memory
1006 * limits at this point because we are going to be killing it soon below.
1007 */
1008 p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1009 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1010
1011 memorystatus_update_priority_locked(p, memorystatus_freeze_jetsam_band, FALSE, TRUE);
1012 }
1013
1014 bucket = &memstat_bucket[memorystatus_freeze_jetsam_band];
1015 bucket_count = bucket->count;
1016 proc_list_unlock();
1017
1018 /*
1019 * Bucket count is already stale at this point. But, we don't expect
1020 * freezing to continue since we have already disabled the freeze functionality.
1021 * However, an existing freeze might be in progress. So we might miss that process
1022 * in the first go-around. We hope to catch it in the next.
1023 */
1024
1025 errors_over_prev_iteration = 0;
1026 while (bucket_count) {
1027 bucket_count--;
1028
1029 /*
1030 * memorystatus_kill_elevated_process() drops a reference,
1031 * so take another one so we can continue to use this exit reason
1032 * even after it returns.
1033 */
1034
1035 os_reason_ref(jetsam_reason);
1036 retval = memorystatus_kill_elevated_process(
1037 kMemorystatusKilledDiskSpaceShortage,
1038 jetsam_reason,
1039 memorystatus_freeze_jetsam_band,
1040 0, /* the iteration of aggressive jetsam..ignored here */
1041 &errors,
1042 &footprint);
1043
1044 if (errors > 0) {
1045 printf("memorystatus_disable_freeze: memorystatus_kill_elevated_process returned %d error(s)\n", errors);
1046 errors_over_prev_iteration += errors;
1047 errors = 0;
1048 }
1049
1050 if (retval == 0) {
1051 /*
1052 * No frozen processes left to kill.
1053 */
1054 break;
1055 }
1056
1057 killed = TRUE;
1058 memory_reclaimed += footprint;
1059 }
1060
1061 proc_list_lock();
1062
1063 if (memorystatus_frozen_count) {
1064 /*
1065 * A frozen process snuck in and so
1066 * go back around to kill it. That
1067 * process may have been resumed and
1068 * put into the FG band too. So we
1069 * have to do the relocation again.
1070 */
1071 assert(memorystatus_freeze_enabled == FALSE);
1072
1073 retries++;
1074 if (retries < 3) {
1075 proc_list_unlock();
1076 goto again;
1077 }
1078 #if DEVELOPMENT || DEBUG
1079 panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d, errors = %d",
1080 memorystatus_frozen_count, errors_over_prev_iteration);
1081 #endif /* DEVELOPMENT || DEBUG */
1082 }
1083 proc_list_unlock();
1084
1085 os_reason_free(jetsam_reason);
1086
1087 if (killed) {
1088 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1089
1090 proc_list_lock();
1091 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1092 sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1093 uint64_t timestamp_now = mach_absolute_time();
1094 memorystatus_jetsam_snapshot->notification_time = timestamp_now;
1095 memorystatus_jetsam_snapshot->js_gencount++;
1096 if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
1097 timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
1098 proc_list_unlock();
1099 int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1100 if (!ret) {
1101 proc_list_lock();
1102 memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
1103 proc_list_unlock();
1104 }
1105 } else {
1106 proc_list_unlock();
1107 }
1108 }
1109
1110 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1111 memorystatus_available_pages, memory_reclaimed, 0, 0, 0);
1112
1113 return;
1114 }
1115
1116 __private_extern__ void
1117 memorystatus_freeze_init(void)
1118 {
1119 kern_return_t result;
1120 thread_t thread;
1121
1122 /*
1123 * This is just the default value if the underlying
1124 * storage device doesn't have any specific budget.
1125 * We check with the storage layer in memorystatus_freeze_update_throttle()
1126 * before we start freezing for the first time.
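 * (For example, a 1 GB daily budget on a system with 16 KB pages works out to 65536 pages.)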
1127 */
1128 memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
1129
1130 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1131 if (result == KERN_SUCCESS) {
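/* Keep the freezer thread's I/O throttled (compressor tier 2) and passive so freezing does not compete with user-initiated I/O. */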
1132 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1133 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1134 thread_set_thread_name(thread, "VM_freezer");
1135
1136 thread_deallocate(thread);
1137 } else {
1138 panic("Could not create memorystatus_freeze_thread");
1139 }
1140 }
1141
1142 static boolean_t
1143 memorystatus_is_process_eligible_for_freeze(proc_t p)
1144 {
1145 /*
1146 * Called with proc_list_lock held.
1147 */
1148
1149 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
1150
1151 boolean_t should_freeze = FALSE;
1152 uint32_t state = 0, pages = 0;
1153 int probability_of_use = 0;
1154 size_t entry_count = 0, i = 0;
1155 bool first_consideration = true;
1156
1157 state = p->p_memstat_state;
1158
1159 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
1160 if (state & P_MEMSTAT_FREEZE_DISABLED) {
1161 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonDisabled;
1162 }
1163 goto out;
1164 }
1165
1166 if (isSysProc(p)) {
1167 /*
1168 * Daemon:- We consider freezing it if:
1169 * - it belongs to a coalition and the leader is frozen, and,
1170 * - its role in the coalition is XPC service.
1171 *
1172 * We skip memory size requirements in this case.
1173 */
1174
1175 coalition_t coal = COALITION_NULL;
1176 task_t leader_task = NULL, curr_task = NULL;
1177 proc_t leader_proc = NULL;
1178 int task_role_in_coalition = 0;
1179
1180 curr_task = proc_task(p);
1181 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1182
1183 if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1184 /*
1185 * By default, XPC services without an app
1186 * will be the leader of their own single-member
1187 * coalition.
1188 */
1189 goto out;
1190 }
1191
1192 leader_task = coalition_get_leader(coal);
1193 if (leader_task == TASK_NULL) {
1194 /*
1195 * This jetsam coalition is currently leader-less.
1196 * This could happen if the app died, but XPC services
1197 * have not yet exited.
1198 */
1199 goto out;
1200 }
1201
1202 leader_proc = (proc_t)get_bsdtask_info(leader_task);
1203 task_deallocate(leader_task);
1204
1205 if (leader_proc == PROC_NULL) {
1206 /* leader task is exiting */
1207 goto out;
1208 }
1209
1210 if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
1211 goto out;
1212 }
1213
1214 task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
1215
1216 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
1217 should_freeze = TRUE;
1218 }
1219
1220 goto out;
1221 } else {
1222 /*
1223 * Application. In addition to the above states we need to make
1224 * sure we only consider suspended applications for freezing.
1225 */
1226 if (!(state & P_MEMSTAT_SUSPENDED)) {
1227 goto out;
1228 }
1229 }
1230
1231 /*
1232 * This proc is a suspended application.
1233 * We're interested in tracking what percentage of these
1234 * actually get frozen.
1235 * To avoid skewing the metrics towards processes which
1236 * are considered more frequently, we only track failures once
1237 * per process.
1238 */
1239 first_consideration = !(state & P_MEMSTAT_FREEZE_CONSIDERED);
1240
1241 if (first_consideration) {
1242 memorystatus_freezer_stats.mfs_process_considered_count++;
1243 p->p_memstat_state |= P_MEMSTAT_FREEZE_CONSIDERED;
1244 }
1245
1246 /* Only freeze applications meeting our minimum resident page criteria */
1247 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
1248 if (pages < memorystatus_freeze_pages_min) {
1249 if (first_consideration) {
1250 memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
1251 }
1252 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonBelowMinPages;
1253 goto out;
1254 }
1255
1256 /* Don't freeze processes that are already exiting on core. A process may have started exiting
1257 * after we chose it for freeze, but before we obtained the proc_list_lock.
1258 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
1259 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
1260 */
1261 if ((p->p_listflag & P_LIST_EXITED) != 0) {
1262 if (first_consideration) {
1263 memorystatus_freezer_stats.mfs_error_other_count++;
1264 }
1265 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOther;
1266 goto out;
1267 }
1268
1269 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1270
1271 if (entry_count) {
1272 for (i = 0; i < entry_count; i++) {
1273 /*
1274 * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes
1275 * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first
1276 * MAXCOMLEN bytes here since the name in the probabilities table could
1277 * be truncated from the proc_t's p_name.
1278 */
1279 if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1280 p->p_name,
1281 MAXCOMLEN) == 0) {
1282 probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1283 break;
1284 }
1285 }
1286
1287 if (probability_of_use == 0) {
1288 if (first_consideration) {
1289 memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
1290 }
1291 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonLowProbOfUse;
1292 goto out;
1293 }
1294 }
1295
1296 should_freeze = TRUE;
1297 out:
1298 if (should_freeze && !(state & P_MEMSTAT_FROZEN)) {
1299 /*
1300 * Reset the skip reason. If it's killed before we manage to actually freeze it
1301 * we failed to consider it early enough.
1302 */
1303 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1304 if (!first_consideration) {
1305 /*
1306 * We're freezing this for the first time and we previously considered it ineligible.
1307 * Bump the considered count so that we track this as 1 failure
1308 * and 1 success.
1309 */
1310 memorystatus_freezer_stats.mfs_process_considered_count++;
1311 }
1312 }
1313 return should_freeze;
1314 }
1315
1316 /*
1317 * Synchronously freeze the passed proc. Called with a reference to the proc held.
1318 *
1319 * Doesn't deal with:
1320 * - re-freezing because this is called on a specific process and
1321 * not by the freezer thread. If that changes, we'll have to teach it about
1322 * refreezing a frozen process.
1323 *
1324 * - grouped/coalition freezing because we are hoping to deprecate this
1325 * interface as it was used by user-space to freeze particular processes. But
1326 * we have moved away from that approach to having the kernel choose the optimal
1327 * candidates to be frozen.
1328 *
1329 * Returns EINVAL or the value returned by task_freeze().
1330 */
1331 int
1332 memorystatus_freeze_process_sync(proc_t p)
1333 {
1334 int ret = EINVAL;
1335 pid_t aPid = 0;
1336 boolean_t memorystatus_freeze_swap_low = FALSE;
1337 int freezer_error_code = 0;
1338
1339 lck_mtx_lock(&freezer_mutex);
1340
1341 if (p == NULL) {
1342 printf("memorystatus_freeze_process_sync: Invalid process\n");
1343 goto exit;
1344 }
1345
1346 if (memorystatus_freeze_enabled == FALSE) {
1347 printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
1348 goto exit;
1349 }
1350
1351 if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
1352 printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
1353 goto exit;
1354 }
1355
1356 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1357 if (!memorystatus_freeze_budget_pages_remaining) {
1358 printf("memorystatus_freeze_process_sync: exit with NO available budget\n");
1359 goto exit;
1360 }
1361
1362 proc_list_lock();
1363
1364 if (p != NULL) {
1365 uint32_t purgeable, wired, clean, dirty, shared;
1366 uint32_t i;
1367 uint64_t max_pages;
1368
1369 aPid = p->p_pid;
1370
1371 /* Ensure the process is eligible for freezing */
1372 if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1373 proc_list_unlock();
1374 goto exit;
1375 }
1376
1377 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1378 max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1379 } else {
1380 /*
1381 * We only have the compressor without any swap.
1382 */
1383 max_pages = UINT32_MAX - 1;
1384 }
1385
1386 /* Mark as locked temporarily to avoid kill */
1387 p->p_memstat_state |= P_MEMSTAT_LOCKED;
1388 proc_list_unlock();
1389
1390 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1391 memorystatus_available_pages, 0, 0, 0, 0);
1392
1393 max_pages = MIN(max_pages, UINT32_MAX);
1394 ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1395 if (ret == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1396 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1397 }
1398
1399 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1400 memorystatus_available_pages, aPid, 0, 0, 0);
1401
1402 DTRACE_MEMORYSTATUS6(memorystatus_freeze, proc_t, p, unsigned int, memorystatus_available_pages, boolean_t, purgeable, unsigned int, wired, uint32_t, clean, uint32_t, dirty);
1403
1404 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - "
1405 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1406 (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1407 memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1408
1409 proc_list_lock();
1410
1411 if (ret == KERN_SUCCESS) {
1412 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1413
1414 p->p_memstat_freeze_sharedanon_pages += shared;
1415
1416 memorystatus_frozen_shared_mb += shared;
1417
1418 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1419 p->p_memstat_state |= P_MEMSTAT_FROZEN;
1420 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1421 memorystatus_frozen_count++;
1422 os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1423 if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
1424 memorystatus_freeze_out_of_slots();
1425 }
1426 } else {
1427 // This was a re-freeze
1428 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1429 memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
1430 memorystatus_freezer_stats.mfs_refreeze_count++;
1431 }
1432 }
1433
1434 p->p_memstat_frozen_count++;
1435
1436 /*
1437 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1438 * to its higher jetsam band.
1439 */
1440 proc_list_unlock();
1441
1442 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1443
1444 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1445 ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
1446 memorystatus_freeze_jetsam_band, TRUE);
1447
1448 if (ret) {
1449 printf("Elevating the frozen process failed with %d\n", ret);
1450 /* not fatal */
1451 ret = 0;
1452 }
1453
1454
1455 /* Update stats */
1456 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1457 throttle_intervals[i].pageouts += dirty;
1458 }
1459 }
1460 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1461 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages",
1462 aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty);
1463
1464 proc_list_lock();
1465
1466 memorystatus_freeze_pageouts += dirty;
1467
1468 if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1469 /*
1470 * Add some eviction logic here? At some point should we
1471 * jetsam a process to get back its swap space so that we
1472 * can freeze a more eligible process at this moment in time?
1473 */
1474 }
1475 } else {
1476 memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, NULL, "memorystatus_freeze_process_sync");
1477 p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
1478 }
1479
1480 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1481 wakeup(&p->p_memstat_state);
1482 proc_list_unlock();
1483 }
1484
1485 exit:
1486 lck_mtx_unlock(&freezer_mutex);
1487
1488 return ret;
1489 }
1490
1491 /*
1492 * Caller must hold the freezer_mutex and it will be locked on return.
1493 */
1494 static int
1495 memorystatus_freeze_top_process(void)
1496 {
1497 pid_t aPid = 0, coal_xpc_pid = 0;
1498 int ret = -1;
1499 proc_t p = PROC_NULL, next_p = PROC_NULL;
1500 unsigned int i = 0;
1501 unsigned int band = JETSAM_PRIORITY_IDLE;
1502 bool refreeze_processes = false;
1503 task_t curr_task = NULL;
1504 coalition_t coal = COALITION_NULL;
1505 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
1506 unsigned int ntasks = 0;
1507 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1508
1509 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);
1510
1511 proc_list_lock();
1512
1513 if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
1514 /*
1515 * The freezer is already full, but since we are here anyway,
1516 * let's try to refreeze any processes we might have thawed
1517 * in the past and push their compressed state out.
1518 */
1519 refreeze_processes = true;
1520 band = (unsigned int) memorystatus_freeze_jetsam_band;
1521 }
1522
1523 freeze_process:
1524
1525 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
1526 while (next_p) {
1527 kern_return_t kr;
1528 uint32_t purgeable, wired, clean, dirty, shared;
1529 uint64_t max_pages = 0;
1530 int freezer_error_code = 0;
1531 bool was_refreeze = false;
1532
1533 p = next_p;
1534
1535 if (coal == NULL) {
1536 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1537 } else {
1538 /*
1539 * We have frozen a coalition leader and now are
1540 * dealing with its XPC services. We get our
1541 * next_p for each XPC service from the pid_list
1542 * acquired after a successful task_freeze call
1543 * on the coalition leader.
1544 */
1545
1546 if (ntasks > 0) {
1547 coal_xpc_pid = pid_list[--ntasks];
1548 next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */);
1549 /*
1550 * We grab a reference when we are about to freeze the process. So, drop
1551 * the reference that proc_findinternal() grabbed for us.
1552 * We also have the proc_list_lock and so this process is stable.
1553 */
1554 if (next_p) {
1555 proc_rele_locked(next_p);
1556 }
1557 } else {
1558 next_p = NULL;
1559 }
1560 }
1561
1562 aPid = p->p_pid;
1563
1564 if (p->p_memstat_effectivepriority != (int32_t) band) {
1565 /*
1566 * We shouldn't be freezing processes outside the
1567 * prescribed band.
1568 */
1569 break;
1570 }
1571
1572 /* Ensure the process is eligible for (re-)freezing */
1573 if (refreeze_processes) {
1574 /*
1575 * Has to have been frozen once before.
1576 */
1577 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1578 continue;
1579 }
1580
1581 /*
1582 * Has to have been resumed once before.
1583 */
1584 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == FALSE) {
1585 continue;
1586 }
1587
1588 /*
1589 * Skip if some other operation is currently examining this process (P_MEMSTAT_LOCKED set).
1590 */
1591 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1592 continue;
1593 }
1594
1595 /*
1596 * We are going to try to refreeze, so re-evaluate
1597 * this process. We don't want to double-count its shared
1598 * memory, so deduct the old snapshot here.
1599 */
1600 memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
1601 p->p_memstat_freeze_sharedanon_pages = 0;
1602
1603 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1604 memorystatus_refreeze_eligible_count--;
1605 } else {
1606 if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1607 continue; // with lock held
1608 }
1609 }
1610
1611 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1612 /*
1613 * Freezer backed by the compressor and swap file(s)
1614 * will hold compressed data.
1615 */
1616
1617 max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1618 } else {
1619 /*
1620 * We only have the compressor pool.
1621 */
1622 max_pages = UINT32_MAX - 1;
1623 }
1624
1625 /* Mark as locked temporarily to avoid kill */
1626 p->p_memstat_state |= P_MEMSTAT_LOCKED;
1627
1628 p = proc_ref_locked(p);
1629 if (!p) {
1630 memorystatus_freezer_stats.mfs_error_other_count++;
1631 break;
1632 }
1633
1634 proc_list_unlock();
1635
1636 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1637 memorystatus_available_pages, 0, 0, 0, 0);
1638
1639 max_pages = MIN(max_pages, UINT32_MAX);
1640 kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1641 if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1642 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1643 }
1644
1645 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1646 memorystatus_available_pages, aPid, 0, 0, 0);
1647
1648 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
1649 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1650 (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1651 memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1652
1653 proc_list_lock();
1654
1655 /* Success? */
1656 if (KERN_SUCCESS == kr) {
1657 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1658
1659 p->p_memstat_freeze_sharedanon_pages += shared;
1660
1661 memorystatus_frozen_shared_mb += shared;
1662
1663 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1664 p->p_memstat_state |= P_MEMSTAT_FROZEN;
1665 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1666 memorystatus_frozen_count++;
1667 os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed);
1668 if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
1669 memorystatus_freeze_out_of_slots();
1670 }
1671 } else {
1672 // This was a re-freeze
1673 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1674 memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
1675 memorystatus_freezer_stats.mfs_refreeze_count++;
1676 }
1677 was_refreeze = true;
1678 }
1679
1680 p->p_memstat_frozen_count++;
1681
1682 /*
1683 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1684 * to its higher jetsam band.
1685 */
1686 proc_list_unlock();
1687
1688 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1689
1690 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1691 ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);
1692
1693 if (ret) {
1694 printf("Elevating the frozen process failed with %d\n", ret);
1695 /* not fatal */
1696 ret = 0;
1697 }
1698
1699 /* Update stats */
1700 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1701 throttle_intervals[i].pageouts += dirty;
1702 }
1703 }
1704 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1705 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
1706 was_refreeze ? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, was_refreeze ? "Re" : "", dirty);
1707
1708 proc_list_lock();
1709
1710 memorystatus_freeze_pageouts += dirty;
1711
1712 if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1713 /*
1714 * Add some eviction logic here? At some point should we
1715 * jetsam a process to get back its swap space so that we
1716 * can freeze a more eligible process at this moment in time?
1717 */
1718 }
1719
1720 /* Return KERN_SUCCESS */
1721 ret = kr;
1722
1723 /*
1724 * We froze a process successfully. We can stop now
1725 * and see if that helped if this process isn't part
1726 * of a coalition.
1727 *
1728 * Else:
1729 * - if it is a leader, get the list of XPC services
1730 * that need to be frozen.
1731 * - if it is an XPC service whose leader was frozen
1732 * here, continue on to the next XPC service in the list.
1733 */
1734
1735 if (coal == NULL) {
1736 curr_task = proc_task(p);
1737 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1738 if (coalition_is_leader(curr_task, coal)) {
1739 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
1740 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
1741
1742 if (ntasks > MAX_XPC_SERVICE_PIDS) {
1743 ntasks = MAX_XPC_SERVICE_PIDS;
1744 }
1745 }
1746
1747 next_p = NULL;
1748
1749 if (ntasks > 0) {
1750 /*
1751 * Start off with our first next_p in this list.
1752 */
1753 coal_xpc_pid = pid_list[--ntasks];
1754 next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */);
1755
1756 /*
1757 * We grab a reference when we are about to freeze the process. So drop
1758 * the reference that proc_findinternal() grabbed for us.
1759 * We also have the proc_list_lock and so this process is stable.
1760 */
1761 if (next_p) {
1762 proc_rele_locked(next_p);
1763 }
1764 }
1765 }
1766
1767 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1768 wakeup(&p->p_memstat_state);
1769 proc_rele_locked(p);
1770
1771 if (coal && next_p) {
1772 continue;
1773 }
1774
1775 /*
1776 * No coalition leader was frozen. So we don't
1777 * need to evaluate any XPC services.
1778 *
1779 * OR
1780 *
1781 * We have frozen all eligible XPC services for
1782 * the current coalition leader.
1783 *
1784 * Either way, we can break here and see if freezing
1785 * helped.
1786 */
1787
1788 break;
1789 } else {
1790 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1791 wakeup(&p->p_memstat_state);
1792
1793 if (refreeze_processes) {
1794 if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
1795 (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
1796 /*
1797 * Keeping this prior-frozen process in this high band when
1798 * we failed to re-freeze it due to bad shared memory usage
1799 * could cause excessive pressure on the lower bands.
1800 * We need to demote it for now. It'll get re-evaluated next
1801 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
1802 * bit.
1803 */
1804
1805 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1806 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1807 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
1808 }
1809 } else {
1810 p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
1811 }
1812 memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, coal, "memorystatus_freeze_top_process");
1813
1814 proc_rele_locked(p);
1815
1816 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
1817 break;
1818 }
1819 }
1820 }
1821
1822 if ((ret == -1) &&
1823 (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD) &&
1824 (!refreeze_processes)) {
1825 /*
1826 * We failed to freeze a process from the IDLE
1827 * band AND we have some thawed processes
1828 * AND we haven't tried refreezing yet.
1829 * Let's try to re-freeze processes in the
1830 * frozen band that have been resumed in the past
1831 * and so have brought state back in from disk.
1832 */
1833
1834 band = (unsigned int) memorystatus_freeze_jetsam_band;
1835
1836 refreeze_processes = true;
1837
1838 goto freeze_process;
1839 }
1840
1841 proc_list_unlock();
1842
1843 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, aPid, 0, 0, 0);
1844
1845 return ret;
1846 }
1847
1848 #if DEVELOPMENT || DEBUG
1849 /* For testing memorystatus_freeze_top_process */
1850 static int
1851 sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
1852 {
1853 #pragma unused(arg1, arg2)
1854 int error, val;
1855 /*
1856 * Only freeze on write to prevent freezing during `sysctl -a`.
1857 * The actual value written doesn't matter.
1858 */
1859 error = sysctl_handle_int(oidp, &val, 0, req);
1860 if (error || !req->newptr) {
1861 return error;
1862 }
1863 lck_mtx_lock(&freezer_mutex);
1864 int ret = memorystatus_freeze_top_process();
1865 lck_mtx_unlock(&freezer_mutex);
1866 if (ret == -1) {
1867 ret = ESRCH;
1868 }
1869 return ret;
1870 }
1871 SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
1872 0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
1873 #endif /* DEVELOPMENT || DEBUG */
1874
1875 static inline boolean_t
1876 memorystatus_can_freeze_processes(void)
1877 {
1878 boolean_t ret;
1879
1880 proc_list_lock();
1881
1882 if (memorystatus_suspended_count) {
1883 memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
1884
1885 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
1886 ret = TRUE;
1887 } else {
1888 ret = FALSE;
1889 }
1890 } else {
1891 ret = FALSE;
1892 }
1893
1894 proc_list_unlock();
1895
1896 return ret;
1897 }
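/*
 * Illustrative sketch (not compiled into the kernel): a worked instance of the
 * threshold test above. The FREEZE_SUSPENDED_THRESHOLD_DEFAULT value used here
 * (4) and the counts are assumptions for illustration only.
 */
#if 0
static boolean_t
example_can_freeze_processes_check(void)
{
	unsigned int suspended = 12;   /* assumed memorystatus_suspended_count */
	unsigned int frozen = 4;       /* assumed memorystatus_frozen_count */
	unsigned int threshold = 4;    /* assumed suspended-process threshold */

	/* Mirrors the (suspended - frozen) > threshold test above: 8 > 4 -> TRUE. */
	return ((suspended - frozen) > threshold) ? TRUE : FALSE;
}
#endif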
1898
1899 static boolean_t
1900 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
1901 {
1902 boolean_t can_freeze = TRUE;
1903
1904 /* Only freeze if we're sufficiently low on memory; this holds off freeze right
1905 * after boot, and is generally a no-op once we've reached steady state. */
1906 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
1907 return FALSE;
1908 }
1909
1910 /* Check minimum suspended process threshold. */
1911 if (!memorystatus_can_freeze_processes()) {
1912 return FALSE;
1913 }
1914 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
1915
1916 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1917 /*
1918 * In-core compressor used for freezing WITHOUT on-disk swap support.
1919 */
1920 if (vm_compressor_low_on_space()) {
1921 if (memorystatus_freeze_swap_low) {
1922 *memorystatus_freeze_swap_low = TRUE;
1923 }
1924
1925 can_freeze = FALSE;
1926 } else {
1927 if (memorystatus_freeze_swap_low) {
1928 *memorystatus_freeze_swap_low = FALSE;
1929 }
1930
1931 can_freeze = TRUE;
1932 }
1933 } else {
1934 /*
1935 * Freezing WITH on-disk swap support.
1936 *
1937 * In-core compressor fronts the swap.
1938 */
1939 if (vm_swap_low_on_space()) {
1940 if (memorystatus_freeze_swap_low) {
1941 *memorystatus_freeze_swap_low = TRUE;
1942 }
1943
1944 can_freeze = FALSE;
1945 }
1946 }
1947
1948 return can_freeze;
1949 }
1950
1951 /*
1952 * This function evaluates if the currently frozen processes deserve
1953 * to stay in the higher jetsam band. There are 2 modes:
1954 * - 'force_one == TRUE': (urgent mode)
1955 * We are out of budget and can't refreeze a process. The process's
1956 * state, if it was resumed, will stay in compressed memory. If we let it
1957 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
1958 * the lower bands. So we force-demote the least-recently-used-and-thawed
1959 * process.
1960 *
1961 * - 'force_one == FALSE': (normal mode)
1962 * If the # of thaws of a process is below our threshold, then we
1963 * will demote that process into the IDLE band.
1964 * We don't immediately kill the process here because it already has
1965 * state on disk and so it might be worth giving it another shot at
1966 * getting thawed/resumed and used.
1967 */
1968 static void
1969 memorystatus_demote_frozen_processes(boolean_t force_one)
1970 {
1971 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
1972 unsigned int demoted_proc_count = 0;
1973 proc_t p = PROC_NULL, next_p = PROC_NULL;
1974 /* We demote to IDLE unless someone has asserted a higher priority on this process. */
1975 int maxpriority = JETSAM_PRIORITY_IDLE;
1976
1977 proc_list_lock();
1978
1979 if (memorystatus_freeze_enabled == FALSE) {
1980 /*
1981 * Freeze has been disabled likely to
1982 * reclaim swap space. So don't change
1983 * any state on the frozen processes.
1984 */
1985 proc_list_unlock();
1986 return;
1987 }
1988
1989 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
1990 while (next_p) {
1991 p = next_p;
1992 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1993
1994 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1995 continue;
1996 }
1997
1998 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1999 continue;
2000 }
2001
2002 if (force_one == TRUE) {
2003 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) {
2004 /*
2005 * This process hasn't been thawed recently and so most of
2006 * its state sits on NAND and so we skip it -- jetsamming it
2007 * won't help with memory pressure.
2008 */
2009 continue;
2010 }
2011 } else {
2012 if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
2013 /*
2014 * This process has met / exceeded our thaw count demotion threshold
2015 * and so we let it live in the higher bands.
2016 */
2017 continue;
2018 }
2019 }
2020
2021 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2022 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2023
2024 maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
2025 memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
2026 #if DEVELOPMENT || DEBUG
2027 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_demote_frozen_process(%s) pid %d [%s]",
2028 (force_one ? "urgent" : "normal"), (p ? p->p_pid : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
2029 #endif /* DEVELOPMENT || DEBUG */
2030
2031 /*
2032 * The freezer thread will consider this a normal app to be frozen
2033 * because it is in the IDLE band. So we don't need the
2034 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
2035 * we'll correctly count it as eligible for re-freeze again.
2036 *
2037 * We don't drop the frozen count because this process still has
2038 * state on disk. So there's a chance it gets resumed and then it
2039 * should land in the higher jetsam band. For that it needs to
2040 * remain marked frozen.
2041 */
2042 if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
2043 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2044 memorystatus_refreeze_eligible_count--;
2045 }
2046
2047 demoted_proc_count++;
2048
2049 if ((force_one == TRUE) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
2050 break;
2051 }
2052 }
2053
2054 if (force_one == FALSE) {
2055 /*
2056 * We use these counters to track daily hit rates.
2057 * So we only reset them to 0 under the normal
2058 * mode.
2059 */
2060 memorystatus_thaw_count = 0;
2061 }
2062
2063 proc_list_unlock();
2064 }
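/*
 * Illustrative sketch (not compiled): the two ways this routine is driven in
 * this file. Both call sites appear later in this file and run with the
 * freezer_mutex held; the wrapper name below is hypothetical.
 */
#if 0
static void
example_demotion_modes(void)
{
	/* Normal mode: a new throttle interval started; demote by thaw count. */
	memorystatus_demote_frozen_processes(FALSE);

	/* Urgent mode: out of freezer budget; force-demote one thawed process. */
	memorystatus_demote_frozen_processes(TRUE);
}
#endif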
2065
2066 /*
2067 * Calculate a new freezer budget.
2068 * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2069 * @param burst_multiple The burst_multiple for the new period
2070 * @param interval_duration_min How many minutes will the new interval be?
2071 * @param rollover The amount to rollover from the previous budget.
2072 *
2073 * @return A budget for the new interval.
2074 */
2075 static uint32_t
2076 memorystatus_freeze_calculate_new_budget(
2077 unsigned int time_since_last_interval_expired_sec,
2078 unsigned int burst_multiple,
2079 unsigned int interval_duration_min,
2080 uint32_t rollover)
2081 {
2082 uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2083 const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2084 /* Precision factor for days_missed. 2 decimal points. */
2085 const static unsigned int kFixedPointFactor = 100;
2086 unsigned int days_missed;
2087
2088 /* Get the daily budget from the storage layer */
2089 if (vm_swap_max_budget(&freeze_daily_budget)) {
2090 freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2091 assert(freeze_daily_budget_mb <= UINT32_MAX);
2092 memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2093 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2094 }
2095 /* Calculate the daily pageout budget */
2096 freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2097
2098 daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2099
2100 /*
2101 * Add additional budget for time since the interval expired.
2102 * For example, if the interval expired n days ago, we should get an additional n days
2103 * of budget since we didn't use any budget during those n days.
2104 */
2105 days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2106 budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2107 new_budget = rollover + daily_budget_pageouts + budget_missed;
2108 return (uint32_t) MIN(new_budget, UINT32_MAX);
2109 }
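/*
 * Worked example (illustrative, not compiled): the arithmetic above with
 * assumed inputs -- a 1024 MB daily NAND budget, 16 KB pages, a one-day
 * (1440 minute) interval, burst_multiple of 1, no rollover, and an interval
 * that expired 12 hours ago. All numbers are assumptions for illustration.
 */
#if 0
static uint32_t
example_new_budget(void)
{
	const uint64_t page_size = 16 * 1024;
	const uint64_t daily_budget_mb = 1024;
	const uint64_t daily_pageouts_max = daily_budget_mb * (1024 * 1024 / page_size); /* 65536 pages */
	const uint64_t interval_mins = 24 * 60;
	const uint64_t burst_multiple = 1;
	const uint64_t secs_since_expiry = 12 * 60 * 60;
	const uint64_t fixed_point = 100;
	const uint64_t secs_per_day = 60 * 60 * 24;

	/* Base budget for the interval: 1 * (1440 * 65536 / 1440) = 65536 pages. */
	uint64_t base = burst_multiple * ((interval_mins * daily_pageouts_max) / (secs_per_day / 60));

	/* Catch-up for the 0.50 days the interval sat expired: 50 * 65536 / 100 = 32768 pages. */
	uint64_t days_missed = secs_since_expiry * fixed_point / secs_per_day;
	uint64_t missed = days_missed * daily_pageouts_max / fixed_point;

	return (uint32_t)MIN(base + missed, UINT32_MAX); /* 98304 pages */
}
#endif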
2110
2111 /*
2112 * Mark all non-frozen, freezer-eligible processes as skipped for the given reason.
2113 * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2114 * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2115 * it gets set for new processes.
2116 * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2117 */
2118 static void
2119 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2120 {
2121 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2122 LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2123 unsigned int band = JETSAM_PRIORITY_IDLE;
2124 proc_t p;
2125
2126 if (!locked) {
2127 proc_list_lock();
2128 }
2129 p = memorystatus_get_first_proc_locked(&band, FALSE);
2130 while (p) {
2131 assert(p->p_memstat_effectivepriority == (int32_t) band);
2132 if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) {
2133 assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2134 p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2135 }
2136 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2137 }
2138 if (!locked) {
2139 proc_list_unlock();
2140 }
2141 }
2142
2143 /*
2144 * Called after we fail to freeze a process.
2145 * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2146 */
2147 static void
2148 memorystatus_freeze_handle_error(
2149 proc_t p,
2150 const int freezer_error_code,
2151 bool was_refreeze,
2152 pid_t pid,
2153 const coalition_t coalition,
2154 const char* log_prefix)
2155 {
2156 const char *reason;
2157 memorystatus_freeze_skip_reason_t skip_reason;
2158
2159 switch (freezer_error_code) {
2160 case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2161 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2162 reason = "too much shared memory";
2163 skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2164 break;
2165 case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2166 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2167 reason = "private-shared pages ratio";
2168 skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2169 break;
2170 case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2171 memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2172 reason = "no compressor space";
2173 skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2174 break;
2175 case FREEZER_ERROR_NO_SWAP_SPACE:
2176 memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2177 reason = "no swap space";
2178 skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2179 break;
2180 default:
2181 reason = "unknown error";
2182 skip_reason = kMemorystatusFreezeSkipReasonOther;
2183 }
2184
2185 p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2186
2187 os_log_with_startup_serial(OS_LOG_DEFAULT, "%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2188 log_prefix, was_refreeze ? "re" : "",
2189 (coalition == NULL ? "general" : "coalition-driven"), pid,
2190 ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2191 }
2192
2193 /*
2194 * Start a new normal throttle interval with the given budget.
2195 * Caller must hold the freezer mutex
2196 */
2197 static void
2198 memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
2199 {
2200 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2201 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2202
2203 normal_throttle_window->max_pageouts = new_budget;
2204 normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
2205 normal_throttle_window->ts.tv_nsec = 0;
2206 ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
2207 /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2208 if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
2209 normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
2210 } else {
2211 normal_throttle_window->pageouts = 0;
2212 }
2213 /* Ensure the normal window is now active. */
2214 memorystatus_freeze_degradation = FALSE;
2215 memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2216 /*
2217 * Reset the thawed percentage to 0 so we re-evaluate in the new interval.
2218 */
2219 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release);
2220 os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release);
2221 os_atomic_inc(&memorystatus_freeze_current_interval, release);
2222 }
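/*
 * Worked example (illustrative, not compiled): the overshoot adjustment above.
 * Because throttle stats are charged before a freeze completes, the previous
 * window may have been charged more pageouts than the new budget covers. With
 * an assumed 70000 pageouts charged and a new 65536-page budget, the new window
 * starts with 4464 pageouts already consumed; otherwise it starts at 0.
 */
#if 0
static void
example_overshoot_adjustment(struct throttle_interval_t *win, uint32_t new_budget)
{
	win->max_pageouts = new_budget;
	if (win->pageouts > win->max_pageouts) {
		win->pageouts -= win->max_pageouts;   /* 70000 - 65536 = 4464 */
	} else {
		win->pageouts = 0;
	}
}
#endif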
2223
2224 #if DEVELOPMENT || DEBUG
2225
2226 static int
2227 sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
2228 {
2229 #pragma unused(arg1, arg2)
2230 int error = 0;
2231 unsigned int time_since_last_interval_expired_sec = 0;
2232 unsigned int new_budget;
2233
2234 error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
2235 if (error || !req->newptr) {
2236 return error;
2237 }
2238 new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
2239 return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget)));
2240 }
2241
2242 SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2243 0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
2244
2245 #endif /* DEVELOPMENT || DEBUG */
2246
2247 /*
2248 * Called when we first run out of budget in an interval.
2249 * Marks idle processes as not frozen due to lack of budget.
2250 * NB: It might be worth having a CA event here.
2251 */
2252 static void
2253 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2254 {
2255 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2256 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2257
2258 mach_timespec_t time_left = {0, 0};
2259 mach_timespec_t now_ts;
2260 clock_sec_t sec;
2261 clock_nsec_t nsec;
2262
2263 time_left.tv_sec = interval->ts.tv_sec;
2264 time_left.tv_nsec = 0;
2265 clock_get_system_nanotime(&sec, &nsec);
2266 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2267 now_ts.tv_nsec = nsec;
2268
2269 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2270 os_log(OS_LOG_DEFAULT,
2271 "memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2272 time_left.tv_sec / 60, memorystatus_frozen_count);
2273
2274 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2275 }
2276
2277 /*
2278 * Called when we cross over the threshold of maximum frozen processes allowed.
2279 * Marks remaining idle processes as not frozen due to lack of slots.
2280 */
2281 static void
2282 memorystatus_freeze_out_of_slots(void)
2283 {
2284 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2285 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2286 assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);
2287
2288 os_log(OS_LOG_DEFAULT,
2289 "memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
2290 memorystatus_frozen_count);
2291
2292 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
2293 }
2294
2295 /*
2296 * This function will do 4 things:
2297 *
2298 * 1) check to see if we are currently in a degraded freezer mode, and if so:
2299 * - check to see if our window has expired and we should exit this mode, OR,
2300 * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2301 *
2302 * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2303 *
2304 * 3) check what the current normal window allows for a budget.
2305 *
2306 * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2307 * what we would normally expect, then we are running low on our daily budget and need to enter
2308 * degraded perf. mode.
2309 *
2310 * Caller must hold the freezer mutex
2311 * Caller must not hold the proc_list lock
2312 */
2313
2314 static void
2315 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2316 {
2317 clock_sec_t sec;
2318 clock_nsec_t nsec;
2319 mach_timespec_t now_ts;
2320 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2321 LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2322
2323 unsigned int freeze_daily_pageouts_max = 0;
2324 uint32_t budget_rollover = 0;
2325 bool started_with_budget = (*budget_pages_allowed > 0);
2326
2327 #if DEVELOPMENT || DEBUG
2328 if (!memorystatus_freeze_throttle_enabled) {
2329 /*
2330 * No throttling...we can use the full budget every time.
2331 */
2332 *budget_pages_allowed = UINT64_MAX;
2333 return;
2334 }
2335 #endif
2336
2337 clock_get_system_nanotime(&sec, &nsec);
2338 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2339 now_ts.tv_nsec = nsec;
2340
2341 struct throttle_interval_t *interval = NULL;
2342
2343 if (memorystatus_freeze_degradation == TRUE) {
2344 interval = degraded_throttle_window;
2345
2346 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
     memorystatus_freeze_degradation = FALSE;
2347 interval->pageouts = 0;
2348 interval->max_pageouts = 0;
2349 } else {
2350 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2351 }
2352 }
2353
2354 interval = normal_throttle_window;
2355
2356 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2357 /* How long has it been since the previous interval expired? */
2358 mach_timespec_t expiration_period_ts = now_ts;
2359 SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
2360 /* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
2361 budget_rollover = interval->pageouts > interval->max_pageouts ?
2362 0 : interval->max_pageouts - interval->pageouts;
2363
2364 memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
2365 expiration_period_ts.tv_sec, interval->burst_multiple,
2366 interval->mins, budget_rollover),
2367 now_ts);
2368 *budget_pages_allowed = interval->max_pageouts;
2369
2370 memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */
2371 } else {
2372 /*
2373 * Current throttle window.
2374 * Deny freezing if we have no budget left.
2375 * Try graceful degradation if we are within 25% of:
2376 * - the daily budget, and
2377 * - the current budget left is below our normal budget expectations.
2378 */
2379
2380 if (memorystatus_freeze_degradation == FALSE) {
2381 if (interval->pageouts >= interval->max_pageouts) {
2382 *budget_pages_allowed = 0;
2383 if (started_with_budget) {
2384 memorystatus_freeze_out_of_budget(interval);
2385 }
2386 } else {
2387 int budget_left = interval->max_pageouts - interval->pageouts;
2388 int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2389
2390 mach_timespec_t time_left = {0, 0};
2391
2392 time_left.tv_sec = interval->ts.tv_sec;
2393 time_left.tv_nsec = 0;
2394
2395 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2396
2397 if (budget_left <= budget_threshold) {
2398 /*
2399 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2400 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2401 * daily pageout budget.
2402 */
2403
2404 unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2405 unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2406
2407 /*
2408 * The current rate of pageouts is below what we would expect for
2409 * the normal rate i.e. we have below normal budget left and so...
2410 */
2411
2412 if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2413 memorystatus_freeze_degradation = TRUE;
2414 degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2415 degraded_throttle_window->pageouts = 0;
2416
2417 /*
2418 * Switch over to the degraded throttle window so the budget
2419 * doled out is based on that window.
2420 */
2421 interval = degraded_throttle_window;
2422 }
2423 }
2424
2425 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2426 }
2427 }
2428 }
2429
2430 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
2431 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60,
2432 interval->throttle ? "on" : "off");
2433 }
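/*
 * Worked example (illustrative, not compiled): the rollover computed when the
 * normal interval expires (step 2 above). With an assumed 65536-page budget and
 * 60000 pages actually paged out, 5536 pages roll into the next interval's
 * budget. If the interval overspent, the rollover clamps to 0 and the overshoot
 * is instead carried forward by memorystatus_freeze_start_normal_throttle_interval().
 */
#if 0
static uint32_t
example_budget_rollover(const struct throttle_interval_t *win)
{
	return (win->pageouts > win->max_pageouts) ? 0 : (win->max_pageouts - win->pageouts);
}
#endif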
2434
2435 static void
2436 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2437 {
2438 static boolean_t memorystatus_freeze_swap_low = FALSE;
2439
2440 lck_mtx_lock(&freezer_mutex);
2441
2442 if (memorystatus_freeze_enabled) {
2443 if ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2444 (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD)) {
2445 if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2446 /* Only freeze if we've not exceeded our pageout budgets.*/
2447 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2448
2449 if (memorystatus_freeze_budget_pages_remaining) {
2450 memorystatus_freeze_top_process();
2451 } else {
2452 memorystatus_demote_frozen_processes(TRUE); /* urgent mode..force one demotion */
2453 }
2454 }
2455 }
2456 }
2457
2458 /*
2459 * Give applications currently in the aging band a chance to age out into the idle band before
2460 * running the freezer again.
2461 */
2462 memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time;
2463
2464 assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2465 lck_mtx_unlock(&freezer_mutex);
2466
2467 thread_block((thread_continue_t) memorystatus_freeze_thread);
2468 }
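/*
 * Illustrative sketch (not compiled): how the freezer thread above is typically
 * woken. A producer checks memorystatus_freeze_thread_should_run() (defined
 * below) without holding the freezer_mutex and, if it returns TRUE, posts the
 * event the thread blocked on. The wrapper name is hypothetical; in this
 * subsystem the wakeup is driven from the memorystatus paging path.
 */
#if 0
static void
example_kick_freezer_thread(void)
{
	if (memorystatus_freeze_thread_should_run()) {
		thread_wakeup((event_t)&memorystatus_freeze_wakeup);
	}
}
#endif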
2469
2470 boolean_t
2471 memorystatus_freeze_thread_should_run(void)
2472 {
2473 /*
2474 * No freezer_mutex held here...see why near call-site
2475 * within memorystatus_pages_update().
2476 */
2477
2478 boolean_t should_run = FALSE;
2479
2480 if (memorystatus_freeze_enabled == FALSE) {
2481 goto out;
2482 }
2483
2484 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2485 goto out;
2486 }
2487
2488 memorystatus_freezer_stats.mfs_below_threshold_count++;
2489
2490 if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
2491 /*
2492 * Consider this as a skip even if we wake up to refreeze because
2493 * we won't freeze any new procs.
2494 */
2495 memorystatus_freezer_stats.mfs_skipped_full_count++;
2496 if (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD) {
2497 goto out;
2498 }
2499 }
2500
2501 if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
2502 memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
2503 goto out;
2504 }
2505
2506 uint64_t curr_time = mach_absolute_time();
2507
2508 if (curr_time < memorystatus_freezer_thread_next_run_ts) {
2509 goto out;
2510 }
2511
2512 should_run = TRUE;
2513
2514 out:
2515 return should_run;
2516 }
2517
2518 int
2519 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2520 {
2521 proc_t p = PROC_NULL;
2522
2523 if (pid == 0) {
2524 return EINVAL;
2525 }
2526
2527 p = proc_find(pid);
2528 if (!p) {
2529 return ESRCH;
2530 }
2531
2532 /*
2533 * Only allow this on the current proc for now.
2534 * We can check for privileges and allow targeting another process in the future.
2535 */
2536 if (p != current_proc()) {
2537 proc_rele(p);
2538 return EPERM;
2539 }
2540
2541 proc_list_lock();
2542 *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
2543 proc_rele_locked(p);
2544 proc_list_unlock();
2545
2546 return 0;
2547 }
2548
2549 errno_t
2550 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
2551 {
2552 proc_t p = PROC_NULL;
2553
2554 if (pid == 0) {
2555 return EINVAL;
2556 }
2557
2558 /*
2559 * Only allow this on the current proc for now.
2560 * We can check for privileges and allow targeting another process in the future.
2561 */
2562 p = current_proc();
2563 if (p->p_pid != pid) {
2564 return EPERM;
2565 }
2566
2567 proc_list_lock();
2568 *is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
2569 proc_list_unlock();
2570
2571 return 0;
2572 }
2573
2574 int
2575 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
2576 {
2577 proc_t p = PROC_NULL;
2578
2579 if (pid == 0) {
2580 return EINVAL;
2581 }
2582
2583 /*
2584 * To enable freezable status, you need to be root or have an entitlement.
2585 */
2586 if (is_freezable &&
2587 !kauth_cred_issuser(kauth_cred_get()) &&
2588 !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
2589 return EPERM;
2590 }
2591
2592 p = proc_find(pid);
2593 if (!p) {
2594 return ESRCH;
2595 }
2596
2597 /*
2598 * A process can change its own status. A coalition leader can
2599 * change the status of coalition members.
2600 */
2601 if (p != current_proc()) {
2602 coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
2603 if (!coalition_is_leader(proc_task(current_proc()), coal)) {
2604 proc_rele(p);
2605 return EPERM;
2606 }
2607 }
2608
2609 proc_list_lock();
2610 if (is_freezable == FALSE) {
2611 /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
2612 p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
2613 printf("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
2614 p->p_pid, (*p->p_name ? p->p_name : "unknown"));
2615 } else {
2616 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
2617 printf("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
2618 p->p_pid, (*p->p_name ? p->p_name : "unknown"));
2619 }
2620 proc_rele_locked(p);
2621 proc_list_unlock();
2622
2623 return 0;
2624 }
2625
2626 /*
2627 * Called when process is created before it is added to a memorystatus bucket.
2628 */
2629 void
2630 memorystatus_freeze_init_proc(proc_t p)
2631 {
2632 /* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
2633 if (memorystatus_freeze_budget_pages_remaining == 0) {
2634 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
2635 } else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
2636 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
2637 } else {
2638 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
2639 }
2640 }
2641
2642
2643 static int
2644 sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS
2645 {
2646 #pragma unused(oidp, arg1, arg2)
2647
2648 if (!req->newptr) {
2649 return EINVAL;
2650 }
2651
2652 /* Need to be root or have entitlement */
2653 if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
2654 return EPERM;
2655 }
2656
2657 if (memorystatus_freeze_enabled == FALSE) {
2658 return ENOTSUP;
2659 }
2660
2661 do_fastwake_warmup_all();
2662
2663 return 0;
2664 }
2665
2666 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
2667 0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
2668
2669 #endif /* CONFIG_FREEZE */