apple/xnu (xnu-7195.60.75): bsd/kern/kern_memorystatus_freeze.c
1 /*
2 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_group.h>
40
41 #include <libkern/libkern.h>
42 #include <mach/coalition.h>
43 #include <mach/mach_time.h>
44 #include <mach/task.h>
45 #include <mach/host_priv.h>
46 #include <mach/mach_host.h>
47 #include <os/log.h>
48 #include <pexpert/pexpert.h>
49 #include <sys/coalition.h>
50 #include <sys/kern_event.h>
51 #include <sys/proc.h>
52 #include <sys/proc_info.h>
53 #include <sys/reason.h>
54 #include <sys/signal.h>
55 #include <sys/signalvar.h>
56 #include <sys/sysctl.h>
57 #include <sys/sysproto.h>
58 #include <sys/wait.h>
59 #include <sys/tree.h>
60 #include <sys/priv.h>
61 #include <vm/vm_pageout.h>
62 #include <vm/vm_protos.h>
63 #include <mach/machine/sdt.h>
64 #include <libkern/section_keywords.h>
65 #include <stdatomic.h>
66
67 #include <IOKit/IOBSD.h>
68
69 #if CONFIG_FREEZE
70 #include <vm/vm_map.h>
71 #endif /* CONFIG_FREEZE */
72
73 #include <sys/kern_memorystatus.h>
74 #include <sys/kern_memorystatus_freeze.h>
75 #include <sys/kern_memorystatus_notify.h>
76
77 #if CONFIG_JETSAM
78
79 extern unsigned int memorystatus_available_pages;
80 extern unsigned int memorystatus_available_pages_pressure;
81 extern unsigned int memorystatus_available_pages_critical;
82 extern unsigned int memorystatus_available_pages_critical_base;
83 extern unsigned int memorystatus_available_pages_critical_idle_offset;
84
85 #else /* CONFIG_JETSAM */
86
87 extern uint64_t memorystatus_available_pages;
88 extern uint64_t memorystatus_available_pages_pressure;
89 extern uint64_t memorystatus_available_pages_critical;
90
91 #endif /* CONFIG_JETSAM */
92
93 unsigned int memorystatus_frozen_count = 0;
94 unsigned int memorystatus_suspended_count = 0;
95 unsigned long freeze_threshold_percentage = 50;
96
97 #if CONFIG_FREEZE
98
99 lck_grp_attr_t *freezer_lck_grp_attr;
100 lck_grp_t *freezer_lck_grp;
101 static lck_mtx_t freezer_mutex;
102
103 /* Thresholds */
104 unsigned int memorystatus_freeze_threshold = 0;
105 unsigned int memorystatus_freeze_pages_min = 0;
106 unsigned int memorystatus_freeze_pages_max = 0;
107 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
108 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
109 uint64_t memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */
110 boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */
111
112 unsigned int memorystatus_max_frozen_demotions_daily = 0;
113 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
114
115 boolean_t memorystatus_freeze_enabled = FALSE;
116 int memorystatus_freeze_wakeup = 0;
117 int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
118
119 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
120
121 #ifdef XNU_KERNEL_PRIVATE
122
123 unsigned int memorystatus_frozen_processes_max = 0;
124 unsigned int memorystatus_frozen_shared_mb = 0;
125 unsigned int memorystatus_frozen_shared_mb_max = 0;
126 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. shared MB allowed per process for it to be freezer-eligible. */
127 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
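/* E.g. with the default ratio of 2, a process should have at least twice as many private pages as shared pages to be considered freezer-eligible. */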
128 unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */
129 uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */
130 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
131
132 /* Freezer counters collected for telemetry */
133 static struct memorystatus_freezer_stats_t {
134 /*
135 * # of processes that we've considered freezing.
136 * Used to normalize the error reasons below.
137 */
138 uint64_t mfs_process_considered_count;
139
140 /*
141 * The following counters track how many times we've failed to freeze
142 * a process because of a specific FREEZER_ERROR.
143 */
144 /* EXCESS_SHARED_MEMORY */
145 uint64_t mfs_error_excess_shared_memory_count;
146 /* LOW_PRIVATE_SHARED_RATIO */
147 uint64_t mfs_error_low_private_shared_ratio_count;
148 /* NO_COMPRESSOR_SPACE */
149 uint64_t mfs_error_no_compressor_space_count;
150 /* NO_SWAP_SPACE */
151 uint64_t mfs_error_no_swap_space_count;
152 /* pages < memorystatus_freeze_pages_min */
153 uint64_t mfs_error_below_min_pages_count;
154 /* dasd determined it was unlikely to be relaunched. */
155 uint64_t mfs_error_low_probability_of_use_count;
156 /* transient reasons (like inability to acquire a lock). */
157 uint64_t mfs_error_other_count;
158
159 /*
160 * # of times that we saw memorystatus_available_pages <= memorystatus_freeze_threshold.
161 * Used to normalize skipped_full_count and shared_mb_high_count.
162 */
163 uint64_t mfs_below_threshold_count;
164
165 /* Skipped running the freezer because we were out of slots */
166 uint64_t mfs_skipped_full_count;
167
168 /* Skipped running the freezer because we were over the shared mb limit */
169 uint64_t mfs_skipped_shared_mb_high_count;
170
171 /*
172 * How many pages have not been sent to swap because they were in a shared object?
173 * This is being used to gather telemetry so we can understand the impact we'd have
174 * on our NAND budget if we did swap out these pages.
175 */
176 uint64_t mfs_shared_pages_skipped;
177
178 /*
179 * A running sum of the total number of bytes sent to NAND during
180 * refreeze operations since boot.
181 */
182 uint64_t mfs_bytes_refrozen;
183 /* The number of refreeze operations since boot */
184 uint64_t mfs_refreeze_count;
185 } memorystatus_freezer_stats = {0};
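/*
 * The counters above are meant to be read together. For example, dividing
 * mfs_error_below_min_pages_count by mfs_process_considered_count gives the
 * fraction of considered processes that were skipped for being too small,
 * while mfs_skipped_full_count is normalized by mfs_below_threshold_count.
 */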
186
187 #endif /* XNU_KERNEL_PRIVATE */
188
189 static inline boolean_t memorystatus_can_freeze_processes(void);
190 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
191 static boolean_t memorystatus_is_process_eligible_for_freeze(proc_t p);
192 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
193 static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts);
194
195 void memorystatus_disable_freeze(void);
196
197 /* Stats */
198 static uint64_t memorystatus_freeze_pageouts = 0;
199
200 /* Throttling */
201 #define DEGRADED_WINDOW_MINS (30)
202 #define NORMAL_WINDOW_MINS (24 * 60)
203
204 /* Protected by the freezer_mutex */
205 static throttle_interval_t throttle_intervals[] = {
206 { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
207 { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
208 };
209 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
210 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
211
212 extern uint64_t vm_swap_get_free_space(void);
213 extern boolean_t vm_swap_max_budget(uint64_t *);
214 extern int i_coal_jetsam_get_taskrole(coalition_t coal, task_t task);
215
216 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
217 static void memorystatus_demote_frozen_processes(boolean_t force_one);
218
219 static void memorystatus_freeze_handle_error(proc_t p, const int freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix);
220 static void memorystatus_freeze_out_of_slots(void);
221 static uint64_t memorystatus_freezer_thread_next_run_ts = 0;
222
223 /* Sysctls needed for aggd stats */
224
225 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
226 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
227 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, "");
228 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
229 #if DEVELOPMENT || DEBUG
230 static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS
231 {
232 #pragma unused(arg1, arg2, oidp)
233 int error, changed;
234 uint64_t new_budget = memorystatus_freeze_budget_pages_remaining;
235 mach_timespec_t now_ts;
236 clock_sec_t sec;
237 clock_nsec_t nsec;
238
239 lck_mtx_lock(&freezer_mutex);
240
241 error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed);
242 if (changed) {
243 /* Start a new interval with this budget. */
244 clock_get_system_nanotime(&sec, &nsec);
245 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
246 now_ts.tv_nsec = nsec;
247 memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts);
248 /* Don't carry over any excess pageouts since we're forcing a new budget */
249 normal_throttle_window->pageouts = 0;
250 memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts;
251 }
252
253 lck_mtx_unlock(&freezer_mutex);
254 return error;
255 }
256
257 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", "");
258 #else /* DEVELOPMENT || DEBUG */
259 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
260 #endif /* DEVELOPMENT || DEBUG */
261 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, "");
262 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, "");
263 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, "");
264 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, "");
265 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, "");
266 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, "");
267 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, "");
268 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, "");
269 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, "");
270 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, "");
271 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, "");
272 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, "");
273 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, "");
274 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, "");
275
276 static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX);
277
278
279 /*
280 * Calculates the hit rate for the freezer.
281 * The hit rate is defined as the percentage of procs that are currently in the
282 * freezer which we have thawed.
283 * A low hit rate means we're freezing bad candidates since they're not re-used.
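 * For example, if 8 of the 20 processes currently marked P_MEMSTAT_FROZEN
 * have a non-zero p_memstat_thaw_count, the reported value is 40.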
284 */
285 static int sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS
286 {
287 #pragma unused(arg1, arg2)
288 size_t thaw_count = 0, frozen_count = 0;
289 int thaw_percentage = 100;
290 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
291 proc_t p = PROC_NULL;
292 proc_list_lock();
293
294 p = memorystatus_get_first_proc_locked(&band, FALSE);
295
296 while (p) {
297 if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
298 if (p->p_memstat_thaw_count > 0) {
299 thaw_count++;
300 }
301 frozen_count++;
302 }
303 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
304 }
305 proc_list_unlock();
306 if (frozen_count > 0) {
307 assert(thaw_count <= frozen_count);
308 thaw_percentage = (int)(100 * thaw_count / frozen_count);
309 }
310 return sysctl_handle_int(oidp, &thaw_percentage, 0, req);
311 }
312 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", "");
313
314 #define FREEZER_ERROR_STRING_LENGTH 128
315
316 #if DEVELOPMENT || DEBUG
317
318 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_jetsam_band, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_jetsam_band, 0, "");
319 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
320 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
321 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
322 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
323 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");
324 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
325 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_processes_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_processes_max, 0, "");
326
327 /*
328 * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
329 * "0" means no limit.
330 * Default is 10% of system-wide task limit.
331 */
332
333 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
334 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
335
336 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
337 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_private_shared_pages_ratio, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_private_shared_pages_ratio, 0, "");
338
339 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");
340
341 /*
342 * max. # of frozen process demotions we will allow in our daily cycle.
343 */
344 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_max_freeze_demotions_daily, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_max_frozen_demotions_daily, 0, "");
345 /*
346 * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
347 */
348 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count_demotion_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_thaw_count_demotion_threshold, 0, "");
349
350 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
351 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
352
353 /*
354 * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
355 * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
356 */
357 boolean_t memorystatus_freeze_to_memory = FALSE;
358 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
359
360 #define VM_PAGES_FOR_ALL_PROCS (2)
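/*
 * Writing this sentinel value to the freeze/thaw sysctls below operates on all
 * processes (vm_pageout_anonymous_pages() / do_fastwake_warmup_all()) rather
 * than on a single pid.
 */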
361
362 /*
363 * Manual trigger of freeze and thaw for dev / debug kernels only.
364 */
365 static int
366 sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
367 {
368 #pragma unused(arg1, arg2)
369 int error, pid = 0;
370 proc_t p;
371 int freezer_error_code = 0;
372 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
373 int ntasks = 0;
374 coalition_t coal = COALITION_NULL;
375
376 if (memorystatus_freeze_enabled == FALSE) {
377 printf("sysctl_freeze: Freeze is DISABLED\n");
378 return ENOTSUP;
379 }
380
381 error = sysctl_handle_int(oidp, &pid, 0, req);
382 if (error || !req->newptr) {
383 return error;
384 }
385
386 if (pid == VM_PAGES_FOR_ALL_PROCS) {
387 vm_pageout_anonymous_pages();
388
389 return 0;
390 }
391
392 lck_mtx_lock(&freezer_mutex);
393
394 again:
395 p = proc_find(pid);
396 if (p != NULL) {
397 memorystatus_freezer_stats.mfs_process_considered_count++;
398 uint32_t purgeable, wired, clean, dirty, shared;
399 uint32_t max_pages = 0, state = 0;
400
401 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
402 /*
403 * Freezer backed by the compressor and swap file(s)
404 * will hold compressed data.
405 *
406 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
407 * being swapped out to disk. Note that this disables freezer swap support globally,
408 * not just for the process being frozen.
409 *
410 *
411 * We don't care about the global freezer budget or the process's (min/max) budget here.
412 * The freeze sysctl is meant to force-freeze a process.
413 *
414 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
415 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
416 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
417 */
418 max_pages = memorystatus_freeze_pages_max;
419 } else {
420 /*
421 * We only have the compressor without any swap.
422 */
423 max_pages = UINT32_MAX - 1;
424 }
425
426 proc_list_lock();
427 state = p->p_memstat_state;
428 proc_list_unlock();
429
430 /*
431 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
432 * We simply ensure that jetsam is not already working on the process and that the process has not
433 * explicitly disabled freezing.
434 */
435 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
436 printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
437 (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
438 (state & P_MEMSTAT_LOCKED) ? " locked" : "",
439 (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
440
441 proc_rele(p);
442 lck_mtx_unlock(&freezer_mutex);
443 return EPERM;
444 }
445
446 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
447 if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
448 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
449 }
450
451 if (error) {
452 memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze");
453 if (error == KERN_NO_SPACE) {
454 /* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
455 error = ENOSPC;
456 } else {
457 error = EIO;
458 }
459 } else {
460 proc_list_lock();
461 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
462 p->p_memstat_state |= P_MEMSTAT_FROZEN;
463 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
464 memorystatus_frozen_count++;
465 if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
466 memorystatus_freeze_out_of_slots();
467 }
468 } else {
469 // This was a re-freeze
470 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
471 memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
472 memorystatus_freezer_stats.mfs_refreeze_count++;
473 }
474 }
475 p->p_memstat_frozen_count++;
476
477
478 proc_list_unlock();
479
480 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
481 /*
482 * We elevate only if we are going to swap out the data.
483 */
484 error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
485 memorystatus_freeze_jetsam_band, TRUE);
486
487 if (error) {
488 printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
489 }
490 }
491 }
492
493 if ((error == 0) && (coal == NULL)) {
494 /*
495 * We froze a process and so we check to see if it was
496 * a coalition leader and if it has XPC services that
497 * might need freezing.
498 * Only one leader can be frozen at a time and so we shouldn't
499 * enter this block more than once per call. Hence the
500 * check that 'coal' has to be NULL. We should make this an
501 * assert() or panic() once we have a much more concrete way
502 * to detect an app vs a daemon.
503 */
504
505 task_t curr_task = NULL;
506
507 curr_task = proc_task(p);
508 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
509 if (coalition_is_leader(curr_task, coal)) {
510 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
511 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
512
513 if (ntasks > MAX_XPC_SERVICE_PIDS) {
514 ntasks = MAX_XPC_SERVICE_PIDS;
515 }
516 }
517 }
518
519 proc_rele(p);
520
521 while (ntasks) {
522 pid = pid_list[--ntasks];
523 goto again;
524 }
525
526 lck_mtx_unlock(&freezer_mutex);
527 return error;
528 } else {
529 printf("sysctl_freeze: Invalid process\n");
530 }
531
532
533 lck_mtx_unlock(&freezer_mutex);
534 return EINVAL;
535 }
536
537 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
538 0, 0, &sysctl_memorystatus_freeze, "I", "");
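/*
 * Usage sketch (DEVELOPMENT || DEBUG kernels only), e.g. from a root shell:
 *   sysctl kern.memorystatus_freeze=<pid>   force-freeze a single process
 *   sysctl kern.memorystatus_freeze=2       VM_PAGES_FOR_ALL_PROCS: compress anonymous pages system-wide
 */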
539
540 /*
541 * Manual trigger of aggressive frozen demotion for dev / debug kernels only.
542 */
543 static int
544 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
545 {
546 #pragma unused(arg1, arg2, oidp, req)
547 int error, val;
548 /*
549 * Only demote on write to prevent demoting during `sysctl -a`.
550 * The actual value written doesn't matter.
551 */
552 error = sysctl_handle_int(oidp, &val, 0, req);
553 if (error || !req->newptr) {
554 return error;
555 }
556 memorystatus_demote_frozen_processes(false);
557 return 0;
558 }
559
560 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
561
562 static int
563 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
564 {
565 #pragma unused(arg1, arg2)
566
567 int error, pid = 0;
568 proc_t p;
569
570 if (memorystatus_freeze_enabled == FALSE) {
571 return ENOTSUP;
572 }
573
574 error = sysctl_handle_int(oidp, &pid, 0, req);
575 if (error || !req->newptr) {
576 return error;
577 }
578
579 if (pid == VM_PAGES_FOR_ALL_PROCS) {
580 do_fastwake_warmup_all();
581 return 0;
582 } else {
583 p = proc_find(pid);
584 if (p != NULL) {
585 error = task_thaw(p->task);
586
587 if (error) {
588 error = EIO;
589 } else {
590 /*
591 * task_thaw() succeeded.
592 *
593 * We increment memorystatus_frozen_count on the sysctl freeze path.
594 * And so we need the P_MEMSTAT_FROZEN bit to stay set so that the frozen
595 * count is decremented when this process exits; hence the clearing code below is left commented out.
596 *
597 * proc_list_lock();
598 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
599 * proc_list_unlock();
600 */
601 }
602 proc_rele(p);
603 return error;
604 }
605 }
606
607 return EINVAL;
608 }
609
610 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
611 0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
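/*
 * Usage sketch, mirroring the freeze sysctl above:
 *   sysctl kern.memorystatus_thaw=<pid>     thaw a single frozen process via task_thaw()
 *   sysctl kern.memorystatus_thaw=2         VM_PAGES_FOR_ALL_PROCS: do_fastwake_warmup_all()
 */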
612
613
614 typedef struct _global_freezable_status {
615 boolean_t freeze_pages_threshold_crossed;
616 boolean_t freeze_eligible_procs_available;
617 boolean_t freeze_scheduled_in_future;
618 }global_freezable_status_t;
619
620 typedef struct _proc_freezable_status {
621 boolean_t freeze_has_memstat_state;
622 boolean_t freeze_has_pages_min;
623 int freeze_has_probability;
624 int freeze_leader_eligible;
625 boolean_t freeze_attempted;
626 uint32_t p_memstat_state;
627 uint32_t p_pages;
628 int p_freeze_error_code;
629 int p_pid;
630 int p_leader_pid;
631 char p_name[MAXCOMLEN + 1];
632 }proc_freezable_status_t;
633
634 #define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */
635
636 /*
637 * For coalition based freezing evaluations, we proceed as follows:
638 * - detect that the process is a coalition member and a XPC service
639 * - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
640 * - continue its freezability evaluation assuming its leader will be freezable too
641 *
642 * Once we are done evaluating all processes, we do a quick run thru all
643 * processes and for a coalition member XPC service we look up the 'freezable'
644 * status of its leader and iff:
645 * - the xpc service is freezable i.e. its individual freeze evaluation worked
646 * - and, its leader is also marked freezable
647 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
648 */
649
650 #define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN (-1)
651 #define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS (1)
652 #define FREEZE_PROC_LEADER_FREEZABLE_FAILURE (2)
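/*
 * Example outcome of the two-pass evaluation described above: an XPC service
 * whose coalition leader also evaluated as freezable (or is already frozen)
 * ends up FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; if the leader failed its
 * evaluation, or never appeared in band 0, the service is marked
 * FREEZE_PROC_LEADER_FREEZABLE_FAILURE with FREEZER_ERROR_GENERIC.
 */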
653
654 static int
655 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
656 {
657 uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
658 global_freezable_status_t *list_head;
659 proc_freezable_status_t *list_entry, *list_entry_start;
660 size_t list_size = 0, entry_count = 0;
661 proc_t p, leader_proc;
662 memstat_bucket_t *bucket;
663 uint32_t state = 0, pages = 0;
664 boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
665 int error = 0, probability_of_use = 0;
666 pid_t leader_pid = 0;
667
668
669 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
670 return ENOTSUP;
671 }
672
673 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
674
675 if (buffer_size < list_size) {
676 return EINVAL;
677 }
678
679 list_head = kheap_alloc(KHEAP_TEMP, list_size, Z_WAITOK | Z_ZERO);
680 if (list_head == NULL) {
681 return ENOMEM;
682 }
683
684 list_size = sizeof(global_freezable_status_t);
685
686 proc_list_lock();
687
688 uint64_t curr_time = mach_absolute_time();
689
690 list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
691 list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
692 list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
693
694 list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
695 list_entry = list_entry_start;
696
697 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
698
699 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
700
701 p = memorystatus_get_first_proc_locked(&band, FALSE);
702 proc_count++;
703
704 while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
705 (p) &&
706 (list_size < buffer_size)) {
707 if (isSysProc(p)) {
708 /*
709 * Daemon:- We will consider freezing it iff:
710 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
711 * - its role in the coalition is XPC service.
712 *
713 * We skip memory size requirements in this case.
714 */
715
716 coalition_t coal = COALITION_NULL;
717 task_t leader_task = NULL, curr_task = NULL;
718 int task_role_in_coalition = 0;
719
720 curr_task = proc_task(p);
721 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
722
723 if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
724 /*
725 * By default, XPC services without an app
726 * will be the leader of their own single-member
727 * coalition.
728 */
729 goto skip_ineligible_xpc;
730 }
731
732 leader_task = coalition_get_leader(coal);
733 if (leader_task == TASK_NULL) {
734 /*
735 * This jetsam coalition is currently leader-less.
736 * This could happen if the app died, but XPC services
737 * have not yet exited.
738 */
739 goto skip_ineligible_xpc;
740 }
741
742 leader_proc = (proc_t)get_bsdtask_info(leader_task);
743 task_deallocate(leader_task);
744
745 if (leader_proc == PROC_NULL) {
746 /* leader task is exiting */
747 goto skip_ineligible_xpc;
748 }
749
750 task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
751
752 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
753 xpc_skip_size_probability_check = TRUE;
754 leader_pid = leader_proc->p_pid;
755 goto continue_eval;
756 }
757
758 skip_ineligible_xpc:
759 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
760 proc_count++;
761 continue;
762 }
763
764 continue_eval:
765 strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
766
767 list_entry->p_pid = p->p_pid;
768
769 state = p->p_memstat_state;
770
771 if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
772 !(state & P_MEMSTAT_SUSPENDED)) {
773 try_freeze = list_entry->freeze_has_memstat_state = FALSE;
774 } else {
775 try_freeze = list_entry->freeze_has_memstat_state = TRUE;
776 }
777
778 list_entry->p_memstat_state = state;
779
780 if (xpc_skip_size_probability_check == TRUE) {
781 /*
782 * Assuming the coalition leader is freezable
783 * we don't care re. minimum pages and probability
784 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
785 * XPC services have to be explicitly opted-out of the disabled
786 * state. And we checked that state above.
787 */
788 list_entry->freeze_has_pages_min = TRUE;
789 list_entry->p_pages = -1;
790 list_entry->freeze_has_probability = -1;
791
792 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
793 list_entry->p_leader_pid = leader_pid;
794
795 xpc_skip_size_probability_check = FALSE;
796 } else {
797 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
798 list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
799
800 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
801 if (pages < memorystatus_freeze_pages_min) {
802 try_freeze = list_entry->freeze_has_pages_min = FALSE;
803 } else {
804 list_entry->freeze_has_pages_min = TRUE;
805 }
806
807 list_entry->p_pages = pages;
808
809 if (entry_count) {
810 uint32_t j = 0;
811 for (j = 0; j < entry_count; j++) {
812 if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
813 p->p_name,
814 MAXCOMLEN + 1) == 0) {
815 probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
816 break;
817 }
818 }
819
820 list_entry->freeze_has_probability = probability_of_use;
821
822 try_freeze = ((probability_of_use > 0) && try_freeze);
823 } else {
824 list_entry->freeze_has_probability = -1;
825 }
826 }
827
828 if (try_freeze) {
829 uint32_t purgeable, wired, clean, dirty, shared;
830 uint32_t max_pages = 0;
831 int freezer_error_code = 0;
832
833 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
834
835 if (error) {
836 list_entry->p_freeze_error_code = freezer_error_code;
837 }
838
839 list_entry->freeze_attempted = TRUE;
840 }
841
842 list_entry++;
843 freeze_eligible_proc_considered++;
844
845 list_size += sizeof(proc_freezable_status_t);
846
847 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
848 proc_count++;
849 }
850
851 proc_list_unlock();
852
853 list_entry = list_entry_start;
854
855 for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
856 if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
857 leader_pid = list_entry[xpc_index].p_leader_pid;
858
859 leader_proc = proc_find(leader_pid);
860
861 if (leader_proc) {
862 if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
863 /*
864 * Leader has already been frozen.
865 */
866 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
867 proc_rele(leader_proc);
868 continue;
869 }
870 proc_rele(leader_proc);
871 }
872
873 for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
874 if (list_entry[leader_index].p_pid == leader_pid) {
875 if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
876 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
877 } else {
878 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
879 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
880 }
881 break;
882 }
883 }
884
885 /*
886 * Didn't find the leader entry. This is most likely because
887 * the leader never made it down to band 0.
888 */
889 if (leader_index == freeze_eligible_proc_considered) {
890 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
891 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
892 }
893 }
894 }
895
896 buffer_size = MIN(list_size, INT32_MAX);
897
898 error = copyout(list_head, buffer, buffer_size);
899 if (error == 0) {
900 *retval = (int32_t) buffer_size;
901 } else {
902 *retval = 0;
903 }
904
905 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
906 kheap_free(KHEAP_TEMP, list_head, list_size);
907
908 MEMORYSTATUS_DEBUG(1, "memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)*list_size);
909
910 return error;
911 }
912
913 #endif /* DEVELOPMENT || DEBUG */
914
915 /*
916 * Get a list of all processes in the freezer band which are currently frozen.
917 * Used by powerlog to collect analytics on frozen processes.
918 */
919 static int
920 memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval)
921 {
922 global_frozen_procs_t *frozen_procs = NULL;
923 uint32_t band = memorystatus_freeze_jetsam_band;
924 proc_t p;
925 uint32_t state;
926 int error;
927 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
928 return ENOTSUP;
929 }
930 if (buffer_size < sizeof(global_frozen_procs_t)) {
931 return EINVAL;
932 }
933 frozen_procs = kheap_alloc(KHEAP_TEMP, sizeof(global_frozen_procs_t),
934 Z_WAITOK | Z_ZERO);
935 if (frozen_procs == NULL) {
936 return ENOMEM;
937 }
938
939 proc_list_lock();
940 p = memorystatus_get_first_proc_locked(&band, FALSE);
941 while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) {
942 state = p->p_memstat_state;
943 if (state & P_MEMSTAT_FROZEN) {
944 frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = p->p_pid;
945 strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name,
946 p->p_name, sizeof(proc_name_t));
947 frozen_procs->gfp_num_frozen++;
948 }
949 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
950 }
951 proc_list_unlock();
952
953 buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t));
954 error = copyout(frozen_procs, buffer, buffer_size);
955 if (error == 0) {
956 *retval = (int32_t) buffer_size;
957 } else {
958 *retval = 0;
959 }
960 kheap_free(KHEAP_TEMP, frozen_procs, sizeof(global_frozen_procs_t));
961
962 return error;
963 }
964
965 int
966 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
967 {
968 int err = ENOTSUP;
969
970 #if DEVELOPMENT || DEBUG
971 if (flags == FREEZER_CONTROL_GET_STATUS) {
972 err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
973 }
974 #endif /* DEVELOPMENT || DEBUG */
975 if (flags == FREEZER_CONTROL_GET_PROCS) {
976 err = memorystatus_freezer_get_procs(buffer, buffer_size, retval);
977 }
978
979 return err;
980 }
981
982 extern void vm_swap_consider_defragmenting(int);
983 extern boolean_t memorystatus_kill_elevated_process(uint32_t, os_reason_t, unsigned int, int, uint32_t *, uint64_t *);
984
985 /*
986 * This routine will _jetsam_ all frozen processes
987 * and reclaim the swap space immediately.
988 *
989 * So freeze has to be DISABLED when we call this routine.
990 */
991
992 void
993 memorystatus_disable_freeze(void)
994 {
995 memstat_bucket_t *bucket;
996 int bucket_count = 0, retries = 0;
997 boolean_t retval = FALSE, killed = FALSE;
998 uint32_t errors = 0, errors_over_prev_iteration = 0;
999 os_reason_t jetsam_reason = 0;
1000 unsigned int band = 0;
1001 proc_t p = PROC_NULL, next_p = PROC_NULL;
1002 uint64_t memory_reclaimed = 0, footprint = 0;
1003
1004 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
1005 memorystatus_available_pages, 0, 0, 0, 0);
1006
1007 assert(memorystatus_freeze_enabled == FALSE);
1008
1009 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
1010 if (jetsam_reason == OS_REASON_NULL) {
1011 printf("memorystatus_disable_freeze: failed to allocate jetsam reason\n");
1012 }
1013
1014 /*
1015 * Let's relocate all frozen processes into band 8. Demoted frozen processes
1016 * are sitting in band 0 currently and it's possible to have a frozen process
1017 * in the FG band being actively used. We don't reset its frozen state when
1018 * it is resumed because it has state on disk.
1019 *
1020 * We choose to do this relocation rather than implement a new 'kill frozen'
1021 * process function for these reasons:
1022 * - duplication of code: too many kill functions exist and we need to rework them better.
1023 * - disk-space-shortage kills are rare
1024 * - not having the 'real' jetsam band at the time of this frozen kill won't preclude us
1025 * from answering any important questions re. jetsam policy/effectiveness.
1026 *
1027 * This is essentially what memorystatus_update_inactive_jetsam_priority_band() does while
1028 * avoiding the application of memory limits.
1029 */
1030
1031 again:
1032 proc_list_lock();
1033
1034 band = JETSAM_PRIORITY_IDLE;
1035 p = PROC_NULL;
1036 next_p = PROC_NULL;
1037
1038 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
1039 while (next_p) {
1040 p = next_p;
1041 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
1042
1043 if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
1044 break;
1045 }
1046
1047 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1048 continue;
1049 }
1050
1051 if (p->p_memstat_state & P_MEMSTAT_ERROR) {
1052 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
1053 }
1054
1055 if (p->p_memstat_effectivepriority == memorystatus_freeze_jetsam_band) {
1056 continue;
1057 }
1058
1059 /*
1060 * We explicitly add this flag here so the process looks like a normal
1061 * frozen process i.e. P_MEMSTAT_FROZEN and P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND.
1062 * We don't bother with assigning the 'active' memory
1063 * limits at this point because we are going to be killing it soon below.
1064 */
1065 p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1066 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1067
1068 memorystatus_update_priority_locked(p, memorystatus_freeze_jetsam_band, FALSE, TRUE);
1069 }
1070
1071 bucket = &memstat_bucket[memorystatus_freeze_jetsam_band];
1072 bucket_count = bucket->count;
1073 proc_list_unlock();
1074
1075 /*
1076 * Bucket count is already stale at this point. But, we don't expect
1077 * freezing to continue since we have already disabled the freeze functionality.
1078 * However, an existing freeze might be in progress. So we might miss that process
1079 * in the first go-around. We hope to catch it in the next.
1080 */
1081
1082 errors_over_prev_iteration = 0;
1083 while (bucket_count) {
1084 bucket_count--;
1085
1086 /*
1087 * memorystatus_kill_elevated_process() drops a reference,
1088 * so take another one so we can continue to use this exit reason
1089 * even after it returns.
1090 */
1091
1092 os_reason_ref(jetsam_reason);
1093 retval = memorystatus_kill_elevated_process(
1094 kMemorystatusKilledDiskSpaceShortage,
1095 jetsam_reason,
1096 memorystatus_freeze_jetsam_band,
1097 0, /* the iteration of aggressive jetsam..ignored here */
1098 &errors,
1099 &footprint);
1100
1101 if (errors > 0) {
1102 printf("memorystatus_disable_freeze: memorystatus_kill_elevated_process returned %d error(s)\n", errors);
1103 errors_over_prev_iteration += errors;
1104 errors = 0;
1105 }
1106
1107 if (retval == 0) {
1108 /*
1109 * No frozen processes left to kill.
1110 */
1111 break;
1112 }
1113
1114 killed = TRUE;
1115 memory_reclaimed += footprint;
1116 }
1117
1118 proc_list_lock();
1119
1120 if (memorystatus_frozen_count) {
1121 /*
1122 * A frozen process snuck in, so we
1123 * go back around to kill it. That
1124 * process may have been resumed and
1125 * put into the FG band too. So we
1126 * have to do the relocation again.
1127 */
1128 assert(memorystatus_freeze_enabled == FALSE);
1129
1130 retries++;
1131 if (retries < 3) {
1132 proc_list_unlock();
1133 goto again;
1134 }
1135 #if DEVELOPMENT || DEBUG
1136 panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d, errors = %d",
1137 memorystatus_frozen_count, errors_over_prev_iteration);
1138 #endif /* DEVELOPMENT || DEBUG */
1139 }
1140 proc_list_unlock();
1141
1142 os_reason_free(jetsam_reason);
1143
1144 if (killed) {
1145 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
1146
1147 proc_list_lock();
1148 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1149 sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1150 uint64_t timestamp_now = mach_absolute_time();
1151 memorystatus_jetsam_snapshot->notification_time = timestamp_now;
1152 memorystatus_jetsam_snapshot->js_gencount++;
1153 if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
1154 timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
1155 proc_list_unlock();
1156 int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1157 if (!ret) {
1158 proc_list_lock();
1159 memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
1160 proc_list_unlock();
1161 }
1162 } else {
1163 proc_list_unlock();
1164 }
1165 }
1166
1167 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
1168 memorystatus_available_pages, memory_reclaimed, 0, 0, 0);
1169
1170 return;
1171 }
1172
1173 __private_extern__ void
1174 memorystatus_freeze_init(void)
1175 {
1176 kern_return_t result;
1177 thread_t thread;
1178
1179 freezer_lck_grp_attr = lck_grp_attr_alloc_init();
1180 freezer_lck_grp = lck_grp_alloc_init("freezer", freezer_lck_grp_attr);
1181
1182 lck_mtx_init(&freezer_mutex, freezer_lck_grp, NULL);
1183
1184 /*
1185 * This is just the default value if the underlying
1186 * storage device doesn't have any specific budget.
1187 * We check with the storage layer in memorystatus_freeze_update_throttle()
1188 * before we start freezing for the first time.
1189 */
1190 memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
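/*
 * Worked example, assuming a 1024 MB daily default and a 16 KB PAGE_SIZE:
 * (1024 * 1024 * 1024) / 16384 = 65536 pages of budget per day
 * (262144 pages with a 4 KB PAGE_SIZE).
 */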
1191
1192 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
1193 if (result == KERN_SUCCESS) {
1194 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
1195 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
1196 thread_set_thread_name(thread, "VM_freezer");
1197
1198 thread_deallocate(thread);
1199 } else {
1200 panic("Could not create memorystatus_freeze_thread");
1201 }
1202 }
1203
1204 static boolean_t
1205 memorystatus_is_process_eligible_for_freeze(proc_t p)
1206 {
1207 /*
1208 * Called with proc_list_lock held.
1209 */
1210
1211 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
1212
1213 boolean_t should_freeze = FALSE;
1214 uint32_t state = 0, pages = 0;
1215 int probability_of_use = 0;
1216 size_t entry_count = 0, i = 0;
1217 bool first_consideration = true;
1218
1219 state = p->p_memstat_state;
1220
1221 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
1222 if (state & P_MEMSTAT_FREEZE_DISABLED) {
1223 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonDisabled;
1224 }
1225 goto out;
1226 }
1227
1228 if (isSysProc(p)) {
1229 /*
1230 * Daemon:- We consider freezing it if:
1231 * - it belongs to a coalition and the leader is frozen, and,
1232 * - its role in the coalition is XPC service.
1233 *
1234 * We skip memory size requirements in this case.
1235 */
1236
1237 coalition_t coal = COALITION_NULL;
1238 task_t leader_task = NULL, curr_task = NULL;
1239 proc_t leader_proc = NULL;
1240 int task_role_in_coalition = 0;
1241
1242 curr_task = proc_task(p);
1243 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1244
1245 if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1246 /*
1247 * By default, XPC services without an app
1248 * will be the leader of their own single-member
1249 * coalition.
1250 */
1251 goto out;
1252 }
1253
1254 leader_task = coalition_get_leader(coal);
1255 if (leader_task == TASK_NULL) {
1256 /*
1257 * This jetsam coalition is currently leader-less.
1258 * This could happen if the app died, but XPC services
1259 * have not yet exited.
1260 */
1261 goto out;
1262 }
1263
1264 leader_proc = (proc_t)get_bsdtask_info(leader_task);
1265 task_deallocate(leader_task);
1266
1267 if (leader_proc == PROC_NULL) {
1268 /* leader task is exiting */
1269 goto out;
1270 }
1271
1272 if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
1273 goto out;
1274 }
1275
1276 task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
1277
1278 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
1279 should_freeze = TRUE;
1280 }
1281
1282 goto out;
1283 } else {
1284 /*
1285 * Application. In addition to the above states we need to make
1286 * sure we only consider suspended applications for freezing.
1287 */
1288 if (!(state & P_MEMSTAT_SUSPENDED)) {
1289 goto out;
1290 }
1291 }
1292
1293 /*
1294 * This proc is a suspended application.
1295 * We're interested in tracking what percentage of these
1296 * actually get frozen.
1297 * To avoid skewing the metrics towards processes which
1298 * are considered more frequently, we only track failures once
1299 * per process.
1300 */
1301 first_consideration = !(state & P_MEMSTAT_FREEZE_CONSIDERED);
1302
1303 if (first_consideration) {
1304 memorystatus_freezer_stats.mfs_process_considered_count++;
1305 p->p_memstat_state |= P_MEMSTAT_FREEZE_CONSIDERED;
1306 }
1307
1308 /* Only freeze applications meeting our minimum resident page criteria */
1309 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
1310 if (pages < memorystatus_freeze_pages_min) {
1311 if (first_consideration) {
1312 memorystatus_freezer_stats.mfs_error_below_min_pages_count++;
1313 }
1314 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonBelowMinPages;
1315 goto out;
1316 }
1317
1318 /* Don't freeze a process that is already exiting on core. It may have started exiting
1319 * after we chose it for freeze, but before we obtained the proc_list_lock.
1320 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
1321 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
1322 */
1323 if ((p->p_listflag & P_LIST_EXITED) != 0) {
1324 if (first_consideration) {
1325 memorystatus_freezer_stats.mfs_error_other_count++;
1326 }
1327 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOther;
1328 goto out;
1329 }
1330
1331 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1332
1333 if (entry_count) {
1334 for (i = 0; i < entry_count; i++) {
1335 if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1336 p->p_name,
1337 MAXCOMLEN + 1) == 0) {
1338 probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1339 break;
1340 }
1341 }
1342
1343 if (probability_of_use == 0) {
1344 if (first_consideration) {
1345 memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++;
1346 }
1347 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonLowProbOfUse;
1348 goto out;
1349 }
1350 }
1351
1352 should_freeze = TRUE;
1353 out:
1354 if (should_freeze && !(state & P_MEMSTAT_FROZEN)) {
1355 /*
1356 * Reset the skip reason. If it's killed before we manage to actually freeze it
1357 * we failed to consider it early enough.
1358 */
1359 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1360 if (!first_consideration) {
1361 /*
1362 * We're freezing this for the first time and we previously considered it ineligible.
1363 * Bump the considered count so that we track this as 1 failure
1364 * and 1 success.
1365 */
1366 memorystatus_freezer_stats.mfs_process_considered_count++;
1367 }
1368 }
1369 return should_freeze;
1370 }
1371
1372 /*
1373 * Synchronously freeze the passed proc. Called with a reference to the proc held.
1374 *
1375 * Doesn't deal with:
1376 * - re-freezing because this is called on a specific process and
1377 * not by the freezer thread. If that changes, we'll have to teach it about
1378 * refreezing a frozen process.
1379 *
1380 * - grouped/coalition freezing because we are hoping to deprecate this
1381 * interface as it was used by user-space to freeze particular processes. But
1382 * we have moved away from that approach to having the kernel choose the optimal
1383 * candidates to be frozen.
1384 *
1385 * Returns EINVAL or the value returned by task_freeze().
1386 */
1387 int
1388 memorystatus_freeze_process_sync(proc_t p)
1389 {
1390 int ret = EINVAL;
1391 pid_t aPid = 0;
1392 boolean_t memorystatus_freeze_swap_low = FALSE;
1393 int freezer_error_code = 0;
1394
1395 lck_mtx_lock(&freezer_mutex);
1396
1397 if (p == NULL) {
1398 printf("memorystatus_freeze_process_sync: Invalid process\n");
1399 goto exit;
1400 }
1401
1402 if (memorystatus_freeze_enabled == FALSE) {
1403 printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
1404 goto exit;
1405 }
1406
1407 if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
1408 printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
1409 goto exit;
1410 }
1411
1412 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1413 if (!memorystatus_freeze_budget_pages_remaining) {
1414 printf("memorystatus_freeze_process_sync: exit with NO available budget\n");
1415 goto exit;
1416 }
1417
1418 proc_list_lock();
1419
1420 if (p != NULL) {
1421 uint32_t purgeable, wired, clean, dirty, shared;
1422 uint32_t i;
1423 uint64_t max_pages;
1424
1425 aPid = p->p_pid;
1426
1427 /* Ensure the process is eligible for freezing */
1428 if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1429 proc_list_unlock();
1430 goto exit;
1431 }
1432
1433 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1434 max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1435 } else {
1436 /*
1437 * We only have the compressor without any swap.
1438 */
1439 max_pages = UINT32_MAX - 1;
1440 }
1441
1442 /* Mark as locked temporarily to avoid kill */
1443 p->p_memstat_state |= P_MEMSTAT_LOCKED;
1444 proc_list_unlock();
1445
1446 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1447 memorystatus_available_pages, 0, 0, 0, 0);
1448
1449 max_pages = MIN(max_pages, UINT32_MAX);
1450 ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1451 if (ret == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1452 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1453 }
1454
1455 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1456 memorystatus_available_pages, aPid, 0, 0, 0);
1457
1458 DTRACE_MEMORYSTATUS6(memorystatus_freeze, proc_t, p, unsigned int, memorystatus_available_pages, boolean_t, purgeable, unsigned int, wired, uint32_t, clean, uint32_t, dirty);
1459
1460 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - "
1461 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1462 (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1463 memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1464
1465 proc_list_lock();
1466
1467 if (ret == KERN_SUCCESS) {
1468 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1469
1470 p->p_memstat_freeze_sharedanon_pages += shared;
1471
1472 memorystatus_frozen_shared_mb += shared;
1473
1474 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1475 p->p_memstat_state |= P_MEMSTAT_FROZEN;
1476 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1477 memorystatus_frozen_count++;
1478 if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
1479 memorystatus_freeze_out_of_slots();
1480 }
1481 } else {
1482 // This was a re-freeze
1483 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1484 memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
1485 memorystatus_freezer_stats.mfs_refreeze_count++;
1486 }
1487 }
1488
1489 p->p_memstat_frozen_count++;
1490
1491 /*
1492 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1493 * to its higher jetsam band.
1494 */
1495 proc_list_unlock();
1496
1497 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1498
1499 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1500 ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
1501 memorystatus_freeze_jetsam_band, TRUE);
1502
1503 if (ret) {
1504 printf("Elevating the frozen process failed with %d\n", ret);
1505 /* not fatal */
1506 ret = 0;
1507 }
1508
1509
1510 /* Update stats */
1511 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1512 throttle_intervals[i].pageouts += dirty;
1513 }
1514 }
1515 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1516 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages",
1517 aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty);
1518
1519 proc_list_lock();
1520
1521 memorystatus_freeze_pageouts += dirty;
1522
1523 if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1524 /*
1525 * Add some eviction logic here? At some point should we
1526 * jetsam a process to get back its swap space so that we
1527 * can freeze a more eligible process at this moment in time?
1528 */
1529 }
1530 } else {
1531 memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, NULL, "memorystatus_freeze_process_sync");
1532 p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
1533 }
1534
1535 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1536 wakeup(&p->p_memstat_state);
1537 proc_list_unlock();
1538 }
1539
1540 exit:
1541 lck_mtx_unlock(&freezer_mutex);
1542
1543 return ret;
1544 }
1545
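/*
 * Illustrative sketch (not part of the original source): the P_MEMSTAT_LOCKED
 * pattern used above whenever a long-running operation must run without the
 * proc_list_lock held.  The bit keeps jetsam and other memorystatus state
 * changes away from the process while the lock is dropped:
 *
 *	proc_list_lock();
 *	p->p_memstat_state |= P_MEMSTAT_LOCKED;		// pin the proc's memstat state
 *	proc_list_unlock();
 *
 *	(void) task_freeze(p->task, ...);		// potentially slow; lock not held
 *
 *	proc_list_lock();
 *	p->p_memstat_state &= ~P_MEMSTAT_LOCKED;	// release the pin
 *	wakeup(&p->p_memstat_state);			// wake anyone waiting on the bit
 *	proc_list_unlock();
 */
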
1546 /*
1547 * Caller must hold the freezer_mutex and it will be locked on return.
1548 */
1549 static int
1550 memorystatus_freeze_top_process(void)
1551 {
1552 pid_t aPid = 0, coal_xpc_pid = 0;
1553 int ret = -1;
1554 proc_t p = PROC_NULL, next_p = PROC_NULL;
1555 unsigned int i = 0;
1556 unsigned int band = JETSAM_PRIORITY_IDLE;
1557 bool refreeze_processes = false;
1558 task_t curr_task = NULL;
1559 coalition_t coal = COALITION_NULL;
1560 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
1561 unsigned int ntasks = 0;
1562 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
1563
1564 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);
1565
1566 proc_list_lock();
1567
1568 if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
1569 /*
1570 * The freezer is already full, but since we are here, let's
1571 * try to refreeze any processes we might have thawed
1572 * in the past and push their compressed state back out.
1573 */
1574 refreeze_processes = true;
1575 band = (unsigned int) memorystatus_freeze_jetsam_band;
1576 }
1577
1578 freeze_process:
1579
1580 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
1581 while (next_p) {
1582 kern_return_t kr;
1583 uint32_t purgeable, wired, clean, dirty, shared;
1584 uint64_t max_pages = 0;
1585 int freezer_error_code = 0;
1586 bool was_refreeze = false;
1587
1588 p = next_p;
1589
1590 if (coal == NULL) {
1591 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1592 } else {
1593 /*
1594 * We have frozen a coalition leader and now are
1595 * dealing with its XPC services. We get our
1596 * next_p for each XPC service from the pid_list
1597 * acquired after a successful task_freeze call
1598 * on the coalition leader.
1599 */
1600
1601 if (ntasks > 0) {
1602 coal_xpc_pid = pid_list[--ntasks];
1603 next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */);
1604 /*
1605 * We grab a reference when we are about to freeze the process. So, drop
1606 * the reference that proc_findinternal() grabbed for us.
1607 * We also have the proc_list_lock and so this process is stable.
1608 */
1609 if (next_p) {
1610 proc_rele_locked(next_p);
1611 }
1612 } else {
1613 next_p = NULL;
1614 }
1615 }
1616
1617 aPid = p->p_pid;
1618
1619 if (p->p_memstat_effectivepriority != (int32_t) band) {
1620 /*
1621 * We shouldn't be freezing processes outside the
1622 * prescribed band.
1623 */
1624 break;
1625 }
1626
1627 /* Ensure the process is eligible for (re-)freezing */
1628 if (refreeze_processes) {
1629 /*
1630 * Has to have been frozen once before.
1631 */
1632 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1633 continue;
1634 }
1635
1636 /*
1637 * Has to have been resumed once before.
1638 */
1639 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == FALSE) {
1640 continue;
1641 }
1642
1643 /*
1644 * Not currently being looked at for something.
1645 */
1646 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1647 continue;
1648 }
1649
1650 /*
1651 * We are going to try and refreeze and so re-evaluate
1652 * the process. We don't want to double count the shared
1653 * memory. So deduct the old snapshot here.
1654 */
1655 memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
1656 p->p_memstat_freeze_sharedanon_pages = 0;
1657
1658 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1659 memorystatus_refreeze_eligible_count--;
1660 } else {
1661 if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1662 continue; // with lock held
1663 }
1664 }
1665
1666 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1667 /*
1668 * Freezer backed by the compressor and swap file(s)
1669 * will hold compressed data.
1670 */
1671
1672 max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1673 } else {
1674 /*
1675 * We only have the compressor pool.
1676 */
1677 max_pages = UINT32_MAX - 1;
1678 }
1679
1680 /* Mark as locked temporarily to avoid kill */
1681 p->p_memstat_state |= P_MEMSTAT_LOCKED;
1682
1683 p = proc_ref_locked(p);
1684 if (!p) {
1685 memorystatus_freezer_stats.mfs_error_other_count++;
1686 break;
1687 }
1688
1689 proc_list_unlock();
1690
1691 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1692 memorystatus_available_pages, 0, 0, 0, 0);
1693
1694 max_pages = MIN(max_pages, UINT32_MAX);
1695 kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1696 if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1697 memorystatus_freezer_stats.mfs_shared_pages_skipped += shared;
1698 }
1699
1700 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1701 memorystatus_available_pages, aPid, 0, 0, 0);
1702
1703 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
1704 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1705 (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1706 memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1707
1708 proc_list_lock();
1709
1710 /* Success? */
1711 if (KERN_SUCCESS == kr) {
1712 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1713
1714 p->p_memstat_freeze_sharedanon_pages += shared;
1715
1716 memorystatus_frozen_shared_mb += shared;
1717
1718 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1719 p->p_memstat_state |= P_MEMSTAT_FROZEN;
1720 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
1721 memorystatus_frozen_count++;
1722 if (memorystatus_frozen_count == memorystatus_frozen_processes_max) {
1723 memorystatus_freeze_out_of_slots();
1724 }
1725 } else {
1726 // This was a re-freeze
1727 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1728 memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE;
1729 memorystatus_freezer_stats.mfs_refreeze_count++;
1730 }
1731 was_refreeze = true;
1732 }
1733
1734 p->p_memstat_frozen_count++;
1735
1736 /*
1737 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1738 * to its higher jetsam band.
1739 */
1740 proc_list_unlock();
1741
1742 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1743
1744 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1745 ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);
1746
1747 if (ret) {
1748 printf("Elevating the frozen process failed with %d\n", ret);
1749 /* not fatal */
1750 ret = 0;
1751 }
1752
1753 /* Update stats */
1754 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1755 throttle_intervals[i].pageouts += dirty;
1756 }
1757 }
1758 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1759 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
1760 was_refreeze ? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, was_refreeze ? "Re" : "", dirty);
1761
1762 proc_list_lock();
1763
1764 memorystatus_freeze_pageouts += dirty;
1765
1766 if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1767 /*
1768 * Add some eviction logic here? At some point should we
1769 * jetsam a process to get back its swap space so that we
1770 * can freeze a more eligible process at this moment in time?
1771 */
1772 }
1773
1774 /* Return KERN_SUCCESS */
1775 ret = kr;
1776
1777 /*
1778 * We froze a process successfully. If this process
1779 * isn't part of a coalition, we can stop now and see
1780 * if that helped.
1781 *
1782 * Else:
1783 * - if it is a leader, get the list of XPC services
1784 * that need to be frozen.
1785 * - if it is an XPC service whose leader was frozen
1786 * here, continue on to the next XPC service in the list.
1787 */
1788
1789 if (coal == NULL) {
1790 curr_task = proc_task(p);
1791 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1792 if (coalition_is_leader(curr_task, coal)) {
1793 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
1794 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
1795
1796 if (ntasks > MAX_XPC_SERVICE_PIDS) {
1797 ntasks = MAX_XPC_SERVICE_PIDS;
1798 }
1799 }
1800
1801 next_p = NULL;
1802
1803 if (ntasks > 0) {
1804 /*
1805 * Start off with our first next_p in this list.
1806 */
1807 coal_xpc_pid = pid_list[--ntasks];
1808 next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */);
1809
1810 /*
1811 * We grab a reference when we are about to freeze the process. So drop
1812 * the reference that proc_findinternal() grabbed for us.
1813 * We also have the proc_list_lock and so this process is stable.
1814 */
1815 if (next_p) {
1816 proc_rele_locked(next_p);
1817 }
1818 }
1819 }
1820
1821 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1822 wakeup(&p->p_memstat_state);
1823 proc_rele_locked(p);
1824
1825 if (coal && next_p) {
1826 continue;
1827 }
1828
1829 /*
1830 * No coalition leader was frozen. So we don't
1831 * need to evaluate any XPC services.
1832 *
1833 * OR
1834 *
1835 * We have frozen all eligible XPC services for
1836 * the current coalition leader.
1837 *
1838 * Either way, we can break here and see if freezing
1839 * helped.
1840 */
1841
1842 break;
1843 } else {
1844 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1845 wakeup(&p->p_memstat_state);
1846
1847 if (refreeze_processes) {
1848 if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
1849 (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
1850 /*
1851 * Keeping this prior-frozen process in this high band when
1852 * we failed to re-freeze it due to bad shared memory usage
1853 * could cause excessive pressure on the lower bands.
1854 * We need to demote it for now. It'll get re-evaluated next
1855 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
1856 * bit.
1857 */
1858
1859 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1860 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1861 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
1862 }
1863 } else {
1864 p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
1865 }
1866 memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, coal, "memorystatus_freeze_top_process");
1867
1868 proc_rele_locked(p);
1869
1870 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
1871 break;
1872 }
1873 }
1874 }
1875
1876 if ((ret == -1) &&
1877 (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD) &&
1878 (!refreeze_processes)) {
1879 /*
1880 * We failed to freeze a process from the IDLE
1881 * band AND we have some thawed processes
1882 * AND haven't tried refreezing as yet.
1883 * Let's try and re-freeze processes in the
1884 * frozen band that have been resumed in the past
1885 * and so have brought in state from disk.
1886 */
1887
1888 band = (unsigned int) memorystatus_freeze_jetsam_band;
1889
1890 refreeze_processes = true;
1891
1892 goto freeze_process;
1893 }
1894
1895 proc_list_unlock();
1896
1897 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, aPid, 0, 0, 0);
1898
1899 return ret;
1900 }
1901
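/*
 * Illustrative sketch (not part of the original source) of how the coalition
 * walk above proceeds once a coalition leader has been frozen.  The pid list
 * is consumed back-to-front, mirroring the loop's use of pid_list[--ntasks]:
 *
 *	curr_task = proc_task(p);
 *	coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
 *	if (coalition_is_leader(curr_task, coal)) {
 *		ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
 *		    COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
 *		while (ntasks > 0) {
 *			next_p = proc_findinternal(pid_list[--ntasks], 1);
 *			// ... freeze next_p via task_freeze(), as in the loop above ...
 *		}
 *	}
 */
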
1902 static inline boolean_t
1903 memorystatus_can_freeze_processes(void)
1904 {
1905 boolean_t ret;
1906
1907 proc_list_lock();
1908
1909 if (memorystatus_suspended_count) {
1910 memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
1911
1912 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
1913 ret = TRUE;
1914 } else {
1915 ret = FALSE;
1916 }
1917 } else {
1918 ret = FALSE;
1919 }
1920
1921 proc_list_unlock();
1922
1923 return ret;
1924 }
1925
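/*
 * Worked example (illustrative; the threshold value is an assumption): with a
 * hypothetical memorystatus_freeze_suspended_threshold of 4, 10 suspended
 * processes and 6 already frozen, the check above sees 10 - 6 = 4, which is
 * not greater than 4, so it returns FALSE; one more suspension (11 - 6 = 5)
 * would make it return TRUE.
 */
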
1926 static boolean_t
1927 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
1928 {
1929 boolean_t can_freeze = TRUE;
1930
1931 /* Only freeze if we're sufficiently low on memory; this holds off freeze right
1932 * after boot, and is generally a no-op once we've reached steady state. */
1933 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
1934 return FALSE;
1935 }
1936
1937 /* Check minimum suspended process threshold. */
1938 if (!memorystatus_can_freeze_processes()) {
1939 return FALSE;
1940 }
1941 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
1942
1943 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1944 /*
1945 * In-core compressor used for freezing WITHOUT on-disk swap support.
1946 */
1947 if (vm_compressor_low_on_space()) {
1948 if (*memorystatus_freeze_swap_low) {
1949 *memorystatus_freeze_swap_low = TRUE;
1950 }
1951
1952 can_freeze = FALSE;
1953 } else {
1954 if (*memorystatus_freeze_swap_low) {
1955 *memorystatus_freeze_swap_low = FALSE;
1956 }
1957
1958 can_freeze = TRUE;
1959 }
1960 } else {
1961 /*
1962 * Freezing WITH on-disk swap support.
1963 *
1964 * In-core compressor fronts the swap.
1965 */
1966 if (vm_swap_low_on_space()) {
1967 if (*memorystatus_freeze_swap_low) {
1968 *memorystatus_freeze_swap_low = TRUE;
1969 }
1970
1971 can_freeze = FALSE;
1972 }
1973 }
1974
1975 return can_freeze;
1976 }
1977
1978 /*
1979 * This function evaluates if the currently frozen processes deserve
1980 * to stay in the higher jetsam band. There are 2 modes:
1981 * - 'force_one == TRUE': (urgent mode)
1982 * We are out of budget and can't refreeze a process. The process's
1983 * state, if it was resumed, will stay in compressed memory. If we let it
1984 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
1985 * the lower bands. So we force-demote the least-recently-used-and-thawed
1986 * process.
1987 *
1988 * - 'force_one == FALSE': (normal mode)
1989 * If the # of thaws of a process is below our threshold, then we
1990 * will demote that process into the IDLE band.
1991 * We don't immediately kill the process here because it already has
1992 * state on disk and so it might be worth giving it another shot at
1993 * getting thawed/resumed and used.
1994 */
1995 static void
1996 memorystatus_demote_frozen_processes(boolean_t force_one)
1997 {
1998 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
1999 unsigned int demoted_proc_count = 0;
2000 proc_t p = PROC_NULL, next_p = PROC_NULL;
2001 /* We demote to IDLE unless someone has asserted a higher priority on this process. */
2002 int maxpriority = JETSAM_PRIORITY_IDLE;
2003
2004 proc_list_lock();
2005
2006 if (memorystatus_freeze_enabled == FALSE) {
2007 /*
2008 * Freeze has been disabled, likely to
2009 * reclaim swap space. So don't change
2010 * any state on the frozen processes.
2011 */
2012 proc_list_unlock();
2013 return;
2014 }
2015
2016 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
2017 while (next_p) {
2018 p = next_p;
2019 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2020
2021 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
2022 continue;
2023 }
2024
2025 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2026 continue;
2027 }
2028
2029 if (force_one == TRUE) {
2030 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) {
2031 /*
2032 * This process hasn't been thawed recently, so most of
2033 * its state sits on NAND and we skip it -- jetsamming it
2034 * won't help with memory pressure.
2035 */
2036 continue;
2037 }
2038 } else {
2039 if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
2040 /*
2041 * This process has met / exceeded our thaw count demotion threshold
2042 * and so we let it live in the higher bands.
2043 */
2044 continue;
2045 }
2046 }
2047
2048 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
2049 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2050
2051 maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
2052 memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
2053 #if DEVELOPMENT || DEBUG
2054 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_demote_frozen_process(%s) pid %d [%s]",
2055 (force_one ? "urgent" : "normal"), (p ? p->p_pid : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
2056 #endif /* DEVELOPMENT || DEBUG */
2057
2058 /*
2059 * The freezer thread will consider this a normal app to be frozen
2060 * because it is in the IDLE band. So we don't need the
2061 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
2062 * we'll correctly count it as eligible for re-freeze again.
2063 *
2064 * We don't drop the frozen count because this process still has
2065 * state on disk. So there's a chance it gets resumed and then it
2066 * should land in the higher jetsam band. For that it needs to
2067 * remain marked frozen.
2068 */
2069 if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
2070 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2071 memorystatus_refreeze_eligible_count--;
2072 }
2073
2074 demoted_proc_count++;
2075
2076 if ((force_one == TRUE) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
2077 break;
2078 }
2079 }
2080
2081 if (force_one == FALSE) {
2082 /*
2083 * We use these counters to track daily hit rates.
2084 * So we only reset them to 0 under the normal
2085 * mode.
2086 */
2087 memorystatus_thaw_count = 0;
2088 }
2089
2090 proc_list_unlock();
2091 }
2092
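/*
 * Illustrative example (the threshold value is an assumption): in normal mode,
 * with a hypothetical memorystatus_thaw_count_demotion_threshold of 2, a
 * frozen process that has been thawed 0 or 1 times is moved back down to
 * JETSAM_PRIORITY_IDLE (unless an assertion holds it higher), while a process
 * thawed 2 or more times keeps its elevated frozen band.  In urgent mode
 * (force_one == TRUE) only one P_MEMSTAT_REFREEZE_ELIGIBLE process is demoted
 * per call.
 */
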
2093 /*
2094 * Calculate a new freezer budget.
2095 * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired.
2096 * @param burst_multiple The burst_multiple for the new period
2097 * @param interval_duration_min How many minutes will the new interval be?
2098 * @param rollover The amount to rollover from the previous budget.
2099 *
2100 * @return A budget for the new interval.
2101 */
2102 static uint32_t
2103 memorystatus_freeze_calculate_new_budget(
2104 unsigned int time_since_last_interval_expired_sec,
2105 unsigned int burst_multiple,
2106 unsigned int interval_duration_min,
2107 uint32_t rollover)
2108 {
2109 uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0;
2110 const static unsigned int kNumSecondsInDay = 60 * 60 * 24;
2111 /* Precision factor for days_missed. 2 decimal points. */
2112 const static unsigned int kFixedPointFactor = 100;
2113 unsigned int days_missed;
2114
2115 /* Get the daily budget from the storage layer */
2116 if (vm_swap_max_budget(&freeze_daily_budget)) {
2117 freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024);
2118 assert(freeze_daily_budget_mb <= UINT32_MAX);
2119 memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb;
2120 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
2121 }
2122 /* Calculate the daily pageout budget */
2123 freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
2124
2125 daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60)));
2126
2127 /*
2128 * Add additional budget for time since the interval expired.
2129 * For example, if the interval expired n days ago, we should get an additional n days
2130 * of budget since we didn't use any budget during those n days.
2131 */
2132 days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay;
2133 budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor;
2134 new_budget = rollover + daily_budget_pageouts + budget_missed;
2135 return (uint32_t) MIN(new_budget, UINT32_MAX);
2136 }
2137
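/*
 * Worked example (illustrative; the daily limit and page size are assumptions):
 * with memorystatus_freeze_daily_mb_max = 1024 MB and a 16 KB PAGE_SIZE,
 * freeze_daily_pageouts_max = 1024 * (1024 * 1024 / 16384) = 65536 pages.
 * For a normal 24-hour interval (burst_multiple = 1, interval_duration_min =
 * 1440), daily_budget_pageouts = 1 * (1440 * 65536 / 1440) = 65536 pages.
 * If the previous interval expired half a day ago
 * (time_since_last_interval_expired_sec = 43200), then
 * days_missed = 43200 * 100 / 86400 = 50 (i.e. 0.50 days in fixed point) and
 * budget_missed = 50 * 65536 / 100 = 32768 pages.  With no rollover, the new
 * budget is 65536 + 32768 = 98304 pages.
 */
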
2138 /*
2139 * Mark all non-frozen, freezer-eligible processes as skipped for the given reason.
2140 * Used when we hit some system freeze limit and know that we won't be considering remaining processes.
2141 * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that
2142 * it gets set for new processes.
2143 * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze.
2144 */
2145 static void
2146 memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked)
2147 {
2148 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2149 LCK_MTX_ASSERT(proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
2150 unsigned int band = JETSAM_PRIORITY_IDLE;
2151 proc_t p;
2152
2153 if (!locked) {
2154 proc_list_lock();
2155 }
2156 p = memorystatus_get_first_proc_locked(&band, FALSE);
2157 while (p) {
2158 assert(p->p_memstat_effectivepriority == (int32_t) band);
2159 if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) {
2160 assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone);
2161 p->p_memstat_freeze_skip_reason = (uint8_t) reason;
2162 }
2163 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
2164 }
2165 if (!locked) {
2166 proc_list_unlock();
2167 }
2168 }
2169
2170 /*
2171 * Called after we fail to freeze a process.
2172 * Logs the failure, marks the process with the failure reason, and updates freezer stats.
2173 */
2174 static void
2175 memorystatus_freeze_handle_error(
2176 proc_t p,
2177 const int freezer_error_code,
2178 bool was_refreeze,
2179 pid_t pid,
2180 const coalition_t coalition,
2181 const char* log_prefix)
2182 {
2183 const char *reason;
2184 memorystatus_freeze_skip_reason_t skip_reason;
2185
2186 switch (freezer_error_code) {
2187 case FREEZER_ERROR_EXCESS_SHARED_MEMORY:
2188 memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++;
2189 reason = "too much shared memory";
2190 skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory;
2191 break;
2192 case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO:
2193 memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++;
2194 reason = "private-shared pages ratio";
2195 skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio;
2196 break;
2197 case FREEZER_ERROR_NO_COMPRESSOR_SPACE:
2198 memorystatus_freezer_stats.mfs_error_no_compressor_space_count++;
2199 reason = "no compressor space";
2200 skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace;
2201 break;
2202 case FREEZER_ERROR_NO_SWAP_SPACE:
2203 memorystatus_freezer_stats.mfs_error_no_swap_space_count++;
2204 reason = "no swap space";
2205 skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace;
2206 break;
2207 default:
2208 reason = "unknown error";
2209 skip_reason = kMemorystatusFreezeSkipReasonOther;
2210 }
2211
2212 p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason;
2213
2214 os_log_with_startup_serial(OS_LOG_DEFAULT, "%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n",
2215 log_prefix, was_refreeze ? "re" : "",
2216 (coalition == NULL ? "general" : "coalition-driven"), pid,
2217 ((p && *p->p_name) ? p->p_name : "unknown"), reason);
2218 }
2219
2220 /*
2221 * Start a new normal throttle interval with the given budget.
2222 * Caller must hold the freezer mutex
2223 */
2224 static void
2225 memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts)
2226 {
2227 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2228 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2229
2230 normal_throttle_window->max_pageouts = new_budget;
2231 normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60;
2232 normal_throttle_window->ts.tv_nsec = 0;
2233 ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts);
2234 /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2235 if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) {
2236 normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts;
2237 } else {
2238 normal_throttle_window->pageouts = 0;
2239 }
2240 /* Ensure the normal window is now active. */
2241 memorystatus_freeze_degradation = FALSE;
2242 }
2243
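/*
 * Illustrative example of the overshoot adjustment above: if the accumulated
 * pageouts from the previous window (say 1200) exceed the new budget (say
 * 1000), the 200-page excess is carried forward as the new window's starting
 * pageouts, so the new budget is effectively charged for the overshoot;
 * otherwise pageouts simply restarts at 0.
 */
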
2244 #if DEVELOPMENT || DEBUG
2245
2246 static int
2247 sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS
2248 {
2249 #pragma unused(arg1, arg2)
2250 int error = 0;
2251 unsigned int time_since_last_interval_expired_sec = 0;
2252 unsigned int new_budget;
2253
2254 error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req);
2255 if (error || !req->newptr) {
2256 return error;
2257 }
2258 new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0);
2259 return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget)));
2260 }
2261
2262 SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
2263 0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", "");
2264
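/*
 * Usage sketch (an assumption; DEVELOPMENT || DEBUG kernels only): the handler
 * above can be exercised from user space by writing the number of seconds
 * since the last interval expired and reading back the resulting budget, e.g.
 *
 *	sysctl vm.memorystatus_freeze_calculate_new_budget=43200
 *
 * which returns memorystatus_freeze_calculate_new_budget(43200, 1,
 * NORMAL_WINDOW_MINS, 0) in pages.
 */
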
2265 #endif /* DEVELOPMENT || DEBUG */
2266
2267 /*
2268 * Called when we first run out of budget in an interval.
2269 * Marks idle processes as not frozen due to lack of budget.
2270 * NB: It might be worth having a CA event here.
2271 */
2272 static void
2273 memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval)
2274 {
2275 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2276 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2277
2278 mach_timespec_t time_left = {0, 0};
2279 mach_timespec_t now_ts;
2280 clock_sec_t sec;
2281 clock_nsec_t nsec;
2282
2283 time_left.tv_sec = interval->ts.tv_sec;
2284 time_left.tv_nsec = 0;
2285 clock_get_system_nanotime(&sec, &nsec);
2286 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2287 now_ts.tv_nsec = nsec;
2288
2289 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2290 os_log(OS_LOG_DEFAULT,
2291 "memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n",
2292 time_left.tv_sec / 60, memorystatus_frozen_count);
2293
2294 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false);
2295 }
2296
2297 /*
2298 * Called when we cross over the threshold of maximum frozen processes allowed.
2299 * Marks remaining idle processes as not frozen due to lack of slots.
2300 */
2301 static void
2302 memorystatus_freeze_out_of_slots(void)
2303 {
2304 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2305 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
2306 assert(memorystatus_frozen_count == memorystatus_frozen_processes_max);
2307
2308 os_log(OS_LOG_DEFAULT,
2309 "memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n",
2310 memorystatus_frozen_count);
2311
2312 memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true);
2313 }
2314
2315 /*
2316 * This function will do 4 things:
2317 *
2318 * 1) check to see if we are currently in a degraded freezer mode, and if so:
2319 * - check to see if our window has expired and we should exit this mode, OR,
2320 * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
2321 *
2322 * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
2323 *
2324 * 3) check what the current normal window allows for a budget.
2325 *
2326 * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
2327 * what we would normally expect, then we are running low on our daily budget and need to enter
2328 * degraded perf. mode.
2329 *
2330 * Caller must hold the freezer mutex
2331 * Caller must not hold the proc_list lock
2332 */
2333
2334 static void
2335 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
2336 {
2337 clock_sec_t sec;
2338 clock_nsec_t nsec;
2339 mach_timespec_t now_ts;
2340 LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED);
2341 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED);
2342
2343 unsigned int freeze_daily_pageouts_max = 0;
2344 uint32_t budget_rollover = 0;
2345 bool started_with_budget = (*budget_pages_allowed > 0);
2346
2347 #if DEVELOPMENT || DEBUG
2348 if (!memorystatus_freeze_throttle_enabled) {
2349 /*
2350 * No throttling...we can use the full budget every time.
2351 */
2352 *budget_pages_allowed = UINT64_MAX;
2353 return;
2354 }
2355 #endif
2356
2357 clock_get_system_nanotime(&sec, &nsec);
2358 now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX));
2359 now_ts.tv_nsec = nsec;
2360
2361 struct throttle_interval_t *interval = NULL;
2362
2363 if (memorystatus_freeze_degradation == TRUE) {
2364 interval = degraded_throttle_window;
2365
2366 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2367 interval->pageouts = 0;
2368 interval->max_pageouts = 0;
2369 } else {
2370 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2371 }
2372 }
2373
2374 interval = normal_throttle_window;
2375
2376 if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) {
2377 /* How long has it been since the previous interval expired? */
2378 mach_timespec_t expiration_period_ts = now_ts;
2379 SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts);
2380 /* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */
2381 budget_rollover = interval->pageouts > interval->max_pageouts ?
2382 0 : interval->max_pageouts - interval->pageouts;
2383
2384 memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget(
2385 expiration_period_ts.tv_sec, interval->burst_multiple,
2386 interval->mins, budget_rollover),
2387 now_ts);
2388 *budget_pages_allowed = interval->max_pageouts;
2389 memorystatus_freezer_stats.mfs_shared_pages_skipped = 0;
2390
2391 memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */
2392 } else {
2393 /*
2394 * Current throttle window.
2395 * Deny freezing if we have no budget left.
2396 * Try graceful degradation if:
2397 * - we are within 25% of the daily budget, and
2398 * - the current budget left is below our normal budget expectations.
2399 */
2400
2401 if (memorystatus_freeze_degradation == FALSE) {
2402 if (interval->pageouts >= interval->max_pageouts) {
2403 *budget_pages_allowed = 0;
2404 if (started_with_budget) {
2405 memorystatus_freeze_out_of_budget(interval);
2406 }
2407 } else {
2408 int budget_left = interval->max_pageouts - interval->pageouts;
2409 int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
2410
2411 mach_timespec_t time_left = {0, 0};
2412
2413 time_left.tv_sec = interval->ts.tv_sec;
2414 time_left.tv_nsec = 0;
2415
2416 SUB_MACH_TIMESPEC(&time_left, &now_ts);
2417
2418 if (budget_left <= budget_threshold) {
2419 /*
2420 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
2421 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
2422 * daily pageout budget.
2423 */
2424
2425 unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
2426 unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
2427
2428 /*
2429 * The current rate of pageouts is below what we would expect for
2430 * the normal rate i.e. we have below normal budget left and so...
2431 */
2432
2433 if (current_budget_rate_allowed < normal_budget_rate_allowed) {
2434 memorystatus_freeze_degradation = TRUE;
2435 degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
2436 degraded_throttle_window->pageouts = 0;
2437
2438 /*
2439 * Switch over to the degraded throttle window so the budget
2440 * doled out is based on that window.
2441 */
2442 interval = degraded_throttle_window;
2443 }
2444 }
2445
2446 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2447 }
2448 }
2449 }
2450
2451 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
2452 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts.tv_sec) / 60,
2453 interval->throttle ? "on" : "off");
2454 }
2455
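/*
 * Illustrative example of the interval-rollover path above: if the normal
 * window has expired with pageouts = 300 against max_pageouts = 1000, the
 * unused 700 pages are passed to memorystatus_freeze_calculate_new_budget()
 * as budget_rollover and added to the next window's budget.  If the window
 * overshot (pageouts > max_pageouts), budget_rollover is clamped to 0 and the
 * excess is instead charged to the new window by
 * memorystatus_freeze_start_normal_throttle_interval().
 */
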
2456 static void
2457 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2458 {
2459 static boolean_t memorystatus_freeze_swap_low = FALSE;
2460
2461 lck_mtx_lock(&freezer_mutex);
2462
2463 if (memorystatus_freeze_enabled) {
2464 if ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2465 (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD)) {
2466 if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2467 /* Only freeze if we've not exceeded our pageout budgets.*/
2468 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2469
2470 if (memorystatus_freeze_budget_pages_remaining) {
2471 memorystatus_freeze_top_process();
2472 } else {
2473 memorystatus_demote_frozen_processes(TRUE); /* urgent mode..force one demotion */
2474 }
2475 }
2476 }
2477 }
2478
2479 /*
2480 * Give applications currently in the aging band a chance to age out into the idle band before
2481 * running the freezer again.
2482 */
2483 memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time;
2484
2485 assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2486 lck_mtx_unlock(&freezer_mutex);
2487
2488 thread_block((thread_continue_t) memorystatus_freeze_thread);
2489 }
2490
2491 boolean_t
2492 memorystatus_freeze_thread_should_run(void)
2493 {
2494 /*
2495 * No freezer_mutex held here...see why near call-site
2496 * within memorystatus_pages_update().
2497 */
2498
2499 boolean_t should_run = FALSE;
2500
2501 if (memorystatus_freeze_enabled == FALSE) {
2502 goto out;
2503 }
2504
2505 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2506 goto out;
2507 }
2508
2509 memorystatus_freezer_stats.mfs_below_threshold_count++;
2510
2511 if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
2512 /*
2513 * Consider this as a skip even if we wake up to refreeze because
2514 * we won't freeze any new procs.
2515 */
2516 memorystatus_freezer_stats.mfs_skipped_full_count++;
2517 if (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD) {
2518 goto out;
2519 }
2520 }
2521
2522 if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
2523 memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++;
2524 goto out;
2525 }
2526
2527 uint64_t curr_time = mach_absolute_time();
2528
2529 if (curr_time < memorystatus_freezer_thread_next_run_ts) {
2530 goto out;
2531 }
2532
2533 should_run = TRUE;
2534
2535 out:
2536 return should_run;
2537 }
2538
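/*
 * Sketch of the expected wakeup path (an assumption based on the comment
 * above; the actual call-site lives in memorystatus_pages_update(), outside
 * this file):
 *
 *	if (memorystatus_freeze_thread_should_run()) {
 *		thread_wakeup((event_t)&memorystatus_freeze_wakeup);
 *	}
 *
 * which pairs with the assert_wait() on &memorystatus_freeze_wakeup in
 * memorystatus_freeze_thread() above.
 */
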
2539 int
2540 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2541 {
2542 proc_t p = PROC_NULL;
2543
2544 if (pid == 0) {
2545 return EINVAL;
2546 }
2547
2548 p = proc_find(pid);
2549 if (!p) {
2550 return ESRCH;
2551 }
2552
2553 /*
2554 * Only allow this on the current proc for now.
2555 * We can check for privileges and allow targeting another process in the future.
2556 */
2557 if (p != current_proc()) {
2558 proc_rele(p);
2559 return EPERM;
2560 }
2561
2562 proc_list_lock();
2563 *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
2564 proc_rele_locked(p);
2565 proc_list_unlock();
2566
2567 return 0;
2568 }
2569
2570 errno_t
2571 memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen)
2572 {
2573 proc_t p = PROC_NULL;
2574
2575 if (pid == 0) {
2576 return EINVAL;
2577 }
2578
2579 /*
2580 * Only allow this on the current proc for now.
2581 * We can check for privileges and allow targeting another process in the future.
2582 */
2583 p = current_proc();
2584 if (p->p_pid != pid) {
2585 return EPERM;
2586 }
2587
2588 proc_list_lock();
2589 *is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0;
2590 proc_list_unlock();
2591
2592 return 0;
2593 }
2594
2595 int
2596 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
2597 {
2598 proc_t p = PROC_NULL;
2599
2600 if (pid == 0) {
2601 return EINVAL;
2602 }
2603
2604 /*
2605 * To enable freezable status, you need to be root or have the entitlement.
2606 */
2607 if (is_freezable &&
2608 !kauth_cred_issuser(kauth_cred_get()) &&
2609 !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
2610 return EPERM;
2611 }
2612
2613 p = proc_find(pid);
2614 if (!p) {
2615 return ESRCH;
2616 }
2617
2618 /*
2619 * A process can change its own status. A coalition leader can
2620 * change the status of coalition members.
2621 */
2622 if (p != current_proc()) {
2623 coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
2624 if (!coalition_is_leader(proc_task(current_proc()), coal)) {
2625 proc_rele(p);
2626 return EPERM;
2627 }
2628 }
2629
2630 proc_list_lock();
2631 if (is_freezable == FALSE) {
2632 /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
2633 p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
2634 printf("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
2635 p->p_pid, (*p->p_name ? p->p_name : "unknown"));
2636 } else {
2637 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
2638 printf("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
2639 p->p_pid, (*p->p_name ? p->p_name : "unknown"));
2640 }
2641 proc_rele_locked(p);
2642 proc_list_unlock();
2643
2644 return 0;
2645 }
2646
2647 /*
2648 * Called when process is created before it is added to a memorystatus bucket.
2649 */
2650 void
2651 memorystatus_freeze_init_proc(proc_t p)
2652 {
2653 /* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */
2654 if (memorystatus_freeze_budget_pages_remaining == 0) {
2655 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget;
2656 } else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) {
2657 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots;
2658 } else {
2659 p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone;
2660 }
2661 }
2662
2663
2664 static int
2665 sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS
2666 {
2667 #pragma unused(oidp, arg1, arg2)
2668
2669 if (!req->newptr) {
2670 return EINVAL;
2671 }
2672
2673 /* Need to be root or have entitlement */
2674 if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
2675 return EPERM;
2676 }
2677
2678 if (memorystatus_freeze_enabled == FALSE) {
2679 return ENOTSUP;
2680 }
2681
2682 do_fastwake_warmup_all();
2683
2684 return 0;
2685 }
2686
2687 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
2688 0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
2689
2690 #endif /* CONFIG_FREEZE */