]> git.saurik.com Git - apple/xnu.git/blob - bsd/kern/kern_memorystatus_freeze.c
xnu-6153.41.3.tar.gz
[apple/xnu.git] / bsd / kern / kern_memorystatus_freeze.c
1 /*
2 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30 #include <kern/sched_prim.h>
31 #include <kern/kalloc.h>
32 #include <kern/assert.h>
33 #include <kern/debug.h>
34 #include <kern/locks.h>
35 #include <kern/task.h>
36 #include <kern/thread.h>
37 #include <kern/host.h>
38 #include <kern/policy_internal.h>
39 #include <kern/thread_group.h>
40
41 #include <IOKit/IOBSD.h>
42
43 #include <libkern/libkern.h>
44 #include <mach/coalition.h>
45 #include <mach/mach_time.h>
46 #include <mach/task.h>
47 #include <mach/host_priv.h>
48 #include <mach/mach_host.h>
49 #include <os/log.h>
50 #include <pexpert/pexpert.h>
51 #include <sys/coalition.h>
52 #include <sys/kern_event.h>
53 #include <sys/proc.h>
54 #include <sys/proc_info.h>
55 #include <sys/reason.h>
56 #include <sys/signal.h>
57 #include <sys/signalvar.h>
58 #include <sys/sysctl.h>
59 #include <sys/sysproto.h>
60 #include <sys/wait.h>
61 #include <sys/tree.h>
62 #include <sys/priv.h>
63 #include <vm/vm_pageout.h>
64 #include <vm/vm_protos.h>
65 #include <mach/machine/sdt.h>
66 #include <libkern/section_keywords.h>
67 #include <stdatomic.h>
68
69 #if CONFIG_FREEZE
70 #include <vm/vm_map.h>
71 #endif /* CONFIG_FREEZE */
72
73 #include <sys/kern_memorystatus.h>
74 #include <sys/kern_memorystatus_freeze.h>
75 #include <sys/kern_memorystatus_notify.h>
76
77 #if CONFIG_JETSAM
78
79 extern unsigned int memorystatus_available_pages;
80 extern unsigned int memorystatus_available_pages_pressure;
81 extern unsigned int memorystatus_available_pages_critical;
82 extern unsigned int memorystatus_available_pages_critical_base;
83 extern unsigned int memorystatus_available_pages_critical_idle_offset;
84
85 #else /* CONFIG_JETSAM */
86
87 extern uint64_t memorystatus_available_pages;
88 extern uint64_t memorystatus_available_pages_pressure;
89 extern uint64_t memorystatus_available_pages_critical;
90
91 #endif /* CONFIG_JETSAM */
92
93 unsigned int memorystatus_frozen_count = 0;
94 unsigned int memorystatus_suspended_count = 0;
95 unsigned long freeze_threshold_percentage = 50;
96
97 #if CONFIG_FREEZE
98
99 lck_grp_attr_t *freezer_lck_grp_attr;
100 lck_grp_t *freezer_lck_grp;
101 static lck_mtx_t freezer_mutex;
102
103 /* Thresholds */
104 unsigned int memorystatus_freeze_threshold = 0;
105 unsigned int memorystatus_freeze_pages_min = 0;
106 unsigned int memorystatus_freeze_pages_max = 0;
107 unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
108 unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
109 uint64_t memorystatus_freeze_budget_pages_remaining = 0; //remaining # of pages that can be frozen to disk
110 boolean_t memorystatus_freeze_degradation = FALSE; //protected by the freezer mutex. Signals we are in a degraded freeze mode.
111
112 unsigned int memorystatus_max_frozen_demotions_daily = 0;
113 unsigned int memorystatus_thaw_count_demotion_threshold = 0;
114
115 boolean_t memorystatus_freeze_enabled = FALSE;
116 int memorystatus_freeze_wakeup = 0;
117 int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */
118
119 #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */
120
121 #ifdef XNU_KERNEL_PRIVATE
122
123 unsigned int memorystatus_frozen_processes_max = 0;
124 unsigned int memorystatus_frozen_shared_mb = 0;
125 unsigned int memorystatus_frozen_shared_mb_max = 0;
126 unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
127 unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
128 unsigned int memorystatus_thaw_count = 0;
129 unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */
130
131 #endif /* XNU_KERNEL_PRIVATE */
132
133 static inline boolean_t memorystatus_can_freeze_processes(void);
134 static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
135 static boolean_t memorystatus_is_process_eligible_for_freeze(proc_t p);
136 static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);
137
138 void memorystatus_disable_freeze(void);
139
140 /* Stats */
141 static uint64_t memorystatus_freeze_pageouts = 0;
142
143 /* Throttling */
/* Length, in minutes, of the short ("degraded") throttle window. */
144 #define DEGRADED_WINDOW_MINS (30)
/* Length, in minutes, of the regular throttle window: 24 hours. */
145 #define NORMAL_WINDOW_MINS (24 * 60)
146
/*
 * One table entry per throttle window. The first initializer of each entry is
 * the window length macro above; the meaning of the remaining initializers is
 * defined by throttle_interval_t, which is declared outside this file section
 * (NOTE(review): confirm field order against the type's declaration).
 */
147 static throttle_interval_t throttle_intervals[] = {
148 { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
149 { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
150 };
/* Named aliases for the two table entries above (index 0 and index 1). */
151 throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
152 throttle_interval_t *normal_throttle_window = &throttle_intervals[1];
153
154 extern uint64_t vm_swap_get_free_space(void);
155 extern boolean_t vm_swap_max_budget(uint64_t *);
156 extern int i_coal_jetsam_get_taskrole(coalition_t coal, task_t task);
157
158 static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
159 static void memorystatus_demote_frozen_processes(boolean_t force_one);
160
161 static uint64_t memorystatus_freezer_thread_next_run_ts = 0;
162
163 /* Sysctls needed for aggd stats */
164
165 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
166 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
167 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
168 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");
169
170
171 #if DEVELOPMENT || DEBUG
172
173 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_jetsam_band, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_jetsam_band, 0, "");
174 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
175 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
176 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
177 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
178 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");
179 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
180 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_processes_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_processes_max, 0, "");
181
182 /*
183 * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
184 * "0" means no limit.
185 * Default is 10% of system-wide task limit.
186 */
187
188 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
189 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");
190
191 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
192 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_private_shared_pages_ratio, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_private_shared_pages_ratio, 0, "");
193
194 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");
195
196 /*
197 * max. # of frozen process demotions we will allow in our daily cycle.
198 */
199 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_max_freeze_demotions_daily, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_max_frozen_demotions_daily, 0, "");
200 /*
201 * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
202 */
203 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count_demotion_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_thaw_count_demotion_threshold, 0, "");
204
205 boolean_t memorystatus_freeze_throttle_enabled = TRUE;
206 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
207
208 /*
209 * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
210 * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
211 */
212 boolean_t memorystatus_freeze_to_memory = FALSE;
213 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");
214
215 #define VM_PAGES_FOR_ALL_PROCS (2)
216 /*
217 * Manual trigger of freeze and thaw for dev / debug kernels only.
218 */
219 static int
220 sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
221 {
222 #pragma unused(arg1, arg2)
223 int error, pid = 0;
224 proc_t p;
225 int freezer_error_code = 0;
226 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
227 int ntasks = 0;
228 coalition_t coal = COALITION_NULL;
229
230 if (memorystatus_freeze_enabled == FALSE) {
231 printf("sysctl_freeze: Freeze is DISABLED\n");
232 return ENOTSUP;
233 }
234
235 error = sysctl_handle_int(oidp, &pid, 0, req);
236 if (error || !req->newptr) {
237 return error;
238 }
239
240 if (pid == VM_PAGES_FOR_ALL_PROCS) {
241 vm_pageout_anonymous_pages();
242
243 return 0;
244 }
245
246 lck_mtx_lock(&freezer_mutex);
247
248 again:
249 p = proc_find(pid);
250 if (p != NULL) {
251 uint32_t purgeable, wired, clean, dirty, shared;
252 uint32_t max_pages = 0, state = 0;
253
254 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
255 /*
256 * Freezer backed by the compressor and swap file(s)
257 * will hold compressed data.
258 *
259 * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from
260 * being swapped out to disk. Note that this disables freezer swap support globally,
261 * not just for the process being frozen.
262 *
263 *
264 * We don't care about the global freezer budget or the process's (min/max) budget here.
265 * The freeze sysctl is meant to force-freeze a process.
266 *
267 * We also don't update any global or process stats on this path, so that the jetsam/ freeze
268 * logic remains unaffected. The tasks we're performing here are: freeze the process, set the
269 * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active).
270 */
271 max_pages = memorystatus_freeze_pages_max;
272 } else {
273 /*
274 * We only have the compressor without any swap.
275 */
276 max_pages = UINT32_MAX - 1;
277 }
278
279 proc_list_lock();
280 state = p->p_memstat_state;
281 proc_list_unlock();
282
283 /*
284 * The jetsam path also verifies that the process is a suspended App. We don't care about that here.
285 * We simply ensure that jetsam is not already working on the process and that the process has not
286 * explicitly disabled freezing.
287 */
288 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) {
289 printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n",
290 (state & P_MEMSTAT_TERMINATED) ? " terminated" : "",
291 (state & P_MEMSTAT_LOCKED) ? " locked" : "",
292 (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : "");
293
294 proc_rele(p);
295 lck_mtx_unlock(&freezer_mutex);
296 return EPERM;
297 }
298
299 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
300
301 if (error) {
302 char reason[128];
303 if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
304 strlcpy(reason, "too much shared memory", 128);
305 }
306
307 if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
308 strlcpy(reason, "low private-shared pages ratio", 128);
309 }
310
311 if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
312 strlcpy(reason, "no compressor space", 128);
313 }
314
315 if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
316 strlcpy(reason, "no swap space", 128);
317 }
318
319 printf("sysctl_freeze: task_freeze failed: %s\n", reason);
320
321 if (error == KERN_NO_SPACE) {
322 /* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */
323 error = ENOSPC;
324 } else {
325 error = EIO;
326 }
327 } else {
328 proc_list_lock();
329 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
330 p->p_memstat_state |= P_MEMSTAT_FROZEN;
331 memorystatus_frozen_count++;
332 }
333 p->p_memstat_frozen_count++;
334
335
336 proc_list_unlock();
337
338 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
339 /*
340 * We elevate only if we are going to swap out the data.
341 */
342 error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
343 memorystatus_freeze_jetsam_band, TRUE);
344
345 if (error) {
346 printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error);
347 }
348 }
349 }
350
351 if ((error == 0) && (coal == NULL)) {
352 /*
353 * We froze a process and so we check to see if it was
354 * a coalition leader and if it has XPC services that
355 * might need freezing.
356 * Only one leader can be frozen at a time and so we shouldn't
357 * enter this block more than once per call. Hence the
358 * check that 'coal' has to be NULL. We should make this an
359 * assert() or panic() once we have a much more concrete way
360 * to detect an app vs a daemon.
361 */
362
363 task_t curr_task = NULL;
364
365 curr_task = proc_task(p);
366 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
367 if (coalition_is_leader(curr_task, coal)) {
368 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
369 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
370
371 if (ntasks > MAX_XPC_SERVICE_PIDS) {
372 ntasks = MAX_XPC_SERVICE_PIDS;
373 }
374 }
375 }
376
377 proc_rele(p);
378
379 while (ntasks) {
380 pid = pid_list[--ntasks];
381 goto again;
382 }
383
384 lck_mtx_unlock(&freezer_mutex);
385 return error;
386 } else {
387 printf("sysctl_freeze: Invalid process\n");
388 }
389
390
391 lck_mtx_unlock(&freezer_mutex);
392 return EINVAL;
393 }
394
395 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
396 0, 0, &sysctl_memorystatus_freeze, "I", "");
397
398 /*
399 * Manual trigger of agressive frozen demotion for dev / debug kernels only.
400 */
401 static int
402 sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS
403 {
404 #pragma unused(arg1, arg2, oidp, req)
405 memorystatus_demote_frozen_processes(false);
406 return 0;
407 }
408
409 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", "");
410
411 static int
412 sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
413 {
414 #pragma unused(arg1, arg2)
415
416 int error, pid = 0;
417 proc_t p;
418
419 if (memorystatus_freeze_enabled == FALSE) {
420 return ENOTSUP;
421 }
422
423 error = sysctl_handle_int(oidp, &pid, 0, req);
424 if (error || !req->newptr) {
425 return error;
426 }
427
428 if (pid == VM_PAGES_FOR_ALL_PROCS) {
429 do_fastwake_warmup_all();
430 return 0;
431 } else {
432 p = proc_find(pid);
433 if (p != NULL) {
434 error = task_thaw(p->task);
435
436 if (error) {
437 error = EIO;
438 } else {
439 /*
440 * task_thaw() succeeded.
441 *
442 * We increment memorystatus_frozen_count on the sysctl freeze path.
443 * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count
444 * when this process exits.
445 *
446 * proc_list_lock();
447 * p->p_memstat_state &= ~P_MEMSTAT_FROZEN;
448 * proc_list_unlock();
449 */
450 }
451 proc_rele(p);
452 return error;
453 }
454 }
455
456 return EINVAL;
457 }
458
459 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
460 0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");
461
462
/*
 * System-wide part of the freezer status report produced by
 * memorystatus_freezer_get_status(). It sits at the start of the buffer that
 * is copied out to user space, followed by the per-process entries.
 */
463 typedef struct _global_freezable_status {
/* TRUE when memorystatus_available_pages is below memorystatus_freeze_threshold. */
464 boolean_t freeze_pages_threshold_crossed;
/* TRUE when (suspended count - frozen count) exceeds memorystatus_freeze_suspended_threshold. */
465 boolean_t freeze_eligible_procs_available;
/* TRUE when the current time is earlier than memorystatus_freezer_thread_next_run_ts. */
466 boolean_t freeze_scheduled_in_future;
467 }global_freezable_status_t;
468
/*
 * Per-process part of the freezer status report produced by
 * memorystatus_freezer_get_status(); one entry per process evaluated.
 */
469 typedef struct _proc_freezable_status {
/* TRUE when the process is suspended and not terminated, locked, freeze-disabled or freeze-ignored. */
470 boolean_t freeze_has_memstat_state;
/* TRUE when the process has at least memorystatus_freeze_pages_min pages; always TRUE for coalition XPC services. */
471 boolean_t freeze_has_pages_min;
/* Use probability looked up by process name in the global probabilities table; -1 when the table is empty or for coalition XPC services. */
472 int freeze_has_probability;
/* One of the FREEZE_PROC_LEADER_FREEZABLE_* values. */
473 int freeze_leader_eligible;
/* TRUE when an evaluation-only task_freeze() was run for this process. */
474 boolean_t freeze_attempted;
/* Copy of the process's p_memstat_state at evaluation time. */
475 uint32_t p_memstat_state;
/* Page count reported by memorystatus_get_task_page_counts(); set to -1 for coalition XPC services. */
476 uint32_t p_pages;
/* Error code reported by the evaluation-only task_freeze(), or FREEZER_ERROR_GENERIC when the leader is not freezable. */
477 int p_freeze_error_code;
/* Process id. */
478 int p_pid;
/* Pid of the coalition leader; 0 when this is not a coalition-driven freeze. */
479 int p_leader_pid;
/* Process name, NUL-terminated. */
480 char p_name[MAXCOMLEN + 1];
481 }proc_freezable_status_t;
482
483 #define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */
484
485 /*
486 * For coalition based freezing evaluations, we proceed as follows:
487 * - detect that the process is a coalition member and a XPC service
488 * - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
489 * - continue its freezability evaluation assuming its leader will be freezable too
490 *
491 * Once we are done evaluating all processes, we do a quick run thru all
492 * processes and for a coalition member XPC service we look up the 'freezable'
493 * status of its leader and iff:
494 * - the xpc service is freezable i.e. its individual freeze evaluation worked
495 * - and, its leader is also marked freezable
496 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
497 */
498
499 #define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN (-1)
500 #define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS (1)
501 #define FREEZE_PROC_LEADER_FREEZABLE_FAILURE (2)
502
503 static int
504 memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval)
505 {
506 uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0;
507 global_freezable_status_t *list_head;
508 proc_freezable_status_t *list_entry, *list_entry_start;
509 size_t list_size = 0;
510 proc_t p, leader_proc;
511 memstat_bucket_t *bucket;
512 uint32_t state = 0, pages = 0, entry_count = 0;
513 boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE;
514 int error = 0, probability_of_use = 0;
515 pid_t leader_pid = 0;
516
517
518 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
519 return ENOTSUP;
520 }
521
522 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
523
524 if (buffer_size < list_size) {
525 return EINVAL;
526 }
527
528 list_head = (global_freezable_status_t*)kalloc(list_size);
529 if (list_head == NULL) {
530 return ENOMEM;
531 }
532
533 memset(list_head, 0, list_size);
534
535 list_size = sizeof(global_freezable_status_t);
536
537 proc_list_lock();
538
539 uint64_t curr_time = mach_absolute_time();
540
541 list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold);
542 list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold);
543 list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts);
544
545 list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t));
546 list_entry = list_entry_start;
547
548 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
549
550 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
551
552 p = memorystatus_get_first_proc_locked(&band, FALSE);
553 proc_count++;
554
555 while ((proc_count <= MAX_FREEZABLE_PROCESSES) &&
556 (p) &&
557 (list_size < buffer_size)) {
558 if (isSysProc(p)) {
559 /*
560 * Daemon:- We will consider freezing it iff:
561 * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation)
562 * - its role in the coalition is XPC service.
563 *
564 * We skip memory size requirements in this case.
565 */
566
567 coalition_t coal = COALITION_NULL;
568 task_t leader_task = NULL, curr_task = NULL;
569 int task_role_in_coalition = 0;
570
571 curr_task = proc_task(p);
572 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
573
574 if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) {
575 /*
576 * By default, XPC services without an app
577 * will be the leader of their own single-member
578 * coalition.
579 */
580 goto skip_ineligible_xpc;
581 }
582
583 leader_task = coalition_get_leader(coal);
584 if (leader_task == TASK_NULL) {
585 /*
586 * This jetsam coalition is currently leader-less.
587 * This could happen if the app died, but XPC services
588 * have not yet exited.
589 */
590 goto skip_ineligible_xpc;
591 }
592
593 leader_proc = (proc_t)get_bsdtask_info(leader_task);
594 task_deallocate(leader_task);
595
596 if (leader_proc == PROC_NULL) {
597 /* leader task is exiting */
598 goto skip_ineligible_xpc;
599 }
600
601 task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
602
603 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
604 xpc_skip_size_probability_check = TRUE;
605 leader_pid = leader_proc->p_pid;
606 goto continue_eval;
607 }
608
609 skip_ineligible_xpc:
610 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
611 proc_count++;
612 continue;
613 }
614
615 continue_eval:
616 strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1);
617
618 list_entry->p_pid = p->p_pid;
619
620 state = p->p_memstat_state;
621
622 if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) ||
623 !(state & P_MEMSTAT_SUSPENDED)) {
624 try_freeze = list_entry->freeze_has_memstat_state = FALSE;
625 } else {
626 try_freeze = list_entry->freeze_has_memstat_state = TRUE;
627 }
628
629 list_entry->p_memstat_state = state;
630
631 if (xpc_skip_size_probability_check == TRUE) {
632 /*
633 * Assuming the coalition leader is freezable
634 * we don't care re. minimum pages and probability
635 * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED.
636 * XPC services have to be explicity opted-out of the disabled
637 * state. And we checked that state above.
638 */
639 list_entry->freeze_has_pages_min = TRUE;
640 list_entry->p_pages = -1;
641 list_entry->freeze_has_probability = -1;
642
643 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN;
644 list_entry->p_leader_pid = leader_pid;
645
646 xpc_skip_size_probability_check = FALSE;
647 } else {
648 list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */
649 list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */
650
651 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
652 if (pages < memorystatus_freeze_pages_min) {
653 try_freeze = list_entry->freeze_has_pages_min = FALSE;
654 } else {
655 list_entry->freeze_has_pages_min = TRUE;
656 }
657
658 list_entry->p_pages = pages;
659
660 if (entry_count) {
661 uint32_t j = 0;
662 for (j = 0; j < entry_count; j++) {
663 if (strncmp(memorystatus_global_probabilities_table[j].proc_name,
664 p->p_name,
665 MAXCOMLEN + 1) == 0) {
666 probability_of_use = memorystatus_global_probabilities_table[j].use_probability;
667 break;
668 }
669 }
670
671 list_entry->freeze_has_probability = probability_of_use;
672
673 try_freeze = ((probability_of_use > 0) && try_freeze);
674 } else {
675 list_entry->freeze_has_probability = -1;
676 }
677 }
678
679 if (try_freeze) {
680 uint32_t purgeable, wired, clean, dirty, shared;
681 uint32_t max_pages = 0;
682 int freezer_error_code = 0;
683
684 error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */);
685
686 if (error) {
687 list_entry->p_freeze_error_code = freezer_error_code;
688 }
689
690 list_entry->freeze_attempted = TRUE;
691 }
692
693 list_entry++;
694 freeze_eligible_proc_considered++;
695
696 list_size += sizeof(proc_freezable_status_t);
697
698 p = memorystatus_get_next_proc_locked(&band, p, FALSE);
699 proc_count++;
700 }
701
702 proc_list_unlock();
703
704 list_entry = list_entry_start;
705
706 for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) {
707 if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) {
708 leader_pid = list_entry[xpc_index].p_leader_pid;
709
710 leader_proc = proc_find(leader_pid);
711
712 if (leader_proc) {
713 if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) {
714 /*
715 * Leader has already been frozen.
716 */
717 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
718 proc_rele(leader_proc);
719 continue;
720 }
721 proc_rele(leader_proc);
722 }
723
724 for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) {
725 if (list_entry[leader_index].p_pid == leader_pid) {
726 if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) {
727 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS;
728 } else {
729 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
730 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
731 }
732 break;
733 }
734 }
735
736 /*
737 * Didn't find the leader entry. This might be likely because
738 * the leader never made it down to band 0.
739 */
740 if (leader_index == freeze_eligible_proc_considered) {
741 list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE;
742 list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC;
743 }
744 }
745 }
746
747 buffer_size = list_size;
748
749 error = copyout(list_head, buffer, buffer_size);
750 if (error == 0) {
751 *retval = buffer_size;
752 } else {
753 *retval = 0;
754 }
755
756 list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES);
757 kfree(list_head, list_size);
758
759 MEMORYSTATUS_DEBUG(1, "memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)*list_size);
760
761 return error;
762 }
763
764 int
765 memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
766 {
767 int err = ENOTSUP;
768
769 if (flags == FREEZER_CONTROL_GET_STATUS) {
770 err = memorystatus_freezer_get_status(buffer, buffer_size, retval);
771 }
772
773 return err;
774 }
775
776 #endif /* DEVELOPMENT || DEBUG */
777
778 extern void vm_swap_consider_defragmenting(int);
779 extern boolean_t memorystatus_kill_elevated_process(uint32_t, os_reason_t, unsigned int, int, uint32_t *, uint64_t *);
780
781 /*
782 * This routine will _jetsam_ all frozen processes
783 * and reclaim the swap space immediately.
784 *
785 * So freeze has to be DISABLED when we call this routine.
786 */
787
788 void
789 memorystatus_disable_freeze(void)
790 {
791 memstat_bucket_t *bucket;
792 int bucket_count = 0, retries = 0;
793 boolean_t retval = FALSE, killed = FALSE;
794 uint32_t errors = 0, errors_over_prev_iteration = 0;
795 os_reason_t jetsam_reason = 0;
796 unsigned int band = 0;
797 proc_t p = PROC_NULL, next_p = PROC_NULL;
798 uint64_t memory_reclaimed = 0, footprint = 0;
799
800 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
801 memorystatus_available_pages, 0, 0, 0, 0);
802
803 assert(memorystatus_freeze_enabled == FALSE);
804
805 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
806 if (jetsam_reason == OS_REASON_NULL) {
807 printf("memorystatus_disable_freeze: failed to allocate jetsam reason\n");
808 }
809
810 /*
811 * Let's relocate all frozen processes into band 8. Demoted frozen processes
812 * are sitting in band 0 currently and it's possible to have a frozen process
813 * in the FG band being actively used. We don't reset its frozen state when
814 * it is resumed because it has state on disk.
815 *
816 * We choose to do this relocation rather than implement a new 'kill frozen'
817 * process function for these reasons:
818 * - duplication of code: too many kill functions exist and we need to rework them better.
819 * - disk-space-shortage kills are rare
820 * - not having the 'real' jetsam band at the time of this frozen kill won't preclude us
821 * from answering any imp. questions re. jetsam policy/effectiveness.
822 *
823 * This is essentially what memorystatus_update_inactive_jetsam_priority_band() does while
824 * avoiding the application of memory limits.
825 */
826
827 again:
828 proc_list_lock();
829
830 band = JETSAM_PRIORITY_IDLE;
831 p = PROC_NULL;
832 next_p = PROC_NULL;
833
834 next_p = memorystatus_get_first_proc_locked(&band, TRUE);
835 while (next_p) {
836 p = next_p;
837 next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
838
839 if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
840 break;
841 }
842
843 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
844 continue;
845 }
846
847 if (p->p_memstat_state & P_MEMSTAT_ERROR) {
848 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
849 }
850
851 if (p->p_memstat_effectivepriority == memorystatus_freeze_jetsam_band) {
852 continue;
853 }
854
855 /*
856 * We explicitly add this flag here so the process looks like a normal
857 * frozen process i.e. P_MEMSTAT_FROZEN and P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND.
858 * We don't bother with assigning the 'active' memory
859 * limits at this point because we are going to be killing it soon below.
860 */
861 p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
862 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
863
864 memorystatus_update_priority_locked(p, memorystatus_freeze_jetsam_band, FALSE, TRUE);
865 }
866
867 bucket = &memstat_bucket[memorystatus_freeze_jetsam_band];
868 bucket_count = bucket->count;
869 proc_list_unlock();
870
871 /*
872 * Bucket count is already stale at this point. But, we don't expect
873 * freezing to continue since we have already disabled the freeze functionality.
874 * However, an existing freeze might be in progress. So we might miss that process
875 * in the first go-around. We hope to catch it in the next.
876 */
877
878 errors_over_prev_iteration = 0;
879 while (bucket_count) {
880 bucket_count--;
881
882 /*
883 * memorystatus_kill_elevated_process() drops a reference,
884 * so take another one so we can continue to use this exit reason
885 * even after it returns.
886 */
887
888 os_reason_ref(jetsam_reason);
889 retval = memorystatus_kill_elevated_process(
890 kMemorystatusKilledDiskSpaceShortage,
891 jetsam_reason,
892 memorystatus_freeze_jetsam_band,
893 0, /* the iteration of aggressive jetsam..ignored here */
894 &errors,
895 &footprint);
896
897 if (errors > 0) {
898 printf("memorystatus_disable_freeze: memorystatus_kill_elevated_process returned %d error(s)\n", errors);
899 errors_over_prev_iteration += errors;
900 errors = 0;
901 }
902
903 if (retval == 0) {
904 /*
905 * No frozen processes left to kill.
906 */
907 break;
908 }
909
910 killed = TRUE;
911 memory_reclaimed += footprint;
912 }
913
914 proc_list_lock();
915
916 if (memorystatus_frozen_count) {
917 /*
918 * A frozen process snuck in and so
919 * go back around to kill it. That
920 * process may have been resumed and
921 * put into the FG band too. So we
922 * have to do the relocation again.
923 */
924 assert(memorystatus_freeze_enabled == FALSE);
925
926 retries++;
927 if (retries < 3) {
928 proc_list_unlock();
929 goto again;
930 }
931 #if DEVELOPMENT || DEBUG
932 panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d, errors = %d",
933 memorystatus_frozen_count, errors_over_prev_iteration);
934 #endif /* DEVELOPMENT || DEBUG */
935 }
936 proc_list_unlock();
937
938 os_reason_free(jetsam_reason);
939
940 if (killed) {
941 vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
942
943 proc_list_lock();
944 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
945 sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
946 uint64_t timestamp_now = mach_absolute_time();
947 memorystatus_jetsam_snapshot->notification_time = timestamp_now;
948 memorystatus_jetsam_snapshot->js_gencount++;
949 if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
950 timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
951 proc_list_unlock();
952 int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
953 if (!ret) {
954 proc_list_lock();
955 memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
956 proc_list_unlock();
957 }
958 } else {
959 proc_list_unlock();
960 }
961 }
962
963 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
964 memorystatus_available_pages, memory_reclaimed, 0, 0, 0);
965
966 return;
967 }
968
969 __private_extern__ void
970 memorystatus_freeze_init(void)
971 {
972 kern_return_t result;
973 thread_t thread;
974
975 freezer_lck_grp_attr = lck_grp_attr_alloc_init();
976 freezer_lck_grp = lck_grp_alloc_init("freezer", freezer_lck_grp_attr);
977
978 lck_mtx_init(&freezer_mutex, freezer_lck_grp, NULL);
979
980 /*
981 * This is just the default value if the underlying
982 * storage device doesn't have any specific budget.
983 * We check with the storage layer in memorystatus_freeze_update_throttle()
984 * before we start our freezing the first time.
985 */
986 memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE;
987
988 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
989 if (result == KERN_SUCCESS) {
990 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2);
991 proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE);
992 thread_set_thread_name(thread, "VM_freezer");
993
994 thread_deallocate(thread);
995 } else {
996 panic("Could not create memorystatus_freeze_thread");
997 }
998 }
999
1000 static boolean_t
1001 memorystatus_is_process_eligible_for_freeze(proc_t p)
1002 {
1003 /*
1004 * Called with proc_list_lock held.
1005 */
1006
1007 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
1008
1009 boolean_t should_freeze = FALSE;
1010 uint32_t state = 0, entry_count = 0, pages = 0, i = 0;
1011 int probability_of_use = 0;
1012
1013 state = p->p_memstat_state;
1014
1015 if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
1016 goto out;
1017 }
1018
1019 if (isSysProc(p)) {
1020 /*
1021 * Daemon:- We consider freezing it if:
1022 * - it belongs to a coalition and the leader is frozen, and,
1023 * - its role in the coalition is XPC service.
1024 *
1025 * We skip memory size requirements in this case.
1026 */
1027
1028 coalition_t coal = COALITION_NULL;
1029 task_t leader_task = NULL, curr_task = NULL;
1030 proc_t leader_proc = NULL;
1031 int task_role_in_coalition = 0;
1032
1033 curr_task = proc_task(p);
1034 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1035
1036 if (coal == NULL || coalition_is_leader(curr_task, coal)) {
1037 /*
1038 * By default, XPC services without an app
1039 * will be the leader of their own single-member
1040 * coalition.
1041 */
1042 goto out;
1043 }
1044
1045 leader_task = coalition_get_leader(coal);
1046 if (leader_task == TASK_NULL) {
1047 /*
1048 * This jetsam coalition is currently leader-less.
1049 * This could happen if the app died, but XPC services
1050 * have not yet exited.
1051 */
1052 goto out;
1053 }
1054
1055 leader_proc = (proc_t)get_bsdtask_info(leader_task);
1056 task_deallocate(leader_task);
1057
1058 if (leader_proc == PROC_NULL) {
1059 /* leader task is exiting */
1060 goto out;
1061 }
1062
1063 if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
1064 goto out;
1065 }
1066
1067 task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);
1068
1069 if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
1070 should_freeze = TRUE;
1071 }
1072
1073 goto out;
1074 } else {
1075 /*
1076 * Application. In addition to the above states we need to make
1077 * sure we only consider suspended applications for freezing.
1078 */
1079 if (!(state & P_MEMSTAT_SUSPENDED)) {
1080 goto out;
1081 }
1082 }
1083
1084
1085 /* Only freeze applications meeting our minimum resident page criteria */
1086 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
1087 if (pages < memorystatus_freeze_pages_min) {
1088 goto out;
1089 }
1090
1091 /* Don't freeze processes that are already exiting on core. It may have started exiting
1092 * after we chose it for freeze, but before we obtained the proc_list_lock.
1093 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
1094 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
1095 */
1096 if ((p->p_listflag & P_LIST_EXITED) != 0) {
1097 goto out;
1098 }
1099
1100 entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));
1101
1102 if (entry_count) {
1103 for (i = 0; i < entry_count; i++) {
1104 if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
1105 p->p_name,
1106 MAXCOMLEN + 1) == 0) {
1107 probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
1108 break;
1109 }
1110 }
1111
1112 if (probability_of_use == 0) {
1113 goto out;
1114 }
1115 }
1116
1117 should_freeze = TRUE;
1118 out:
1119 return should_freeze;
1120 }
1121
1122 /*
1123 * Synchronously freeze the passed proc. Called with a reference to the proc held.
1124 *
1125 * Doesn't deal with:
1126 * - re-freezing because this is called on a specific process and
1127 * not by the freezer thread. If that changes, we'll have to teach it about
1128 * refreezing a frozen process.
1129 *
1130 * - grouped/coalition freezing because we are hoping to deprecate this
1131 * interface as it was used by user-space to freeze particular processes. But
1132 * we have moved away from that approach to having the kernel choose the optimal
1133 * candidates to be frozen.
1134 *
1135 * Returns EINVAL or the value returned by task_freeze().
1136 */
1137 int
1138 memorystatus_freeze_process_sync(proc_t p)
1139 {
1140 int ret = EINVAL;
1141 pid_t aPid = 0;
1142 boolean_t memorystatus_freeze_swap_low = FALSE;
1143 int freezer_error_code = 0;
1144
1145 lck_mtx_lock(&freezer_mutex);
1146
1147 if (p == NULL) {
1148 printf("memorystatus_freeze_process_sync: Invalid process\n");
1149 goto exit;
1150 }
1151
1152 if (memorystatus_freeze_enabled == FALSE) {
1153 printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n");
1154 goto exit;
1155 }
1156
1157 if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
1158 printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n");
1159 goto exit;
1160 }
1161
1162 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1163 if (!memorystatus_freeze_budget_pages_remaining) {
1164 printf("memorystatus_freeze_process_sync: exit with NO available budget\n");
1165 goto exit;
1166 }
1167
1168 proc_list_lock();
1169
1170 if (p != NULL) {
1171 uint32_t purgeable, wired, clean, dirty, shared;
1172 uint32_t max_pages, i;
1173
1174 aPid = p->p_pid;
1175
1176 /* Ensure the process is eligible for freezing */
1177 if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1178 proc_list_unlock();
1179 goto exit;
1180 }
1181
1182 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1183 max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1184 } else {
1185 /*
1186 * We only have the compressor without any swap.
1187 */
1188 max_pages = UINT32_MAX - 1;
1189 }
1190
1191 /* Mark as locked temporarily to avoid kill */
1192 p->p_memstat_state |= P_MEMSTAT_LOCKED;
1193 proc_list_unlock();
1194
1195 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1196 memorystatus_available_pages, 0, 0, 0, 0);
1197
1198 ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1199
1200 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1201 memorystatus_available_pages, aPid, 0, 0, 0);
1202
1203 DTRACE_MEMORYSTATUS6(memorystatus_freeze, proc_t, p, unsigned int, memorystatus_available_pages, boolean_t, purgeable, unsigned int, wired, uint32_t, clean, uint32_t, dirty);
1204
1205 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - "
1206 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1207 (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1208 memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1209
1210 proc_list_lock();
1211
1212 if (ret == KERN_SUCCESS) {
1213 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1214
1215 p->p_memstat_freeze_sharedanon_pages += shared;
1216
1217 memorystatus_frozen_shared_mb += shared;
1218
1219 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1220 p->p_memstat_state |= P_MEMSTAT_FROZEN;
1221 memorystatus_frozen_count++;
1222 }
1223
1224 p->p_memstat_frozen_count++;
1225
1226 /*
1227 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1228 * to its higher jetsam band.
1229 */
1230 proc_list_unlock();
1231
1232 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1233
1234 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1235 ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
1236 memorystatus_freeze_jetsam_band, TRUE);
1237
1238 if (ret) {
1239 printf("Elevating the frozen process failed with %d\n", ret);
1240 /* not fatal */
1241 ret = 0;
1242 }
1243
1244 proc_list_lock();
1245
1246 /* Update stats */
1247 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1248 throttle_intervals[i].pageouts += dirty;
1249 }
1250 } else {
1251 proc_list_lock();
1252 }
1253
1254 memorystatus_freeze_pageouts += dirty;
1255
1256 if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1257 /*
1258 * Add some eviction logic here? At some point should we
1259 * jetsam a process to get back its swap space so that we
1260 * can freeze a more eligible process at this moment in time?
1261 */
1262 }
1263
1264 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1265 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages",
1266 aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty);
1267 } else {
1268 char reason[128];
1269 if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
1270 strlcpy(reason, "too much shared memory", 128);
1271 }
1272
1273 if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1274 strlcpy(reason, "low private-shared pages ratio", 128);
1275 }
1276
1277 if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
1278 strlcpy(reason, "no compressor space", 128);
1279 }
1280
1281 if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
1282 strlcpy(reason, "no swap space", 128);
1283 }
1284
1285 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s]...skipped (%s)",
1286 aPid, ((p && *p->p_name) ? p->p_name : "unknown"), reason);
1287 p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
1288 }
1289
1290 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1291 wakeup(&p->p_memstat_state);
1292 proc_list_unlock();
1293 }
1294
1295 exit:
1296 lck_mtx_unlock(&freezer_mutex);
1297
1298 return ret;
1299 }
1300
1301 static int
1302 memorystatus_freeze_top_process(void)
1303 {
1304 pid_t aPid = 0, coal_xpc_pid = 0;
1305 int ret = -1;
1306 proc_t p = PROC_NULL, next_p = PROC_NULL;
1307 unsigned int i = 0;
1308 unsigned int band = JETSAM_PRIORITY_IDLE;
1309 boolean_t refreeze_processes = FALSE;
1310 task_t curr_task = NULL;
1311 coalition_t coal = COALITION_NULL;
1312 pid_t pid_list[MAX_XPC_SERVICE_PIDS];
1313 unsigned int ntasks = 0;
1314
1315 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0);
1316
1317 proc_list_lock();
1318
1319 if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) {
1320 /*
1321 * Freezer is already full but we are here and so let's
1322 * try to refreeze any processes we might have thawed
1323 * in the past and push out their compressed state out.
1324 */
1325 refreeze_processes = TRUE;
1326 band = (unsigned int) memorystatus_freeze_jetsam_band;
1327 }
1328
1329 freeze_process:
1330
1331 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
1332 while (next_p) {
1333 kern_return_t kr;
1334 uint32_t purgeable, wired, clean, dirty, shared;
1335 uint32_t max_pages = 0;
1336 int freezer_error_code = 0;
1337
1338 p = next_p;
1339
1340 if (coal == NULL) {
1341 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1342 } else {
1343 /*
1344 * We have frozen a coalition leader and now are
1345 * dealing with its XPC services. We get our
1346 * next_p for each XPC service from the pid_list
1347 * acquired after a successful task_freeze call
1348 * on the coalition leader.
1349 */
1350
1351 if (ntasks > 0) {
1352 coal_xpc_pid = pid_list[--ntasks];
1353 next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */);
1354 /*
1355 * We grab a reference when we are about to freeze the process. So, drop
1356 * the reference that proc_findinternal() grabbed for us.
1357 * We also have the proc_list_lock and so this process is stable.
1358 */
1359 if (next_p) {
1360 proc_rele_locked(next_p);
1361 }
1362 } else {
1363 next_p = NULL;
1364 }
1365 }
1366
1367 aPid = p->p_pid;
1368
1369 if (p->p_memstat_effectivepriority != (int32_t) band) {
1370 /*
1371 * We shouldn't be freezing processes outside the
1372 * prescribed band.
1373 */
1374 break;
1375 }
1376
1377 /* Ensure the process is eligible for (re-)freezing */
1378 if (refreeze_processes) {
1379 /*
1380 * Has to have been frozen once before.
1381 */
1382 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1383 continue;
1384 }
1385
1386 /*
1387 * Has to have been resumed once before.
1388 */
1389 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == FALSE) {
1390 continue;
1391 }
1392
1393 /*
1394 * Not currently being looked at for something.
1395 */
1396 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1397 continue;
1398 }
1399
1400 /*
1401 * We are going to try and refreeze and so re-evaluate
1402 * the process. We don't want to double count the shared
1403 * memory. So deduct the old snapshot here.
1404 */
1405 memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
1406 p->p_memstat_freeze_sharedanon_pages = 0;
1407
1408 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1409 memorystatus_refreeze_eligible_count--;
1410 } else {
1411 if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) {
1412 continue; // with lock held
1413 }
1414 }
1415
1416 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1417 /*
1418 * Freezer backed by the compressor and swap file(s)
1419 * will hold compressed data.
1420 */
1421
1422 max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining);
1423 } else {
1424 /*
1425 * We only have the compressor pool.
1426 */
1427 max_pages = UINT32_MAX - 1;
1428 }
1429
1430 /* Mark as locked temporarily to avoid kill */
1431 p->p_memstat_state |= P_MEMSTAT_LOCKED;
1432
1433 p = proc_ref_locked(p);
1434 if (!p) {
1435 break;
1436 }
1437
1438 proc_list_unlock();
1439
1440 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
1441 memorystatus_available_pages, 0, 0, 0, 0);
1442
1443 kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */);
1444
1445 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
1446 memorystatus_available_pages, aPid, 0, 0, 0);
1447
1448 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
1449 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n",
1450 (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"),
1451 memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared);
1452
1453 proc_list_lock();
1454
1455 /* Success? */
1456 if (KERN_SUCCESS == kr) {
1457 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
1458
1459 p->p_memstat_freeze_sharedanon_pages += shared;
1460
1461 memorystatus_frozen_shared_mb += shared;
1462
1463 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) {
1464 p->p_memstat_state |= P_MEMSTAT_FROZEN;
1465 memorystatus_frozen_count++;
1466 }
1467
1468 p->p_memstat_frozen_count++;
1469
1470 /*
1471 * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process
1472 * to its higher jetsam band.
1473 */
1474 proc_list_unlock();
1475
1476 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1477
1478 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1479 ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE);
1480
1481 if (ret) {
1482 printf("Elevating the frozen process failed with %d\n", ret);
1483 /* not fatal */
1484 ret = 0;
1485 }
1486
1487 proc_list_lock();
1488
1489 /* Update stats */
1490 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1491 throttle_intervals[i].pageouts += dirty;
1492 }
1493 } else {
1494 proc_list_lock();
1495 }
1496
1497 memorystatus_freeze_pageouts += dirty;
1498
1499 if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) {
1500 /*
1501 * Add some eviction logic here? At some point should we
1502 * jetsam a process to get back its swap space so that we
1503 * can freeze a more eligible process at this moment in time?
1504 */
1505 }
1506
1507 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
1508 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n",
1509 refreeze_processes? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, refreeze_processes? "Re" : "", dirty);
1510
1511 /* Return KERN_SUCCESS */
1512 ret = kr;
1513
1514 /*
1515 * We froze a process successfully. We can stop now
1516 * and see if that helped if this process isn't part
1517 * of a coalition.
1518 *
1519 * Else:
1520 * - if it is a leader, get the list of XPC services
1521 * that need to be frozen.
1522 * - if it is a XPC service whose leader was frozen
1523 * here, continue on to the next XPC service in the list.
1524 */
1525
1526 if (coal == NULL) {
1527 curr_task = proc_task(p);
1528 coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);
1529 if (coalition_is_leader(curr_task, coal)) {
1530 ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC,
1531 COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS);
1532
1533 if (ntasks > MAX_XPC_SERVICE_PIDS) {
1534 ntasks = MAX_XPC_SERVICE_PIDS;
1535 }
1536 }
1537
1538 next_p = NULL;
1539
1540 if (ntasks > 0) {
1541 /*
1542 * Start off with our first next_p in this list.
1543 */
1544 coal_xpc_pid = pid_list[--ntasks];
1545 next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */);
1546
1547 /*
1548 * We grab a reference when we are about to freeze the process. So drop
1549 * the reference that proc_findinternal() grabbed for us.
1550 * We also have the proc_list_lock and so this process is stable.
1551 */
1552 if (next_p) {
1553 proc_rele_locked(next_p);
1554 }
1555 }
1556 }
1557
1558 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1559 wakeup(&p->p_memstat_state);
1560 proc_rele_locked(p);
1561
1562 if (coal && next_p) {
1563 continue;
1564 }
1565
1566 /*
1567 * No coalition leader was frozen. So we don't
1568 * need to evaluate any XPC services.
1569 *
1570 * OR
1571 *
1572 * We have frozen all eligible XPC services for
1573 * the current coalition leader.
1574 *
1575 * Either way, we can break here and see if freezing
1576 * helped.
1577 */
1578
1579 break;
1580 } else {
1581 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
1582 wakeup(&p->p_memstat_state);
1583
1584 if (refreeze_processes == TRUE) {
1585 if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) ||
1586 (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) {
1587 /*
1588 * Keeping this prior-frozen process in this high band when
1589 * we failed to re-freeze it due to bad shared memory usage
1590 * could cause excessive pressure on the lower bands.
1591 * We need to demote it for now. It'll get re-evaluated next
1592 * time because we don't set the P_MEMSTAT_FREEZE_IGNORE
1593 * bit.
1594 */
1595
1596 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1597 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1598 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE);
1599 }
1600 } else {
1601 p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE;
1602 }
1603
1604 char reason[128];
1605 if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) {
1606 strlcpy(reason, "too much shared memory", 128);
1607 }
1608
1609 if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) {
1610 strlcpy(reason, "low private-shared pages ratio", 128);
1611 }
1612
1613 if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) {
1614 strlcpy(reason, "no compressor space", 128);
1615 }
1616
1617 if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) {
1618 strlcpy(reason, "no swap space", 128);
1619 }
1620
1621 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (%s) pid %d [%s]...skipped (%s)\n",
1622 (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), reason);
1623
1624 proc_rele_locked(p);
1625
1626 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
1627 break;
1628 }
1629 }
1630 }
1631
1632 if ((ret == -1) &&
1633 (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD) &&
1634 (refreeze_processes == FALSE)) {
1635 /*
1636 * We failed to freeze a process from the IDLE
1637 * band AND we have some thawed processes
1638 * AND haven't tried refreezing as yet.
1639 * Let's try and re-freeze processes in the
1640 * frozen band that have been resumed in the past
1641 * and so have brought in state from disk.
1642 */
1643
1644 band = (unsigned int) memorystatus_freeze_jetsam_band;
1645
1646 refreeze_processes = TRUE;
1647
1648 goto freeze_process;
1649 }
1650
1651 proc_list_unlock();
1652
1653 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, aPid, 0, 0, 0);
1654
1655 return ret;
1656 }
1657
1658 static inline boolean_t
1659 memorystatus_can_freeze_processes(void)
1660 {
1661 boolean_t ret;
1662
1663 proc_list_lock();
1664
1665 if (memorystatus_suspended_count) {
1666 memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT);
1667
1668 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
1669 ret = TRUE;
1670 } else {
1671 ret = FALSE;
1672 }
1673 } else {
1674 ret = FALSE;
1675 }
1676
1677 proc_list_unlock();
1678
1679 return ret;
1680 }
1681
1682 static boolean_t
1683 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
1684 {
1685 boolean_t can_freeze = TRUE;
1686
1687 /* Only freeze if we're sufficiently low on memory; this holds off freeze right
1688 * after boot, and is generally is a no-op once we've reached steady state. */
1689 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
1690 return FALSE;
1691 }
1692
1693 /* Check minimum suspended process threshold. */
1694 if (!memorystatus_can_freeze_processes()) {
1695 return FALSE;
1696 }
1697 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
1698
1699 if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
1700 /*
1701 * In-core compressor used for freezing WITHOUT on-disk swap support.
1702 */
1703 if (vm_compressor_low_on_space()) {
1704 if (*memorystatus_freeze_swap_low) {
1705 *memorystatus_freeze_swap_low = TRUE;
1706 }
1707
1708 can_freeze = FALSE;
1709 } else {
1710 if (*memorystatus_freeze_swap_low) {
1711 *memorystatus_freeze_swap_low = FALSE;
1712 }
1713
1714 can_freeze = TRUE;
1715 }
1716 } else {
1717 /*
1718 * Freezing WITH on-disk swap support.
1719 *
1720 * In-core compressor fronts the swap.
1721 */
1722 if (vm_swap_low_on_space()) {
1723 if (*memorystatus_freeze_swap_low) {
1724 *memorystatus_freeze_swap_low = TRUE;
1725 }
1726
1727 can_freeze = FALSE;
1728 }
1729 }
1730
1731 return can_freeze;
1732 }
1733
1734 /*
1735 * This function evaluates if the currently frozen processes deserve
1736 * to stay in the higher jetsam band. There are 2 modes:
1737 * - 'force one == TRUE': (urgent mode)
1738 * We are out of budget and can't refreeze a process. The process's
1739 * state, if it was resumed, will stay in compressed memory. If we let it
1740 * remain up in the higher frozen jetsam band, it'll put a lot of pressure on
1741 * the lower bands. So we force-demote the least-recently-used-and-thawed
1742 * process.
1743 *
1744 * - 'force_one == FALSE': (normal mode)
1745 * If the # of thaws of a process is below our threshold, then we
1746 * will demote that process into the IDLE band.
1747 * We don't immediately kill the process here because it already has
1748 * state on disk and so it might be worth giving it another shot at
1749 * getting thawed/resumed and used.
1750 */
1751 static void
1752 memorystatus_demote_frozen_processes(boolean_t force_one)
1753 {
1754 unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
1755 unsigned int demoted_proc_count = 0;
1756 proc_t p = PROC_NULL, next_p = PROC_NULL;
1757 /* We demote to IDLE unless someone has asserted a higher priority on this process. */
1758 int maxpriority = JETSAM_PRIORITY_IDLE;
1759
1760 proc_list_lock();
1761
1762 if (memorystatus_freeze_enabled == FALSE) {
1763 /*
1764 * Freeze has been disabled likely to
1765 * reclaim swap space. So don't change
1766 * any state on the frozen processes.
1767 */
1768 proc_list_unlock();
1769 return;
1770 }
1771
1772 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
1773 while (next_p) {
1774 p = next_p;
1775 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
1776
1777 if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
1778 continue;
1779 }
1780
1781 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
1782 continue;
1783 }
1784
1785 if (force_one == TRUE) {
1786 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) {
1787 /*
1788 * This process hasn't been thawed recently and so most of
1789 * its state sits on NAND and so we skip it -- jetsamming it
1790 * won't help with memory pressure.
1791 */
1792 continue;
1793 }
1794 } else {
1795 if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
1796 /*
1797 * This process has met / exceeded our thaw count demotion threshold
1798 * and so we let it live in the higher bands.
1799 */
1800 continue;
1801 }
1802 }
1803
1804 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1805 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1806
1807 maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
1808 memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
1809 #if DEVELOPMENT || DEBUG
1810 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_demote_frozen_process(%s) pid %d [%s]",
1811 (force_one ? "urgent" : "normal"), (p ? p->p_pid : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
1812 #endif /* DEVELOPMENT || DEBUG */
1813
1814 /*
1815 * The freezer thread will consider this a normal app to be frozen
1816 * because it is in the IDLE band. So we don't need the
1817 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
1818 * we'll correctly count it as eligible for re-freeze again.
1819 *
1820 * We don't drop the frozen count because this process still has
1821 * state on disk. So there's a chance it gets resumed and then it
1822 * should land in the higher jetsam band. For that it needs to
1823 * remain marked frozen.
1824 */
1825 if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
1826 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
1827 memorystatus_refreeze_eligible_count--;
1828 }
1829
1830 demoted_proc_count++;
1831
1832 if ((force_one == TRUE) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
1833 break;
1834 }
1835 }
1836
1837 if (force_one == FALSE) {
1838 /*
1839 * We use this counter to track daily thaws.
1840 * So we only reset it to 0 under the normal
1841 * mode.
1842 */
1843 memorystatus_thaw_count = 0;
1844 }
1845
1846 proc_list_unlock();
1847 }
1848
1849
1850 /*
1851 * This function will do 4 things:
1852 *
1853 * 1) check to see if we are currently in a degraded freezer mode, and if so:
1854 * - check to see if our window has expired and we should exit this mode, OR,
1855 * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts.
1856 *
1857 * 2) check to see if we are in a NEW normal window and update the normal throttle window's params.
1858 *
1859 * 3) check what the current normal window allows for a budget.
1860 *
1861 * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below
1862 * what we would normally expect, then we are running low on our daily budget and need to enter
1863 * degraded perf. mode.
1864 */
1865
1866 static void
1867 memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed)
1868 {
1869 clock_sec_t sec;
1870 clock_nsec_t nsec;
1871 mach_timespec_t ts;
1872
1873 unsigned int freeze_daily_pageouts_max = 0;
1874
1875 #if DEVELOPMENT || DEBUG
1876 if (!memorystatus_freeze_throttle_enabled) {
1877 /*
1878 * No throttling...we can use the full budget everytime.
1879 */
1880 *budget_pages_allowed = UINT64_MAX;
1881 return;
1882 }
1883 #endif
1884
1885 clock_get_system_nanotime(&sec, &nsec);
1886 ts.tv_sec = sec;
1887 ts.tv_nsec = nsec;
1888
1889 struct throttle_interval_t *interval = NULL;
1890
1891 if (memorystatus_freeze_degradation == TRUE) {
1892 interval = degraded_throttle_window;
1893
1894 if (CMP_MACH_TIMESPEC(&ts, &interval->ts) >= 0) {
1895 memorystatus_freeze_degradation = FALSE;
1896 interval->pageouts = 0;
1897 interval->max_pageouts = 0;
1898 } else {
1899 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
1900 }
1901 }
1902
1903 interval = normal_throttle_window;
1904
1905 if (CMP_MACH_TIMESPEC(&ts, &interval->ts) >= 0) {
1906 /*
1907 * New throttle window.
1908 * Rollover any unused budget.
1909 * Also ask the storage layer what the new budget needs to be.
1910 */
1911 uint64_t freeze_daily_budget = 0;
1912 unsigned int daily_budget_pageouts = 0;
1913
1914 if (vm_swap_max_budget(&freeze_daily_budget)) {
1915 memorystatus_freeze_daily_mb_max = (freeze_daily_budget / (1024 * 1024));
1916 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max);
1917 }
1918
1919 freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE);
1920
1921 daily_budget_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / NORMAL_WINDOW_MINS));
1922 interval->max_pageouts = (interval->max_pageouts - interval->pageouts) + daily_budget_pageouts;
1923
1924 interval->ts.tv_sec = interval->mins * 60;
1925 interval->ts.tv_nsec = 0;
1926 ADD_MACH_TIMESPEC(&interval->ts, &ts);
1927 /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
1928 if (interval->pageouts > interval->max_pageouts) {
1929 interval->pageouts -= interval->max_pageouts;
1930 } else {
1931 interval->pageouts = 0;
1932 }
1933 *budget_pages_allowed = interval->max_pageouts;
1934
1935 memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */
1936 } else {
1937 /*
1938 * Current throttle window.
1939 * Deny freezing if we have no budget left.
1940 * Try graceful degradation if we are within 25% of:
1941 * - the daily budget, and
1942 * - the current budget left is below our normal budget expectations.
1943 */
1944
1945 #if DEVELOPMENT || DEBUG
1946 /*
1947 * This can only happen in the INTERNAL configs because we allow modifying the daily budget for testing.
1948 */
1949
1950 if (freeze_daily_pageouts_max > interval->max_pageouts) {
1951 /*
1952 * We just bumped the daily budget. Re-evaluate our normal window params.
1953 */
1954 interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / NORMAL_WINDOW_MINS));
1955 memorystatus_freeze_degradation = FALSE; //we'll re-evaluate this below...
1956 }
1957 #endif /* DEVELOPMENT || DEBUG */
1958
1959 if (memorystatus_freeze_degradation == FALSE) {
1960 if (interval->pageouts >= interval->max_pageouts) {
1961 *budget_pages_allowed = 0;
1962 } else {
1963 int budget_left = interval->max_pageouts - interval->pageouts;
1964 int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100;
1965
1966 mach_timespec_t time_left = {0, 0};
1967
1968 time_left.tv_sec = interval->ts.tv_sec;
1969 time_left.tv_nsec = 0;
1970
1971 SUB_MACH_TIMESPEC(&time_left, &ts);
1972
1973 if (budget_left <= budget_threshold) {
1974 /*
1975 * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration.
1976 * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full
1977 * daily pageout budget.
1978 */
1979
1980 unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS;
1981 unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS;
1982
1983 /*
1984 * The current rate of pageouts is below what we would expect for
1985 * the normal rate i.e. we have below normal budget left and so...
1986 */
1987
1988 if (current_budget_rate_allowed < normal_budget_rate_allowed) {
1989 memorystatus_freeze_degradation = TRUE;
1990 degraded_throttle_window->max_pageouts = current_budget_rate_allowed;
1991 degraded_throttle_window->pageouts = 0;
1992
1993 /*
1994 * Switch over to the degraded throttle window so the budget
1995 * doled out is based on that window.
1996 */
1997 interval = degraded_throttle_window;
1998 }
1999 }
2000
2001 *budget_pages_allowed = interval->max_pageouts - interval->pageouts;
2002 }
2003 }
2004 }
2005
2006 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
2007 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
2008 interval->throttle ? "on" : "off");
2009 }
2010
2011 static void
2012 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2013 {
2014 static boolean_t memorystatus_freeze_swap_low = FALSE;
2015
2016 lck_mtx_lock(&freezer_mutex);
2017
2018 if (memorystatus_freeze_enabled) {
2019 if ((memorystatus_frozen_count < memorystatus_frozen_processes_max) ||
2020 (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD)) {
2021 if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2022 /* Only freeze if we've not exceeded our pageout budgets.*/
2023 memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining);
2024
2025 if (memorystatus_freeze_budget_pages_remaining) {
2026 memorystatus_freeze_top_process();
2027 } else {
2028 memorystatus_demote_frozen_processes(TRUE); /* urgent mode..force one demotion */
2029 }
2030 }
2031 }
2032 }
2033
2034 /*
2035 * We use memorystatus_apps_idle_delay_time because if/when we adopt aging for applications,
2036 * it'll tie neatly into running the freezer once we age an application.
2037 *
2038 * Till then, it serves as a good interval that can be tuned via a sysctl too.
2039 */
2040 memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time;
2041
2042 assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2043 lck_mtx_unlock(&freezer_mutex);
2044
2045 thread_block((thread_continue_t) memorystatus_freeze_thread);
2046 }
2047
2048 boolean_t
2049 memorystatus_freeze_thread_should_run(void)
2050 {
2051 /*
2052 * No freezer_mutex held here...see why near call-site
2053 * within memorystatus_pages_update().
2054 */
2055
2056 boolean_t should_run = FALSE;
2057
2058 if (memorystatus_freeze_enabled == FALSE) {
2059 goto out;
2060 }
2061
2062 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2063 goto out;
2064 }
2065
2066 if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max) &&
2067 (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD)) {
2068 goto out;
2069 }
2070
2071 if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) {
2072 goto out;
2073 }
2074
2075 uint64_t curr_time = mach_absolute_time();
2076
2077 if (curr_time < memorystatus_freezer_thread_next_run_ts) {
2078 goto out;
2079 }
2080
2081 should_run = TRUE;
2082
2083 out:
2084 return should_run;
2085 }
2086
2087 int
2088 memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable)
2089 {
2090 proc_t p = PROC_NULL;
2091
2092 if (pid == 0) {
2093 return EINVAL;
2094 }
2095
2096 p = proc_find(pid);
2097 if (!p) {
2098 return ESRCH;
2099 }
2100
2101 /*
2102 * Only allow this on the current proc for now.
2103 * We can check for privileges and allow targeting another process in the future.
2104 */
2105 if (p != current_proc()) {
2106 proc_rele(p);
2107 return EPERM;
2108 }
2109
2110 proc_list_lock();
2111 *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1);
2112 proc_rele_locked(p);
2113 proc_list_unlock();
2114
2115 return 0;
2116 }
2117
2118 int
2119 memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable)
2120 {
2121 proc_t p = PROC_NULL;
2122
2123 if (pid == 0) {
2124 return EINVAL;
2125 }
2126
2127 /*
2128 * To enable freezable status, you need to be root or an entitlement.
2129 */
2130 if (is_freezable &&
2131 !kauth_cred_issuser(kauth_cred_get()) &&
2132 !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
2133 return EPERM;
2134 }
2135
2136 p = proc_find(pid);
2137 if (!p) {
2138 return ESRCH;
2139 }
2140
2141 /*
2142 * A process can change its own status. A coalition leader can
2143 * change the status of coalition members.
2144 */
2145 if (p != current_proc()) {
2146 coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM);
2147 if (!coalition_is_leader(proc_task(current_proc()), coal)) {
2148 proc_rele(p);
2149 return EPERM;
2150 }
2151 }
2152
2153 proc_list_lock();
2154 if (is_freezable == FALSE) {
2155 /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */
2156 p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
2157 printf("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n",
2158 p->p_pid, (*p->p_name ? p->p_name : "unknown"));
2159 } else {
2160 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
2161 printf("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n",
2162 p->p_pid, (*p->p_name ? p->p_name : "unknown"));
2163 }
2164 proc_rele_locked(p);
2165 proc_list_unlock();
2166
2167 return 0;
2168 }
2169
2170 static int
2171 sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS
2172 {
2173 #pragma unused(oidp, arg1, arg2)
2174
2175 if (!req->newptr) {
2176 return EINVAL;
2177 }
2178
2179 /* Need to be root or have entitlement */
2180 if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
2181 return EPERM;
2182 }
2183
2184 if (memorystatus_freeze_enabled == FALSE) {
2185 return ENOTSUP;
2186 }
2187
2188 do_fastwake_warmup_all();
2189
2190 return 0;
2191 }
2192
2193 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
2194 0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");
2195
2196 #endif /* CONFIG_FREEZE */