/*
 * Copyright (c) 2006-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 */

#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/locks.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <kern/policy_internal.h>
#include <kern/thread_group.h>

#include <IOKit/IOBSD.h>

#include <corpses/task_corpse.h>
#include <libkern/libkern.h>
#include <mach/coalition.h>
#include <mach/mach_time.h>
#include <mach/task.h>
#include <mach/host_priv.h>
#include <mach/mach_host.h>
#include <os/log.h>
#include <pexpert/pexpert.h>
#include <sys/coalition.h>
#include <sys/kern_event.h>
#include <sys/proc.h>
#include <sys/proc_info.h>
#include <sys/reason.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/wait.h>
#include <sys/tree.h>
#include <sys/priv.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <mach/machine/sdt.h>
#include <libkern/section_keywords.h>
#include <stdatomic.h>

#if CONFIG_FREEZE
#include <vm/vm_map.h>
#endif /* CONFIG_FREEZE */

#include <sys/kern_memorystatus.h>
#include <sys/kern_memorystatus_freeze.h>
#include <sys/kern_memorystatus_notify.h>

/* For logging clarity */
static const char *memorystatus_kill_cause_name[] = {
    "",                              /* kMemorystatusInvalid */
    "jettisoned",                    /* kMemorystatusKilled */
    "highwater",                     /* kMemorystatusKilledHiwat */
    "vnode-limit",                   /* kMemorystatusKilledVnodes */
    "vm-pageshortage",               /* kMemorystatusKilledVMPageShortage */
    "proc-thrashing",                /* kMemorystatusKilledProcThrashing */
    "fc-thrashing",                  /* kMemorystatusKilledFCThrashing */
    "per-process-limit",             /* kMemorystatusKilledPerProcessLimit */
    "disk-space-shortage",           /* kMemorystatusKilledDiskSpaceShortage */
    "idle-exit",                     /* kMemorystatusKilledIdleExit */
    "zone-map-exhaustion",           /* kMemorystatusKilledZoneMapExhaustion */
    "vm-compressor-thrashing",       /* kMemorystatusKilledVMCompressorThrashing */
    "vm-compressor-space-shortage",  /* kMemorystatusKilledVMCompressorSpaceShortage */
};
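
/*
 * The table above is indexed by the kMemorystatus* cause codes noted in the
 * entry comments, e.g. memorystatus_kill_cause_name[kMemorystatusKilledHiwat]
 * yields "highwater".
 */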

static const char *
memorystatus_priority_band_name(int32_t priority)
{
    switch (priority) {
    case JETSAM_PRIORITY_FOREGROUND:
        return "FOREGROUND";
    case JETSAM_PRIORITY_AUDIO_AND_ACCESSORY:
        return "AUDIO_AND_ACCESSORY";
    case JETSAM_PRIORITY_CONDUCTOR:
        return "CONDUCTOR";
    case JETSAM_PRIORITY_DRIVER_APPLE:
        return "DRIVER_APPLE";
    case JETSAM_PRIORITY_HOME:
        return "HOME";
    case JETSAM_PRIORITY_EXECUTIVE:
        return "EXECUTIVE";
    case JETSAM_PRIORITY_IMPORTANT:
        return "IMPORTANT";
    case JETSAM_PRIORITY_CRITICAL:
        return "CRITICAL";
    }

    return "?";
}

/* Does cause indicate vm or fc thrashing? */
static boolean_t
is_reason_thrashing(unsigned cause)
{
    switch (cause) {
    case kMemorystatusKilledFCThrashing:
    case kMemorystatusKilledVMCompressorThrashing:
    case kMemorystatusKilledVMCompressorSpaceShortage:
        return TRUE;
    default:
        return FALSE;
    }
}

/* Is the zone map almost full? */
static boolean_t
is_reason_zone_map_exhaustion(unsigned cause)
{
    if (cause == kMemorystatusKilledZoneMapExhaustion) {
        return TRUE;
    }
    return FALSE;
}

/*
 * Returns the current zone map size and capacity to include in the jetsam snapshot.
 * Defined in zalloc.c
 */
extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity);

/*
 * Returns the name of the largest zone and its size to include in the jetsam snapshot.
 * Defined in zalloc.c
 */
extern void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size);

/*
 * Active / Inactive limit support
 * proc list must be locked
 *
 * The SET_*** macros are used to initialize a limit
 * for the first time.
 *
 * The CACHE_*** macros are used to cache the limit that will
 * soon be in effect down in the ledgers.
 */

#define SET_ACTIVE_LIMITS_LOCKED(p, limit, is_fatal)                          \
MACRO_BEGIN                                                                   \
    (p)->p_memstat_memlimit_active = (limit);                                 \
    if (is_fatal) {                                                           \
        (p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL;              \
    } else {                                                                  \
        (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL;             \
    }                                                                         \
MACRO_END

#define SET_INACTIVE_LIMITS_LOCKED(p, limit, is_fatal)                        \
MACRO_BEGIN                                                                   \
    (p)->p_memstat_memlimit_inactive = (limit);                               \
    if (is_fatal) {                                                           \
        (p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL;            \
    } else {                                                                  \
        (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL;           \
    }                                                                         \
MACRO_END

#define CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal)                               \
MACRO_BEGIN                                                                   \
    (p)->p_memstat_memlimit = (p)->p_memstat_memlimit_active;                 \
    if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) {             \
        (p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;                     \
        is_fatal = TRUE;                                                      \
    } else {                                                                  \
        (p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;                    \
        is_fatal = FALSE;                                                     \
    }                                                                         \
MACRO_END

#define CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal)                             \
MACRO_BEGIN                                                                   \
    (p)->p_memstat_memlimit = (p)->p_memstat_memlimit_inactive;               \
    if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) {           \
        (p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;                     \
        is_fatal = TRUE;                                                      \
    } else {                                                                  \
        (p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;                    \
        is_fatal = FALSE;                                                     \
    }                                                                         \
MACRO_END
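
/*
 * Illustrative pairing of the macros above (a sketch of the existing flow,
 * not a new code path): a limit is first stored with SET_*_LIMITS_LOCKED()
 * and later pushed down into the ledger once it has been selected by
 * CACHE_*_LIMITS_LOCKED(), e.g.
 *
 *    boolean_t is_fatal;
 *    SET_ACTIVE_LIMITS_LOCKED(p, memlimit_mb, memlimit_is_fatal);
 *    if (proc_jetsam_state_is_active_locked(p)) {
 *        CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
 *        task_set_phys_footprint_limit_internal(p->task,
 *            (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1,
 *            NULL, TRUE, is_fatal);
 *    }
 *
 * See memorystatus_act_on_legacy_footprint_entitlement() below for a real
 * caller that exercises both the active and inactive variants.
 */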


/* General tunables */

unsigned long delta_percentage = 5;
unsigned long critical_threshold_percentage = 5;
// On embedded devices with more than 3GB of memory we lower the critical percentage.
uint64_t config_jetsam_large_memory_cutoff = 3UL * (1UL << 30);
unsigned long critical_threshold_percentage_larger_devices = 4;
unsigned long delta_percentage_larger_devices = 4;
unsigned long idle_offset_percentage = 5;
unsigned long pressure_threshold_percentage = 15;
unsigned long policy_more_free_offset_percentage = 5;
unsigned long sysproc_aging_aggr_threshold_percentage = 7;

/*
 * default jetsam snapshot support
 */
memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot;
memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot_copy;
unsigned int memorystatus_jetsam_snapshot_count = 0;
unsigned int memorystatus_jetsam_snapshot_copy_count = 0;
unsigned int memorystatus_jetsam_snapshot_max = 0;
unsigned int memorystatus_jetsam_snapshot_size = 0;
uint64_t memorystatus_jetsam_snapshot_last_timestamp = 0;
uint64_t memorystatus_jetsam_snapshot_timeout = 0;

/* General memorystatus stuff */

uint64_t memorystatus_sysprocs_idle_delay_time = 0;
uint64_t memorystatus_apps_idle_delay_time = 0;

static lck_grp_attr_t *memorystatus_jetsam_fg_band_lock_grp_attr;
static lck_grp_t *memorystatus_jetsam_fg_band_lock_grp;
lck_mtx_t memorystatus_jetsam_fg_band_lock;

/* Idle guard handling */

static int32_t memorystatus_scheduled_idle_demotions_sysprocs = 0;
static int32_t memorystatus_scheduled_idle_demotions_apps = 0;

static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2);
static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state);
static void memorystatus_reschedule_idle_demotion_locked(void);
int memorystatus_update_priority_for_appnap(proc_t p, boolean_t is_appnap);
vm_pressure_level_t convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);
boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t);
void memorystatus_klist_reset_all_for_level(vm_pressure_level_t pressure_level_to_clear);
void memorystatus_send_low_swap_note(void);
int memorystatus_get_proccnt_upto_priority(int32_t max_bucket_index);
boolean_t memorystatus_kill_elevated_process(uint32_t cause, os_reason_t jetsam_reason, unsigned int band, int aggr_count,
    uint32_t *errors, uint64_t *memory_reclaimed);
uint64_t memorystatus_available_memory_internal(proc_t p);

unsigned int memorystatus_level = 0;
static int memorystatus_list_count = 0;
memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT];
static thread_call_t memorystatus_idle_demotion_call;
uint64_t memstat_idle_demotion_deadline = 0;
int system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND1;
int applications_aging_band = JETSAM_PRIORITY_IDLE;

#define isProcessInAgingBands(p) ((isSysProc(p) && system_procs_aging_band && (p->p_memstat_effectivepriority == system_procs_aging_band)) || (isApp(p) && applications_aging_band && (p->p_memstat_effectivepriority == applications_aging_band)))

#define kJetsamAgingPolicyNone                    (0)
#define kJetsamAgingPolicyLegacy                  (1)
#define kJetsamAgingPolicySysProcsReclaimedFirst  (2)
#define kJetsamAgingPolicyAppsReclaimedFirst      (3)
#define kJetsamAgingPolicyMax                     kJetsamAgingPolicyAppsReclaimedFirst

unsigned int jetsam_aging_policy = kJetsamAgingPolicySysProcsReclaimedFirst;

extern int corpse_for_fatal_memkill;
extern uint64_t vm_purgeable_purge_task_owned(task_t task);
boolean_t memorystatus_allowed_vm_map_fork(task_t);
#if DEVELOPMENT || DEBUG
void memorystatus_abort_vm_map_fork(task_t);
#endif

/*
 * Idle delay timeout factors for daemons based on relaunch behavior. Only used in
 * kJetsamAgingPolicySysProcsReclaimedFirst aging policy.
 */
#define kJetsamSysProcsIdleDelayTimeLowRatio    (5)
#define kJetsamSysProcsIdleDelayTimeMedRatio    (2)
#define kJetsamSysProcsIdleDelayTimeHighRatio   (1)
static_assert(kJetsamSysProcsIdleDelayTimeLowRatio <= DEFERRED_IDLE_EXIT_TIME_SECS, "sysproc idle delay time for low relaunch daemons would be 0");

/*
 * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, treat apps as
 * well-behaved daemons for aging purposes.
 */
#define kJetsamAppsIdleDelayTimeRatio   (kJetsamSysProcsIdleDelayTimeLowRatio)

static uint64_t
memorystatus_sysprocs_idle_time(proc_t p)
{
    /*
     * The kJetsamAgingPolicySysProcsReclaimedFirst aging policy uses the relaunch behavior to
     * determine the exact idle deferred time provided to the daemons. For all other aging
     * policies, simply return the default aging idle time.
     */
    if (jetsam_aging_policy != kJetsamAgingPolicySysProcsReclaimedFirst) {
        return memorystatus_sysprocs_idle_delay_time;
    }

    uint64_t idle_delay_time = 0;
    /*
     * For system processes, base the idle delay time on the
     * jetsam relaunch behavior specified by launchd. The idea
     * is to provide extra protection to the daemons which would
     * relaunch immediately after jetsam.
     */
    switch (p->p_memstat_relaunch_flags) {
    case P_MEMSTAT_RELAUNCH_UNKNOWN:
    case P_MEMSTAT_RELAUNCH_LOW:
        idle_delay_time = memorystatus_sysprocs_idle_delay_time / kJetsamSysProcsIdleDelayTimeLowRatio;
        break;
    case P_MEMSTAT_RELAUNCH_MED:
        idle_delay_time = memorystatus_sysprocs_idle_delay_time / kJetsamSysProcsIdleDelayTimeMedRatio;
        break;
    case P_MEMSTAT_RELAUNCH_HIGH:
        idle_delay_time = memorystatus_sysprocs_idle_delay_time / kJetsamSysProcsIdleDelayTimeHighRatio;
        break;
    default:
        panic("Unknown relaunch flags on process!");
        break;
    }
    return idle_delay_time;
}

static uint64_t
memorystatus_apps_idle_time(__unused proc_t p)
{
    /*
     * Under kJetsamAgingPolicySysProcsReclaimedFirst, apps are considered
     * low-relaunch candidates, so they only get limited protection. Under
     * the other aging policies, return the default aging idle time.
     */
    if (jetsam_aging_policy != kJetsamAgingPolicySysProcsReclaimedFirst) {
        return memorystatus_apps_idle_delay_time;
    }

    return memorystatus_apps_idle_delay_time / kJetsamAppsIdleDelayTimeRatio;
}
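
/*
 * Worked example, assuming DEFERRED_IDLE_EXIT_TIME_SECS is the 10s window
 * referenced by the legacy-policy comments in this file: under
 * kJetsamAgingPolicySysProcsReclaimedFirst a high-relaunch daemon ages for
 * 10/1 = 10s, a medium-relaunch daemon for 10/2 = 5s, and low/unknown-relaunch
 * daemons as well as all apps for 10/5 = 2s.
 */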


#if 0

/* Keeping around for future use if we need a utility that can do this OR an app that needs a dynamic adjustment. */

static int
sysctl_set_jetsam_aging_policy SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)

    int error = 0, val = 0;
    memstat_bucket_t *old_bucket = 0;
    int old_system_procs_aging_band = 0, new_system_procs_aging_band = 0;
    int old_applications_aging_band = 0, new_applications_aging_band = 0;
    proc_t p = NULL, next_proc = NULL;


    error = sysctl_io_number(req, jetsam_aging_policy, sizeof(int), &val, NULL);
    if (error || !req->newptr) {
        return error;
    }

    if ((val < 0) || (val > kJetsamAgingPolicyMax)) {
        printf("jetsam: ordering policy sysctl has invalid value - %d\n", val);
        return EINVAL;
    }

    /*
     * We need to synchronize with any potential adding/removal from aging bands
     * that might be in progress currently. We use the proc_list_lock() just for
     * consistency with all the routines dealing with 'aging' processes; ideally
     * we would use a lighter-weight lock here.
     */
    proc_list_lock();

    old_system_procs_aging_band = system_procs_aging_band;
    old_applications_aging_band = applications_aging_band;

    switch (val) {
    case kJetsamAgingPolicyNone:
        new_system_procs_aging_band = JETSAM_PRIORITY_IDLE;
        new_applications_aging_band = JETSAM_PRIORITY_IDLE;
        break;

    case kJetsamAgingPolicyLegacy:
        /*
         * Legacy behavior where some daemons get a 10s protection once and only before the first clean->dirty->clean transition before going into IDLE band.
         */
        new_system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND1;
        new_applications_aging_band = JETSAM_PRIORITY_IDLE;
        break;

    case kJetsamAgingPolicySysProcsReclaimedFirst:
        new_system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND1;
        new_applications_aging_band = JETSAM_PRIORITY_AGING_BAND2;
        break;

    case kJetsamAgingPolicyAppsReclaimedFirst:
        new_system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND2;
        new_applications_aging_band = JETSAM_PRIORITY_AGING_BAND1;
        break;

    default:
        break;
    }

    if (old_system_procs_aging_band && (old_system_procs_aging_band != new_system_procs_aging_band)) {
        old_bucket = &memstat_bucket[old_system_procs_aging_band];
        p = TAILQ_FIRST(&old_bucket->list);

        while (p) {
            next_proc = TAILQ_NEXT(p, p_memstat_list);

            if (isSysProc(p)) {
                if (new_system_procs_aging_band == JETSAM_PRIORITY_IDLE) {
                    memorystatus_invalidate_idle_demotion_locked(p, TRUE);
                }

                memorystatus_update_priority_locked(p, new_system_procs_aging_band, false, true);
            }

            p = next_proc;
            continue;
        }
    }

    if (old_applications_aging_band && (old_applications_aging_band != new_applications_aging_band)) {
        old_bucket = &memstat_bucket[old_applications_aging_band];
        p = TAILQ_FIRST(&old_bucket->list);

        while (p) {
            next_proc = TAILQ_NEXT(p, p_memstat_list);

            if (isApp(p)) {
                if (new_applications_aging_band == JETSAM_PRIORITY_IDLE) {
                    memorystatus_invalidate_idle_demotion_locked(p, TRUE);
                }

                memorystatus_update_priority_locked(p, new_applications_aging_band, false, true);
            }

            p = next_proc;
            continue;
        }
    }

    jetsam_aging_policy = val;
    system_procs_aging_band = new_system_procs_aging_band;
    applications_aging_band = new_applications_aging_band;

    proc_list_unlock();

    return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, set_jetsam_aging_policy, CTLTYPE_INT | CTLFLAG_RW,
    0, 0, sysctl_set_jetsam_aging_policy, "I", "Jetsam Aging Policy");
#endif /*0*/

static int
sysctl_jetsam_set_sysprocs_idle_delay_time SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)

    int error = 0, val = 0, old_time_in_secs = 0;
    uint64_t old_time_in_ns = 0;

    absolutetime_to_nanoseconds(memorystatus_sysprocs_idle_delay_time, &old_time_in_ns);
    old_time_in_secs = old_time_in_ns / NSEC_PER_SEC;

    error = sysctl_io_number(req, old_time_in_secs, sizeof(int), &val, NULL);
    if (error || !req->newptr) {
        return error;
    }

    if ((val < 0) || (val > INT32_MAX)) {
        printf("jetsam: new idle delay interval has invalid value.\n");
        return EINVAL;
    }

    nanoseconds_to_absolutetime((uint64_t)val * NSEC_PER_SEC, &memorystatus_sysprocs_idle_delay_time);

    return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_sysprocs_idle_delay_time, CTLTYPE_INT | CTLFLAG_RW,
    0, 0, sysctl_jetsam_set_sysprocs_idle_delay_time, "I", "Aging window for system processes");


static int
sysctl_jetsam_set_apps_idle_delay_time SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)

    int error = 0, val = 0, old_time_in_secs = 0;
    uint64_t old_time_in_ns = 0;

    absolutetime_to_nanoseconds(memorystatus_apps_idle_delay_time, &old_time_in_ns);
    old_time_in_secs = old_time_in_ns / NSEC_PER_SEC;

    error = sysctl_io_number(req, old_time_in_secs, sizeof(int), &val, NULL);
    if (error || !req->newptr) {
        return error;
    }

    if ((val < 0) || (val > INT32_MAX)) {
        printf("jetsam: new idle delay interval has invalid value.\n");
        return EINVAL;
    }

    nanoseconds_to_absolutetime((uint64_t)val * NSEC_PER_SEC, &memorystatus_apps_idle_delay_time);

    return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_apps_idle_delay_time, CTLTYPE_INT | CTLFLAG_RW,
    0, 0, sysctl_jetsam_set_apps_idle_delay_time, "I", "Aging window for applications");
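
/*
 * Usage example, in the style of the debug sysctls later in this file:
 * writing a value in seconds re-arms the corresponding aging window, e.g.
 *    sysctl kern.memorystatus_sysprocs_idle_delay_time=10
 *    sysctl kern.memorystatus_apps_idle_delay_time=10
 */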

SYSCTL_INT(_kern, OID_AUTO, jetsam_aging_policy, CTLTYPE_INT | CTLFLAG_RD, &jetsam_aging_policy, 0, "");

static unsigned int memorystatus_dirty_count = 0;

SYSCTL_INT(_kern, OID_AUTO, max_task_pmem, CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED, &max_task_footprint_mb, 0, "");

static int memorystatus_highwater_enabled = 1;  /* Update the cached memlimit data. */
static boolean_t proc_jetsam_state_is_active_locked(proc_t);

#if __arm64__
#if CONFIG_MEMORYSTATUS
int legacy_footprint_bonus_mb = 50; /* This value was chosen after looking at the top 30 apps
                                     * that needed the additional room in their footprint when
                                     * the 'correct' accounting methods were applied to them.
                                     */

#if DEVELOPMENT || DEBUG
SYSCTL_INT(_kern, OID_AUTO, legacy_footprint_bonus_mb, CTLFLAG_RW | CTLFLAG_LOCKED, &legacy_footprint_bonus_mb, 0, "");
#endif /* DEVELOPMENT || DEBUG */

void
memorystatus_act_on_legacy_footprint_entitlement(proc_t p, boolean_t footprint_increase)
{
    int memlimit_mb_active = 0, memlimit_mb_inactive = 0;
    boolean_t memlimit_active_is_fatal = FALSE, memlimit_inactive_is_fatal = FALSE, use_active_limit = FALSE;

    if (p == NULL) {
        return;
    }

    proc_list_lock();

    if (p->p_memstat_memlimit_active > 0) {
        memlimit_mb_active = p->p_memstat_memlimit_active;
    } else if (p->p_memstat_memlimit_active == -1) {
        memlimit_mb_active = max_task_footprint_mb;
    } else {
        /*
         * Nothing to do for '0' which is
         * a special value only used internally
         * to test 'no limits'.
         */
        proc_list_unlock();
        return;
    }

    if (p->p_memstat_memlimit_inactive > 0) {
        memlimit_mb_inactive = p->p_memstat_memlimit_inactive;
    } else if (p->p_memstat_memlimit_inactive == -1) {
        memlimit_mb_inactive = max_task_footprint_mb;
    } else {
        /*
         * Nothing to do for '0' which is
         * a special value only used internally
         * to test 'no limits'.
         */
        proc_list_unlock();
        return;
    }

    if (footprint_increase) {
        memlimit_mb_active += legacy_footprint_bonus_mb;
        memlimit_mb_inactive += legacy_footprint_bonus_mb;
    } else {
        memlimit_mb_active -= legacy_footprint_bonus_mb;
        if (memlimit_mb_active == max_task_footprint_mb) {
            memlimit_mb_active = -1; /* reverting back to default system limit */
        }

        memlimit_mb_inactive -= legacy_footprint_bonus_mb;
        if (memlimit_mb_inactive == max_task_footprint_mb) {
            memlimit_mb_inactive = -1; /* reverting back to default system limit */
        }
    }

    memlimit_active_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL);
    memlimit_inactive_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL);

    SET_ACTIVE_LIMITS_LOCKED(p, memlimit_mb_active, memlimit_active_is_fatal);
    SET_INACTIVE_LIMITS_LOCKED(p, memlimit_mb_inactive, memlimit_inactive_is_fatal);

    if (proc_jetsam_state_is_active_locked(p) == TRUE) {
        use_active_limit = TRUE;
        CACHE_ACTIVE_LIMITS_LOCKED(p, memlimit_active_is_fatal);
    } else {
        CACHE_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive_is_fatal);
    }


    if (memorystatus_highwater_enabled) {
        task_set_phys_footprint_limit_internal(p->task,
            (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1,
            NULL,             /* return old value */
            use_active_limit, /* active limit? */
            (use_active_limit ? memlimit_active_is_fatal : memlimit_inactive_is_fatal));
    }

    proc_list_unlock();
}

#endif /* CONFIG_MEMORYSTATUS */
#endif /* __arm64__ */

#if CONFIG_EMBEDDED

SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_level, 0, "");

#endif /* CONFIG_EMBEDDED */

int
memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret)
{
    user_addr_t level = 0;

    level = args->level;

    if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) {
        return EFAULT;
    }

    return 0;
}

static void memorystatus_thread(void *param __unused, wait_result_t wr __unused);

/* Memory Limits */

static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason);
static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason);


static int memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval);

static int memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry);

static int memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval);

static int memorystatus_cmd_get_memlimit_excess_np(pid_t pid, uint32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval);

static void memorystatus_get_memlimit_properties_internal(proc_t p, memorystatus_memlimit_properties_t *p_entry);
static int memorystatus_set_memlimit_properties_internal(proc_t p, memorystatus_memlimit_properties_t *p_entry);

int proc_get_memstat_priority(proc_t, boolean_t);

static boolean_t memorystatus_idle_snapshot = 0;

unsigned int memorystatus_delta = 0;

/* Jetsam Loop Detection */
static boolean_t memorystatus_jld_enabled = FALSE;            /* Enable jetsam loop detection */
static uint32_t memorystatus_jld_eval_period_msecs = 0;       /* Init pass sets this based on device memory size */
static int memorystatus_jld_eval_aggressive_count = 3;        /* Raise the priority max after 'n' aggressive loops */
static int memorystatus_jld_eval_aggressive_priority_band_max = 15;  /* Kill aggressively up through this band */

/*
 * A FG app can request that the aggressive jetsam mechanism display some leniency in the FG band. This 'lenient' mode is described as:
 * --- if aggressive jetsam kills an app in the FG band and gets back >= AGGRESSIVE_JETSAM_LENIENT_MODE_THRESHOLD memory, it will stop the aggressive march further into and up the jetsam bands.
 *
 * RESTRICTIONS:
 * - Such a request is respected/acknowledged only once while that 'requesting' app is in the FG band i.e. if aggressive jetsam was
 *   needed and the 'lenient' mode was deployed then that's it for this special mode while the app is in the FG band.
 *
 * - If the app is still in the FG band and aggressive jetsam is needed again, there will be no stop-and-check the next time around.
 *
 * - Also, the transition of the 'requesting' app away from the FG band will void this special behavior.
 */

#define AGGRESSIVE_JETSAM_LENIENT_MODE_THRESHOLD        25
boolean_t memorystatus_aggressive_jetsam_lenient_allowed = FALSE;
boolean_t memorystatus_aggressive_jetsam_lenient = FALSE;

#if DEVELOPMENT || DEBUG
/*
 * Jetsam Loop Detection tunables.
 */

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_jld_eval_period_msecs, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_count, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_count, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_priority_band_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_priority_band_max, 0, "");
#endif /* DEVELOPMENT || DEBUG */

static uint32_t kill_under_pressure_cause = 0;

/*
 * snapshot support for memstats collected at boot.
 */
static memorystatus_jetsam_snapshot_t memorystatus_at_boot_snapshot;

static void memorystatus_init_jetsam_snapshot_locked(memorystatus_jetsam_snapshot_t *od_snapshot, uint32_t ods_list_count);
static boolean_t memorystatus_init_jetsam_snapshot_entry_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry, uint64_t gencount);
static void memorystatus_update_jetsam_snapshot_entry_locked(proc_t p, uint32_t kill_cause, uint64_t killtime);

static void memorystatus_clear_errors(void);
static void memorystatus_get_task_phys_footprint_page_counts(task_t task,
    uint64_t *internal_pages, uint64_t *internal_compressed_pages,
    uint64_t *purgeable_nonvolatile_pages, uint64_t *purgeable_nonvolatile_compressed_pages,
    uint64_t *alternate_accounting_pages, uint64_t *alternate_accounting_compressed_pages,
    uint64_t *iokit_mapped_pages, uint64_t *page_table_pages);

static void memorystatus_get_task_memory_region_count(task_t task, uint64_t *count);

static uint32_t memorystatus_build_state(proc_t p);
//static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured);

static boolean_t memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, os_reason_t jetsam_reason, int32_t *priority,
    uint32_t *errors, uint64_t *memory_reclaimed);
static boolean_t memorystatus_kill_processes_aggressive(uint32_t cause, int aggr_count, int32_t priority_max, uint32_t *errors, uint64_t *memory_reclaimed);
static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors, boolean_t *purged, uint64_t *memory_reclaimed);

static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause);

/* Priority Band Sorting Routines */
static int memorystatus_sort_bucket(unsigned int bucket_index, int sort_order);
static int memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order);
static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index);
static int memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz);

/* qsort routines */
typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
static int memstat_asc_cmp(const void *a, const void *b);

/* VM pressure */

extern unsigned int vm_page_free_count;
extern unsigned int vm_page_active_count;
extern unsigned int vm_page_inactive_count;
extern unsigned int vm_page_throttled_count;
extern unsigned int vm_page_purgeable_count;
extern unsigned int vm_page_wire_count;
#if CONFIG_SECLUDED_MEMORY
extern unsigned int vm_page_secluded_count;
extern unsigned int vm_page_secluded_count_over_target;
#endif /* CONFIG_SECLUDED_MEMORY */

/* Aggressive jetsam pages threshold for sysproc aging policy */
unsigned int memorystatus_sysproc_aging_aggr_pages = 0;

#if CONFIG_JETSAM
unsigned int memorystatus_available_pages = (unsigned int)-1;
unsigned int memorystatus_available_pages_pressure = 0;
unsigned int memorystatus_available_pages_critical = 0;
unsigned int memorystatus_available_pages_critical_base = 0;
unsigned int memorystatus_available_pages_critical_idle_offset = 0;

#if DEVELOPMENT || DEBUG
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_available_pages, 0, "");
#else
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &memorystatus_available_pages, 0, "");
#endif /* DEVELOPMENT || DEBUG */

static unsigned int memorystatus_jetsam_policy = kPolicyDefault;
unsigned int memorystatus_policy_more_free_offset_pages = 0;
static void memorystatus_update_levels_locked(boolean_t critical_only);
static unsigned int memorystatus_thread_wasted_wakeup = 0;

/* Callback into vm_compressor.c to signal that thrashing has been mitigated. */
extern void vm_thrashing_jetsam_done(void);
static int memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit);
#if DEVELOPMENT || DEBUG
static inline uint32_t
roundToNearestMB(uint32_t in)
{
    return (in + ((1 << 20) - 1)) >> 20;
}
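
/*
 * For example, roundToNearestMB(1) == 1 and roundToNearestMB((1 << 20) + 1) == 2:
 * despite the name, a byte count is rounded *up* to the next whole MB.
 */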

static int memorystatus_cmd_increase_jetsam_task_limit(pid_t pid, uint32_t byte_increase);
#endif

int32_t max_kill_priority = JETSAM_PRIORITY_MAX;

#else /* CONFIG_JETSAM */

uint64_t memorystatus_available_pages = (uint64_t)-1;
uint64_t memorystatus_available_pages_pressure = (uint64_t)-1;
uint64_t memorystatus_available_pages_critical = (uint64_t)-1;

int32_t max_kill_priority = JETSAM_PRIORITY_IDLE;
#endif /* CONFIG_JETSAM */

#if DEVELOPMENT || DEBUG

lck_grp_attr_t *disconnect_page_mappings_lck_grp_attr;
lck_grp_t *disconnect_page_mappings_lck_grp;
static lck_mtx_t disconnect_page_mappings_mutex;

extern boolean_t kill_on_no_paging_space;
#endif /* DEVELOPMENT || DEBUG */


/* Debug */

extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *);

#if DEVELOPMENT || DEBUG

static unsigned int memorystatus_debug_dump_this_bucket = 0;

static void
memorystatus_debug_dump_bucket_locked(unsigned int bucket_index)
{
    proc_t p = NULL;
    uint64_t bytes = 0;
    int ledger_limit = 0;
    unsigned int b = bucket_index;
    boolean_t traverse_all_buckets = FALSE;

    if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
        traverse_all_buckets = TRUE;
        b = 0;
    } else {
        traverse_all_buckets = FALSE;
        b = bucket_index;
    }

    /*
     * footprint reported in [pages / MB]
     * limits reported as:
     *      L-limit   proc's Ledger limit
     *      C-limit   proc's Cached limit, should match Ledger
     *      A-limit   proc's Active limit
     *     IA-limit   proc's Inactive limit
     *      F==Fatal, NF==NonFatal
     */

    printf("memorystatus_debug_dump ***START*(PAGE_SIZE_64=%llu)**\n", PAGE_SIZE_64);
    printf("bucket [pid] [pages / MB] [state] [EP / RP / AP] dirty deadline [L-limit / C-limit / A-limit / IA-limit] name\n");
    p = memorystatus_get_first_proc_locked(&b, traverse_all_buckets);
    while (p) {
        bytes = get_task_phys_footprint(p->task);
        task_get_phys_footprint_limit(p->task, &ledger_limit);
        printf("%2d [%5d] [%5lld /%3lldMB] 0x%-8x [%2d / %2d / %2d] 0x%-3x %10lld [%3d / %3d%s / %3d%s / %3d%s] %s\n",
            b, p->p_pid,
            (bytes / PAGE_SIZE_64),         /* task's footprint converted from bytes to pages */
            (bytes / (1024ULL * 1024ULL)),  /* task's footprint converted from bytes to MB */
            p->p_memstat_state, p->p_memstat_effectivepriority, p->p_memstat_requestedpriority, p->p_memstat_assertionpriority,
            p->p_memstat_dirty, p->p_memstat_idledeadline,
            ledger_limit,
            p->p_memstat_memlimit,
            (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"),
            p->p_memstat_memlimit_active,
            (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL ? "F " : "NF"),
            p->p_memstat_memlimit_inactive,
            (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL ? "F " : "NF"),
            (*p->p_name ? p->p_name : "unknown"));
        p = memorystatus_get_next_proc_locked(&b, p, traverse_all_buckets);
    }
    printf("memorystatus_debug_dump ***END***\n");
}

static int
sysctl_memorystatus_debug_dump_bucket SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
    int bucket_index = 0;
    int error;
    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || !req->newptr) {
        return error;
    }
    error = SYSCTL_IN(req, &bucket_index, sizeof(int));
    if (error || !req->newptr) {
        return error;
    }
    if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
        /*
         * All jetsam buckets will be dumped.
         */
    } else {
        /*
         * Only a single bucket will be dumped.
         */
    }

    proc_list_lock();
    memorystatus_debug_dump_bucket_locked(bucket_index);
    proc_list_unlock();
    memorystatus_debug_dump_this_bucket = bucket_index;
    return error;
}

/*
 * Debug aid to look at jetsam buckets and proc jetsam fields.
 * Use this sysctl to act on a particular jetsam bucket.
 * Writing the sysctl triggers the dump.
 * Usage: sysctl kern.memorystatus_debug_dump_this_bucket=<bucket_index>
 */

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_debug_dump_this_bucket, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_debug_dump_this_bucket, 0, sysctl_memorystatus_debug_dump_bucket, "I", "");


/* Debug aid to help determine the memory limit in effect */

static int
sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
    proc_t p;
    unsigned int b = 0;
    int error, enable = 0;
    boolean_t use_active;   /* use the active limit and active limit attributes */
    boolean_t is_fatal;

    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || !req->newptr) {
        return error;
    }

    error = SYSCTL_IN(req, &enable, sizeof(int));
    if (error || !req->newptr) {
        return error;
    }

    if (!(enable == 0 || enable == 1)) {
        return EINVAL;
    }

    proc_list_lock();

    p = memorystatus_get_first_proc_locked(&b, TRUE);
    while (p) {
        use_active = proc_jetsam_state_is_active_locked(p);

        if (enable) {
            if (use_active == TRUE) {
                CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
            } else {
                CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
            }
        } else {
            /*
             * Disabling limits does not touch the stored variants.
             * Set the cached limit fields to system_wide defaults.
             */
            p->p_memstat_memlimit = -1;
            p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
            is_fatal = TRUE;
        }

        /*
         * Enforce the cached limit by writing to the ledger.
         */
        task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, use_active, is_fatal);

        p = memorystatus_get_next_proc_locked(&b, p, TRUE);
    }

    memorystatus_highwater_enabled = enable;

    proc_list_unlock();

    return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", "");

SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, "");

#if CONFIG_JETSAM
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_policy_more_free_offset_pages, CTLFLAG_RW, &memorystatus_policy_more_free_offset_pages, 0, "");

static unsigned int memorystatus_jetsam_panic_debug = 0;

#if VM_PRESSURE_EVENTS

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, "");

#endif /* VM_PRESSURE_EVENTS */

#endif /* CONFIG_JETSAM */

#endif /* DEVELOPMENT || DEBUG */

extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation,
    void *parameter,
    integer_t priority,
    thread_t *new_thread);

#if DEVELOPMENT || DEBUG

static int
sysctl_memorystatus_disconnect_page_mappings SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
    int error = 0, pid = 0;
    proc_t p;

    error = sysctl_handle_int(oidp, &pid, 0, req);
    if (error || !req->newptr) {
        return error;
    }

    lck_mtx_lock(&disconnect_page_mappings_mutex);

    if (pid == -1) {
        vm_pageout_disconnect_all_pages();
    } else {
        p = proc_find(pid);

        if (p != NULL) {
            error = task_disconnect_page_mappings(p->task);

            proc_rele(p);

            if (error) {
                error = EIO;
            }
        } else {
            error = EINVAL;
        }
    }
    lck_mtx_unlock(&disconnect_page_mappings_mutex);

    return error;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_disconnect_page_mappings, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_disconnect_page_mappings, "I", "");

#endif /* DEVELOPMENT || DEBUG */


/*
 * Picks the sorting routine for a given jetsam priority band.
 *
 * Input:
 *    bucket_index - jetsam priority band to be sorted.
 *    sort_order - JETSAM_SORT_xxx from kern_memorystatus.h
 *        Currently sort_order is only meaningful when handling
 *        coalitions.
 *
 * Return:
 *    0     on success
 *    non-0 on failure
 */
static int
memorystatus_sort_bucket(unsigned int bucket_index, int sort_order)
{
    int coal_sort_order;

    /*
     * Verify the jetsam priority
     */
    if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
        return EINVAL;
    }

#if DEVELOPMENT || DEBUG
    if (sort_order == JETSAM_SORT_DEFAULT) {
        coal_sort_order = COALITION_SORT_DEFAULT;
    } else {
        coal_sort_order = sort_order;  /* only used for testing scenarios */
    }
#else
    /* Verify default */
    if (sort_order == JETSAM_SORT_DEFAULT) {
        coal_sort_order = COALITION_SORT_DEFAULT;
    } else {
        return EINVAL;
    }
#endif

    proc_list_lock();

    if (memstat_bucket[bucket_index].count == 0) {
        proc_list_unlock();
        return 0;
    }

    switch (bucket_index) {
    case JETSAM_PRIORITY_FOREGROUND:
        if (memorystatus_sort_by_largest_coalition_locked(bucket_index, coal_sort_order) == 0) {
            /*
             * Fall back to per process sorting when zero coalitions are found.
             */
            memorystatus_sort_by_largest_process_locked(bucket_index);
        }
        break;
    default:
        memorystatus_sort_by_largest_process_locked(bucket_index);
        break;
    }
    proc_list_unlock();

    return 0;
}
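
/*
 * For example, memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND,
 * JETSAM_SORT_DEFAULT) sorts the FG band, preferring the coalition-aware
 * sort and falling back to the per-process footprint sort when no
 * coalitions are found.
 */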

/*
 * Sort processes within a single jetsam bucket by descending footprint
 * (largest first), using an in-place selection sort over the bucket's list.
 */

static void
memorystatus_sort_by_largest_process_locked(unsigned int bucket_index)
{
    proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL;
    proc_t next_p = NULL, prev_max_proc = NULL;
    uint32_t pages = 0, max_pages = 0;
    memstat_bucket_t *current_bucket;

    if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
        return;
    }

    current_bucket = &memstat_bucket[bucket_index];

    p = TAILQ_FIRST(&current_bucket->list);

    while (p) {
        memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
        max_pages = pages;
        max_proc = p;
        prev_max_proc = p;

        while ((next_p = TAILQ_NEXT(p, p_memstat_list)) != NULL) {
            /* traversing list until we find next largest process */
            p = next_p;
            memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
            if (pages > max_pages) {
                max_pages = pages;
                max_proc = p;
            }
        }

        if (prev_max_proc != max_proc) {
            /* found a larger process, place it in the list */
            TAILQ_REMOVE(&current_bucket->list, max_proc, p_memstat_list);
            if (insert_after_proc == NULL) {
                TAILQ_INSERT_HEAD(&current_bucket->list, max_proc, p_memstat_list);
            } else {
                TAILQ_INSERT_AFTER(&current_bucket->list, insert_after_proc, max_proc, p_memstat_list);
            }
            prev_max_proc = max_proc;
        }

        insert_after_proc = max_proc;

        p = TAILQ_NEXT(max_proc, p_memstat_list);
    }
}

proc_t
memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search)
{
    memstat_bucket_t *current_bucket;
    proc_t next_p;

    if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) {
        return NULL;
    }

    current_bucket = &memstat_bucket[*bucket_index];
    next_p = TAILQ_FIRST(&current_bucket->list);
    if (!next_p && search) {
        while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
            current_bucket = &memstat_bucket[*bucket_index];
            next_p = TAILQ_FIRST(&current_bucket->list);
        }
    }

    return next_p;
}

proc_t
memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search)
{
    memstat_bucket_t *current_bucket;
    proc_t next_p;

    if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) {
        return NULL;
    }

    next_p = TAILQ_NEXT(p, p_memstat_list);
    while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
        current_bucket = &memstat_bucket[*bucket_index];
        next_p = TAILQ_FIRST(&current_bucket->list);
    }

    return next_p;
}
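
/*
 * Typical traversal pattern for the two helpers above (proc_list lock held),
 * as used by the bucket-dump and highwater sysctls in this file:
 *
 *    unsigned int b = 0;
 *    proc_t p = memorystatus_get_first_proc_locked(&b, TRUE);
 *    while (p) {
 *        ... examine or update p ...
 *        p = memorystatus_get_next_proc_locked(&b, p, TRUE);
 *    }
 *
 * Passing search == TRUE walks all buckets in priority order; FALSE confines
 * the walk to *bucket_index.
 */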

/*
 * Structure to hold state for a jetsam thread.
 * Typically there should be a single jetsam thread
 * unless parallel jetsam is enabled.
 */
struct jetsam_thread_state {
    uint8_t inited;              /* boolean - if the thread is initialized */
    uint8_t limit_to_low_bands;  /* boolean */
    int memorystatus_wakeup;     /* wake channel */
    int index;                   /* jetsam thread index */
    thread_t thread;             /* jetsam thread pointer */
} *jetsam_threads;

/* Maximum number of jetsam threads allowed */
#define JETSAM_THREADS_LIMIT 3

/* Number of active jetsam threads */
_Atomic int active_jetsam_threads = 1;

/* Number of maximum jetsam threads configured */
int max_jetsam_threads = JETSAM_THREADS_LIMIT;

/*
 * Global switch for enabling fast jetsam. Fast jetsam is
 * hooked up via the system_override() system call. It has the
 * following effects:
 * - Raise the jetsam threshold ("clear-the-deck")
 * - Enable parallel jetsam on eligible devices
 */
int fast_jetsam_enabled = 0;

/* Routine to find the jetsam state structure for the current jetsam thread */
static inline struct jetsam_thread_state *
jetsam_current_thread(void)
{
    for (int thr_id = 0; thr_id < max_jetsam_threads; thr_id++) {
        if (jetsam_threads[thr_id].thread == current_thread()) {
            return &(jetsam_threads[thr_id]);
        }
    }
    return NULL;
}


__private_extern__ void
memorystatus_init(void)
{
    kern_return_t result;
    int i;

#if CONFIG_FREEZE
    memorystatus_freeze_jetsam_band = JETSAM_PRIORITY_UI_SUPPORT;
    memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX;
    memorystatus_frozen_shared_mb_max = ((MAX_FROZEN_SHARED_MB_PERCENT * max_task_footprint_mb) / 100); /* 10% of the system wide task limit */
    memorystatus_freeze_shared_mb_per_process_max = (memorystatus_frozen_shared_mb_max / 4);
    memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
    memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
    memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS;
    memorystatus_thaw_count_demotion_threshold = MIN_THAW_DEMOTION_THRESHOLD;
#endif

#if DEVELOPMENT || DEBUG
    disconnect_page_mappings_lck_grp_attr = lck_grp_attr_alloc_init();
    disconnect_page_mappings_lck_grp = lck_grp_alloc_init("disconnect_page_mappings", disconnect_page_mappings_lck_grp_attr);

    lck_mtx_init(&disconnect_page_mappings_mutex, disconnect_page_mappings_lck_grp, NULL);

    if (kill_on_no_paging_space == TRUE) {
        max_kill_priority = JETSAM_PRIORITY_MAX;
    }
#endif

    memorystatus_jetsam_fg_band_lock_grp_attr = lck_grp_attr_alloc_init();
    memorystatus_jetsam_fg_band_lock_grp =
        lck_grp_alloc_init("memorystatus_jetsam_fg_band", memorystatus_jetsam_fg_band_lock_grp_attr);
    lck_mtx_init(&memorystatus_jetsam_fg_band_lock, memorystatus_jetsam_fg_band_lock_grp, NULL);

    /* Init buckets */
    for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
        TAILQ_INIT(&memstat_bucket[i].list);
        memstat_bucket[i].count = 0;
        memstat_bucket[i].relaunch_high_count = 0;
    }
    memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL);

    nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_sysprocs_idle_delay_time);
    nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_apps_idle_delay_time);

#if CONFIG_JETSAM
    /* Apply overrides */
    if (!PE_parse_boot_argn("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage))) {
        PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage));
    }
    if (delta_percentage == 0) {
        delta_percentage = 5;
    }
    if (max_mem > config_jetsam_large_memory_cutoff) {
        critical_threshold_percentage = critical_threshold_percentage_larger_devices;
        delta_percentage = delta_percentage_larger_devices;
    }
    assert(delta_percentage < 100);
    if (!PE_parse_boot_argn("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage))) {
        PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage));
    }
    assert(critical_threshold_percentage < 100);
    PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage));
    assert(idle_offset_percentage < 100);
    PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage));
    assert(pressure_threshold_percentage < 100);
    PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage));
    assert(freeze_threshold_percentage < 100);


    if (!PE_parse_boot_argn("jetsam_aging_policy", &jetsam_aging_policy,
        sizeof(jetsam_aging_policy))) {
        if (!PE_get_default("kern.jetsam_aging_policy", &jetsam_aging_policy,
            sizeof(jetsam_aging_policy))) {
            jetsam_aging_policy = kJetsamAgingPolicySysProcsReclaimedFirst;
        }
    }

    if (jetsam_aging_policy > kJetsamAgingPolicyMax) {
        jetsam_aging_policy = kJetsamAgingPolicySysProcsReclaimedFirst;
    }

    switch (jetsam_aging_policy) {
    case kJetsamAgingPolicyNone:
        system_procs_aging_band = JETSAM_PRIORITY_IDLE;
        applications_aging_band = JETSAM_PRIORITY_IDLE;
        break;

    case kJetsamAgingPolicyLegacy:
        /*
         * Legacy behavior where some daemons get a 10s protection once
         * AND only before the first clean->dirty->clean transition before
         * going into IDLE band.
         */
        system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND1;
        applications_aging_band = JETSAM_PRIORITY_IDLE;
        break;

    case kJetsamAgingPolicySysProcsReclaimedFirst:
        system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND1;
        applications_aging_band = JETSAM_PRIORITY_AGING_BAND2;
        break;

    case kJetsamAgingPolicyAppsReclaimedFirst:
        system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND2;
        applications_aging_band = JETSAM_PRIORITY_AGING_BAND1;
        break;

    default:
        break;
    }
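
    /*
     * To summarize the assignments above:
     *
     *    aging policy                              sysprocs band    apps band
     *    kJetsamAgingPolicyNone                    IDLE             IDLE
     *    kJetsamAgingPolicyLegacy                  AGING_BAND1      IDLE
     *    kJetsamAgingPolicySysProcsReclaimedFirst  AGING_BAND1      AGING_BAND2
     *    kJetsamAgingPolicyAppsReclaimedFirst      AGING_BAND2      AGING_BAND1
     */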

    /*
     * The aging bands cannot overlap with the JETSAM_PRIORITY_ELEVATED_INACTIVE
     * band and must be below it in priority. This is so that we don't have to make
     * our 'aging' code worry about a mix of processes, some of which need to age
     * and some others that need to stay elevated in the jetsam bands.
     */
    assert(JETSAM_PRIORITY_ELEVATED_INACTIVE > system_procs_aging_band);
    assert(JETSAM_PRIORITY_ELEVATED_INACTIVE > applications_aging_band);

    /* Take snapshots for idle-exit kills by default? First check the boot-arg... */
    if (!PE_parse_boot_argn("jetsam_idle_snapshot", &memorystatus_idle_snapshot, sizeof(memorystatus_idle_snapshot))) {
        /* ...no boot-arg, so check the device tree */
        PE_get_default("kern.jetsam_idle_snapshot", &memorystatus_idle_snapshot, sizeof(memorystatus_idle_snapshot));
    }

    memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
    memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
    memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;
    memorystatus_policy_more_free_offset_pages = (policy_more_free_offset_percentage / delta_percentage) * memorystatus_delta;
    memorystatus_sysproc_aging_aggr_pages = sysproc_aging_aggr_threshold_percentage * atop_64(max_mem) / 100;
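
    /*
     * Worked example (hypothetical device): with 2GB of DRAM and 4K pages,
     * atop_64(max_mem) == 524288 pages, so the default delta_percentage of 5
     * yields memorystatus_delta == 26214 pages (~100MB).
     */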

    /* Jetsam Loop Detection */
    if (max_mem <= (512 * 1024 * 1024)) {
        /* 512 MB devices */
        memorystatus_jld_eval_period_msecs = 8000;  /* 8000 msecs == 8 second window */
    } else {
        /* 1GB and larger devices */
        memorystatus_jld_eval_period_msecs = 6000;  /* 6000 msecs == 6 second window */
    }

    memorystatus_jld_enabled = TRUE;

    /* No contention at this point */
    memorystatus_update_levels_locked(FALSE);

#endif /* CONFIG_JETSAM */

    memorystatus_jetsam_snapshot_max = maxproc;

    memorystatus_jetsam_snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
        (sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max);

    memorystatus_jetsam_snapshot =
        (memorystatus_jetsam_snapshot_t*)kalloc(memorystatus_jetsam_snapshot_size);
    if (!memorystatus_jetsam_snapshot) {
        panic("Could not allocate memorystatus_jetsam_snapshot");
    }

    memorystatus_jetsam_snapshot_copy =
        (memorystatus_jetsam_snapshot_t*)kalloc(memorystatus_jetsam_snapshot_size);
    if (!memorystatus_jetsam_snapshot_copy) {
        panic("Could not allocate memorystatus_jetsam_snapshot_copy");
    }

    nanoseconds_to_absolutetime((uint64_t)JETSAM_SNAPSHOT_TIMEOUT_SECS * NSEC_PER_SEC, &memorystatus_jetsam_snapshot_timeout);

    memset(&memorystatus_at_boot_snapshot, 0, sizeof(memorystatus_jetsam_snapshot_t));

#if CONFIG_FREEZE
    memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta;
#endif

    /* Check the boot-arg to see if fast jetsam is allowed */
    if (!PE_parse_boot_argn("fast_jetsam_enabled", &fast_jetsam_enabled, sizeof(fast_jetsam_enabled))) {
        fast_jetsam_enabled = 0;
    }

    /* Check the boot-arg to configure the maximum number of jetsam threads */
    if (!PE_parse_boot_argn("max_jetsam_threads", &max_jetsam_threads, sizeof(max_jetsam_threads))) {
        max_jetsam_threads = JETSAM_THREADS_LIMIT;
    }

    /* Restrict the maximum number of jetsam threads to JETSAM_THREADS_LIMIT */
    if (max_jetsam_threads > JETSAM_THREADS_LIMIT) {
        max_jetsam_threads = JETSAM_THREADS_LIMIT;
    }

    /* For low CPU systems disable fast jetsam mechanism */
    if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
        max_jetsam_threads = 1;
        fast_jetsam_enabled = 0;
    }

    /* Initialize the jetsam_threads state array */
    jetsam_threads = kalloc(sizeof(struct jetsam_thread_state) * max_jetsam_threads);

    /* Initialize all the jetsam threads */
    for (i = 0; i < max_jetsam_threads; i++) {
        jetsam_threads[i].inited = FALSE;
        jetsam_threads[i].index = i;
        result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &jetsam_threads[i].thread);
        if (result != KERN_SUCCESS) {
            panic("Could not create memorystatus_thread %d", i);
        }
        thread_deallocate(jetsam_threads[i].thread);
    }
}

/* Centralised for the purposes of allowing panic-on-jetsam */
extern void
vm_run_compactor(void);

/*
 * The no-frills jetsam kill call.
 * Return: 0 on success
 *         error code on failure (EINVAL...)
 */
static int
jetsam_do_kill(proc_t p, int jetsam_flags, os_reason_t jetsam_reason)
{
    int error = 0;
    error = exit_with_reason(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags, jetsam_reason);
    return error;
}

/*
 * Wrapper for processes exiting with memorystatus details
 */
static boolean_t
memorystatus_do_kill(proc_t p, uint32_t cause, os_reason_t jetsam_reason, uint64_t *footprint_of_killed_proc)
{
    int error = 0;
    __unused pid_t victim_pid = p->p_pid;
    uint64_t footprint = get_task_phys_footprint(p->task);
#if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD)
    int32_t memstat_effectivepriority = p->p_memstat_effectivepriority;
#endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START,
        victim_pid, cause, vm_page_free_count, footprint, 0);
    DTRACE_MEMORYSTATUS4(memorystatus_do_kill, proc_t, p, os_reason_t, jetsam_reason, uint32_t, cause, uint64_t, footprint);
#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
    if (memorystatus_jetsam_panic_debug & (1 << cause)) {
        panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause);
    }
#else
#pragma unused(cause)
#endif

    if (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND) {
        printf("memorystatus: killing process %d [%s] in high band %s (%d) - memorystatus_available_pages: %llu\n", p->p_pid,
            (*p->p_name ? p->p_name : "unknown"),
            memorystatus_priority_band_name(p->p_memstat_effectivepriority), p->p_memstat_effectivepriority,
            (uint64_t)memorystatus_available_pages);
    }

    /*
     * The jetsam_reason (os_reason_t) has enough information about the kill cause.
     * We don't really need jetsam_flags anymore, so it's okay that not all possible kill causes have been mapped.
     */
    int jetsam_flags = P_LTERM_JETSAM;
    switch (cause) {
    case kMemorystatusKilledHiwat:                     jetsam_flags |= P_JETSAM_HIWAT; break;
    case kMemorystatusKilledVnodes:                    jetsam_flags |= P_JETSAM_VNODE; break;
    case kMemorystatusKilledVMPageShortage:            jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break;
    case kMemorystatusKilledVMCompressorThrashing:
    case kMemorystatusKilledVMCompressorSpaceShortage: jetsam_flags |= P_JETSAM_VMTHRASHING; break;
    case kMemorystatusKilledFCThrashing:               jetsam_flags |= P_JETSAM_FCTHRASHING; break;
    case kMemorystatusKilledPerProcessLimit:           jetsam_flags |= P_JETSAM_PID; break;
    case kMemorystatusKilledIdleExit:                  jetsam_flags |= P_JETSAM_IDLEEXIT; break;
    }
    error = jetsam_do_kill(p, jetsam_flags, jetsam_reason);
    *footprint_of_killed_proc = ((error == 0) ? footprint : 0);

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END,
        victim_pid, memstat_effectivepriority, vm_page_free_count, error, 0);

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_COMPACTOR_RUN)) | DBG_FUNC_START,
        victim_pid, cause, vm_page_free_count, *footprint_of_killed_proc, 0);

    vm_run_compactor();

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_COMPACTOR_RUN)) | DBG_FUNC_END,
        victim_pid, cause, vm_page_free_count, 0, 0);

    return error == 0;
}
1550
1551 /*
1552 * Node manipulation
1553 */
1554
1555 static void
1556 memorystatus_check_levels_locked(void)
1557 {
1558 #if CONFIG_JETSAM
1559 /* Update levels */
1560 memorystatus_update_levels_locked(TRUE);
1561 #else /* CONFIG_JETSAM */
1562 /*
1563 * Nothing to do here currently since we update
1564 * memorystatus_available_pages in vm_pressure_response.
1565 */
1566 #endif /* CONFIG_JETSAM */
1567 }
1568
1569 /*
1570 * Pin a process to a particular jetsam band when it is in the background, i.e. not doing active work.
1571 * For an application: that means no longer in the FG band
1572 * For a daemon: that means no longer in its 'requested' jetsam priority band
1573 */
1574
1575 int
1576 memorystatus_update_inactive_jetsam_priority_band(pid_t pid, uint32_t op_flags, int jetsam_prio, boolean_t effective_now)
1577 {
1578 int error = 0;
1579 boolean_t enable = FALSE;
1580 proc_t p = NULL;
1581
1582 if (op_flags == MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE) {
1583 enable = TRUE;
1584 } else if (op_flags == MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_DISABLE) {
1585 enable = FALSE;
1586 } else {
1587 return EINVAL;
1588 }
1589
1590 p = proc_find(pid);
1591 if (p != NULL) {
1592 if ((enable && ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) == P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND)) ||
1593 (!enable && ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) == 0))) {
1594 /*
1595 * No change in state.
1596 */
1597 } else {
1598 proc_list_lock();
1599
1600 if (enable) {
1601 p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1602 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1603
1604 if (effective_now) {
1605 if (p->p_memstat_effectivepriority < jetsam_prio) {
1606 if (memorystatus_highwater_enabled) {
1607 /*
1608 * Process is about to transition from
1609 * inactive --> active
1610 * assign active state
1611 */
1612 boolean_t is_fatal;
1613 boolean_t use_active = TRUE;
1614 CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
1615 task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, use_active, is_fatal);
1616 }
1617 memorystatus_update_priority_locked(p, jetsam_prio, FALSE, FALSE);
1618 }
1619 } else {
1620 if (isProcessInAgingBands(p)) {
1621 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE);
1622 }
1623 }
1624 } else {
1625 p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
1626 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1627
1628 if (effective_now) {
1629 if (p->p_memstat_effectivepriority == jetsam_prio) {
1630 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE);
1631 }
1632 } else {
1633 if (isProcessInAgingBands(p)) {
1634 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE);
1635 }
1636 }
1637 }
1638
1639 proc_list_unlock();
1640 }
1641 proc_rele(p);
1642 error = 0;
1643 } else {
1644 error = ESRCH;
1645 }
1646
1647 return error;
1648 }
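/*
 * Illustrative user-space sketch (not part of this file): the op_flags
 * consumed above arrive through the memorystatus_control() syscall. Under
 * that assumption, a suitably entitled daemon could pin a docked process
 * into the elevated inactive band roughly like this; a non-zero 'flags'
 * argument is assumed to map to effective_now == TRUE.
 *
 *     #include <sys/kern_memorystatus.h>
 *
 *     static int
 *     pin_to_elevated_band(pid_t pid, bool immediately)
 *     {
 *         // buffer/buffersize are unused by this command
 *         return memorystatus_control(MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE,
 *             pid, immediately ? 1 : 0, NULL, 0);
 *     }
 */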
1649
1650 static void
1651 memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2)
1652 {
1653 proc_t p;
1654 uint64_t current_time = 0, idle_delay_time = 0;
1655 int demote_prio_band = 0;
1656 memstat_bucket_t *demotion_bucket;
1657
1658 MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n");
1659
1660 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0);
1661
1662 current_time = mach_absolute_time();
1663
1664 proc_list_lock();
1665
1666 demote_prio_band = JETSAM_PRIORITY_IDLE + 1;
1667
1668 for (; demote_prio_band < JETSAM_PRIORITY_MAX; demote_prio_band++) {
1669 if (demote_prio_band != system_procs_aging_band && demote_prio_band != applications_aging_band) {
1670 continue;
1671 }
1672
1673 demotion_bucket = &memstat_bucket[demote_prio_band];
1674 p = TAILQ_FIRST(&demotion_bucket->list);
1675
1676 while (p) {
1677 MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid);
1678
1679 assert(p->p_memstat_idledeadline);
1680
1681 assert(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS);
1682
1683 if (current_time >= p->p_memstat_idledeadline) {
1684 if ((isSysProc(p) &&
1685 ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED | P_DIRTY_IS_DIRTY)) != P_DIRTY_IDLE_EXIT_ENABLED)) || /* system proc marked dirty */
1686 task_has_assertions((struct task *)(p->task))) { /* has outstanding assertions which might indicate outstanding work too */
1687 idle_delay_time = (isSysProc(p)) ? memorystatus_sysprocs_idle_time(p) : memorystatus_apps_idle_time(p);
1688
1689 p->p_memstat_idledeadline += idle_delay_time;
1690 p = TAILQ_NEXT(p, p_memstat_list);
1691 } else {
1692 proc_t next_proc = NULL;
1693
1694 next_proc = TAILQ_NEXT(p, p_memstat_list);
1695 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1696
1697 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, false, true);
1698
1699 p = next_proc;
1700 continue;
1701 }
1702 } else {
1703 // No further candidates
1704 break;
1705 }
1706 }
1707 }
1708
1709 memorystatus_reschedule_idle_demotion_locked();
1710
1711 proc_list_unlock();
1712
1713 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1714 }
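/*
 * Sketch of the deadline arithmetic used above, assuming the per-class idle
 * delay originates in seconds (the helper below is hypothetical). Deadlines
 * are compared against mach_absolute_time(), so the delay must first be
 * converted into absolute-time units:
 *
 *     static uint64_t
 *     example_idle_deadline(uint64_t delay_seconds)
 *     {
 *         uint64_t delay_abs = 0;
 *         nanoseconds_to_absolutetime(delay_seconds * NSEC_PER_SEC, &delay_abs);
 *         return mach_absolute_time() + delay_abs; // expired once 'now' >= deadline
 *     }
 */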
1715
1716 static void
1717 memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state)
1718 {
1719 boolean_t present_in_sysprocs_aging_bucket = FALSE;
1720 boolean_t present_in_apps_aging_bucket = FALSE;
1721 uint64_t idle_delay_time = 0;
1722
1723 if (jetsam_aging_policy == kJetsamAgingPolicyNone) {
1724 return;
1725 }
1726
1727 if ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) ||
1728 (p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION)) {
1729 /*
1730 * This process isn't going to be making the trip to the lower bands.
1731 */
1732 return;
1733 }
1734
1735 if (isProcessInAgingBands(p)) {
1736 if (jetsam_aging_policy != kJetsamAgingPolicyLegacy) {
1737 assert((p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) != P_DIRTY_AGING_IN_PROGRESS);
1738 }
1739
1740 if (isSysProc(p) && system_procs_aging_band) {
1741 present_in_sysprocs_aging_bucket = TRUE;
1742 } else if (isApp(p) && applications_aging_band) {
1743 present_in_apps_aging_bucket = TRUE;
1744 }
1745 }
1746
1747 assert(!present_in_sysprocs_aging_bucket);
1748 assert(!present_in_apps_aging_bucket);
1749
1750 MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for pid %d (dirty:0x%x, set_state %d, demotions %d).\n",
1751 p->p_pid, p->p_memstat_dirty, set_state, (memorystatus_scheduled_idle_demotions_sysprocs + memorystatus_scheduled_idle_demotions_apps));
1752
1753 if (isSysProc(p)) {
1754 assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED);
1755 }
1756
1757 idle_delay_time = (isSysProc(p)) ? memorystatus_sysprocs_idle_time(p) : memorystatus_apps_idle_time(p);
1758 if (set_state) {
1759 p->p_memstat_dirty |= P_DIRTY_AGING_IN_PROGRESS;
1760 p->p_memstat_idledeadline = mach_absolute_time() + idle_delay_time;
1761 }
1762
1763 assert(p->p_memstat_idledeadline);
1764
1765 if (isSysProc(p) && present_in_sysprocs_aging_bucket == FALSE) {
1766 memorystatus_scheduled_idle_demotions_sysprocs++;
1767 } else if (isApp(p) && present_in_apps_aging_bucket == FALSE) {
1768 memorystatus_scheduled_idle_demotions_apps++;
1769 }
1770 }
1771
1772 void
1773 memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state)
1774 {
1775 boolean_t present_in_sysprocs_aging_bucket = FALSE;
1776 boolean_t present_in_apps_aging_bucket = FALSE;
1777
1778 if (!system_procs_aging_band && !applications_aging_band) {
1779 return;
1780 }
1781
1782 if ((p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) == 0) {
1783 return;
1784 }
1785
1786 if (isProcessInAgingBands(p)) {
1787 if (jetsam_aging_policy != kJetsamAgingPolicyLegacy) {
1788 assert((p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) == P_DIRTY_AGING_IN_PROGRESS);
1789 }
1790
1791 if (isSysProc(p) && system_procs_aging_band) {
1792 assert(p->p_memstat_effectivepriority == system_procs_aging_band);
1793 assert(p->p_memstat_idledeadline);
1794 present_in_sysprocs_aging_bucket = TRUE;
1795 } else if (isApp(p) && applications_aging_band) {
1796 assert(p->p_memstat_effectivepriority == applications_aging_band);
1797 assert(p->p_memstat_idledeadline);
1798 present_in_apps_aging_bucket = TRUE;
1799 }
1800 }
1801
1802 MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for pid %d (clear_state %d, demotions %d).\n",
1803 p->p_pid, clear_state, (memorystatus_scheduled_idle_demotions_sysprocs + memorystatus_scheduled_idle_demotions_apps));
1804
1805
1806 if (clear_state) {
1807 p->p_memstat_idledeadline = 0;
1808 p->p_memstat_dirty &= ~P_DIRTY_AGING_IN_PROGRESS;
1809 }
1810
1811 if (isSysProc(p) && present_in_sysprocs_aging_bucket == TRUE) {
1812 memorystatus_scheduled_idle_demotions_sysprocs--;
1813 assert(memorystatus_scheduled_idle_demotions_sysprocs >= 0);
1814 } else if (isApp(p) && present_in_apps_aging_bucket == TRUE) {
1815 memorystatus_scheduled_idle_demotions_apps--;
1816 assert(memorystatus_scheduled_idle_demotions_apps >= 0);
1817 }
1818
1819 assert((memorystatus_scheduled_idle_demotions_sysprocs + memorystatus_scheduled_idle_demotions_apps) >= 0);
1820 }
1821
1822 static void
1823 memorystatus_reschedule_idle_demotion_locked(void)
1824 {
1825 if (0 == (memorystatus_scheduled_idle_demotions_sysprocs + memorystatus_scheduled_idle_demotions_apps)) {
1826 if (memstat_idle_demotion_deadline) {
1827 /* Transitioned 1->0, so cancel next call */
1828 thread_call_cancel(memorystatus_idle_demotion_call);
1829 memstat_idle_demotion_deadline = 0;
1830 }
1831 } else {
1832 memstat_bucket_t *demotion_bucket;
1833 proc_t p = NULL, p1 = NULL, p2 = NULL;
1834
1835 if (system_procs_aging_band) {
1836 demotion_bucket = &memstat_bucket[system_procs_aging_band];
1837 p1 = TAILQ_FIRST(&demotion_bucket->list);
1838
1839 p = p1;
1840 }
1841
1842 if (applications_aging_band) {
1843 demotion_bucket = &memstat_bucket[applications_aging_band];
1844 p2 = TAILQ_FIRST(&demotion_bucket->list);
1845
1846 if (p1 && p2) {
1847 p = (p1->p_memstat_idledeadline > p2->p_memstat_idledeadline) ? p2 : p1;
1848 } else {
1849 p = (p1 == NULL) ? p2 : p1;
1850 }
1851 }
1852
1853 assert(p);
1854
1855 if (p != NULL) {
1856 assert(p && p->p_memstat_idledeadline);
1857 if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline) {
1858 thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline);
1859 memstat_idle_demotion_deadline = p->p_memstat_idledeadline;
1860 }
1861 }
1862 }
1863 }
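/*
 * The demotion timer managed above is a thread_call armed for the earliest
 * deadline across both aging buckets. A minimal sketch of that KPI pattern
 * (the allocation site shown here is an assumption; the real call is set up
 * during memorystatus init):
 *
 *     static thread_call_t demo_call;
 *
 *     static void
 *     demo_init(void)
 *     {
 *         demo_call = thread_call_allocate(memorystatus_perform_idle_demotion, NULL);
 *     }
 *
 *     static void
 *     demo_arm(uint64_t deadline_abs)
 *     {
 *         // re-arming an already-armed call simply moves its deadline
 *         thread_call_enter_delayed(demo_call, deadline_abs);
 *     }
 *
 *     static void
 *     demo_cancel(void)
 *     {
 *         thread_call_cancel(demo_call); // the 1 -> 0 transition above
 *     }
 */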
1864
1865 /*
1866 * List manipulation
1867 */
1868
1869 int
1870 memorystatus_add(proc_t p, boolean_t locked)
1871 {
1872 memstat_bucket_t *bucket;
1873
1874 MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding pid %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);
1875
1876 if (!locked) {
1877 proc_list_lock();
1878 }
1879
1880 DTRACE_MEMORYSTATUS2(memorystatus_add, proc_t, p, int32_t, p->p_memstat_effectivepriority);
1881
1882 /* Processes marked internal do not have priority tracked */
1883 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
1884 goto exit;
1885 }
1886
1887 /*
1888 * Opt out system processes from being frozen by default.
1889 * For coalition-based freezing, we only want to freeze sysprocs that have specifically opted in.
1890 */
1891 if (isSysProc(p)) {
1892 p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED;
1893 }
1894
1895 bucket = &memstat_bucket[p->p_memstat_effectivepriority];
1896
1897 if (isSysProc(p) && system_procs_aging_band && (p->p_memstat_effectivepriority == system_procs_aging_band)) {
1898 assert(bucket->count == memorystatus_scheduled_idle_demotions_sysprocs - 1);
1899 } else if (isApp(p) && applications_aging_band && (p->p_memstat_effectivepriority == applications_aging_band)) {
1900 assert(bucket->count == memorystatus_scheduled_idle_demotions_apps - 1);
1901 } else if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) {
1902 /*
1903 * Entering the idle band.
1904 * Record idle start time.
1905 */
1906 p->p_memstat_idle_start = mach_absolute_time();
1907 }
1908
1909 TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list);
1910 bucket->count++;
1911 if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) {
1912 bucket->relaunch_high_count++;
1913 }
1914
1915 memorystatus_list_count++;
1916
1917 memorystatus_check_levels_locked();
1918
1919 exit:
1920 if (!locked) {
1921 proc_list_unlock();
1922 }
1923
1924 return 0;
1925 }
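/*
 * Distilled sketch of the bucket bookkeeping above, with simplified names
 * (the real memstat_bucket_t also tracks relaunch_high_count): each priority
 * band is a TAILQ whose 'count' must always equal the queue length.
 *
 *     struct demo_bucket {
 *         TAILQ_HEAD(, proc) list;
 *         int count;
 *     };
 *
 *     static void
 *     demo_bucket_add(struct demo_bucket *b, proc_t p)
 *     {
 *         TAILQ_INSERT_TAIL(&b->list, p, p_memstat_list);
 *         b->count++;
 *     }
 */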
1926
1927 /*
1928 * Description:
1929 * Moves a process from one jetsam bucket to another,
1930 * which changes the LRU position of the process.
1931 *
1932 * Monitors transition between buckets and if necessary
1933 * will update cached memory limits accordingly.
1934 *
1935 * skip_demotion_check:
1936 * - if the 'jetsam aging policy' is NOT 'legacy':
1937 * When this flag is TRUE, it means we are going
1938 * to age the ripe processes out of the aging bands and into the
1939 * IDLE band and apply their inactive memory limits.
1940 *
1941 * - if the 'jetsam aging policy' is 'legacy':
1942 * When this flag is TRUE, it might mean the above aging mechanism
1943 * OR
1944 * It might be that we have a process that has used up the 'idle deferral'
1945 * stay it is given once per lifetime. In this case, the process
1946 * won't be going through any aging codepaths. But we still need to apply
1947 * the right inactive limits and so we explicitly set this to TRUE if the
1948 * new priority for the process is the IDLE band.
1949 */
1950 void
1951 memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert, boolean_t skip_demotion_check)
1952 {
1953 memstat_bucket_t *old_bucket, *new_bucket;
1954
1955 assert(priority < MEMSTAT_BUCKET_COUNT);
1956
1957 /* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */
1958 if ((p->p_listflag & P_LIST_EXITED) != 0) {
1959 return;
1960 }
1961
1962 MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting %s(%d) to priority %d, inserting at %s\n",
1963 (*p->p_name ? p->p_name : "unknown"), p->p_pid, priority, head_insert ? "head" : "tail");
1964
1965 DTRACE_MEMORYSTATUS3(memorystatus_update_priority, proc_t, p, int32_t, p->p_memstat_effectivepriority, int, priority);
1966
1967 old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
1968
1969 if (skip_demotion_check == FALSE) {
1970 if (isSysProc(p)) {
1971 /*
1972 * For system processes, the memorystatus_dirty_* routines take care of adding/removing
1973 * the processes from the aging bands and balancing the demotion counts.
1974 * We can, however, override that if the process has an 'elevated inactive jetsam band' attribute.
1975 */
1976
1977 if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) {
1978 /*
1979 * 2 types of processes can use the non-standard elevated inactive band:
1980 * - Frozen processes that always land in memorystatus_freeze_jetsam_band
1981 * OR
1982 * - processes that specifically opt-in to the elevated inactive support e.g. docked processes.
1983 */
1984 #if CONFIG_FREEZE
1985 if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
1986 if (priority <= memorystatus_freeze_jetsam_band) {
1987 priority = memorystatus_freeze_jetsam_band;
1988 }
1989 } else
1990 #endif /* CONFIG_FREEZE */
1991 {
1992 if (priority <= JETSAM_PRIORITY_ELEVATED_INACTIVE) {
1993 priority = JETSAM_PRIORITY_ELEVATED_INACTIVE;
1994 }
1995 }
1996 assert(!(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS));
1997 }
1998 } else if (isApp(p)) {
1999 /*
2000 * Check to see if the application is being lowered in jetsam priority. If so, and:
2001 * - it has an 'elevated inactive jetsam band' attribute, then put it in the appropriate band.
2002 * - it is a normal application, then let it age in the aging band if that policy is in effect.
2003 */
2004
2005 if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) {
2006 #if CONFIG_FREEZE
2007 if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
2008 if (priority <= memorystatus_freeze_jetsam_band) {
2009 priority = memorystatus_freeze_jetsam_band;
2010 }
2011 } else
2012 #endif /* CONFIG_FREEZE */
2013 {
2014 if (priority <= JETSAM_PRIORITY_ELEVATED_INACTIVE) {
2015 priority = JETSAM_PRIORITY_ELEVATED_INACTIVE;
2016 }
2017 }
2018 } else {
2019 if (applications_aging_band) {
2020 if (p->p_memstat_effectivepriority == applications_aging_band) {
2021 assert(old_bucket->count == (memorystatus_scheduled_idle_demotions_apps + 1));
2022 }
2023
2024 if ((jetsam_aging_policy != kJetsamAgingPolicyLegacy) && (priority <= applications_aging_band)) {
2025 assert(!(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS));
2026 priority = applications_aging_band;
2027 memorystatus_schedule_idle_demotion_locked(p, TRUE);
2028 }
2029 }
2030 }
2031 }
2032 }
2033
2034 if ((system_procs_aging_band && (priority == system_procs_aging_band)) || (applications_aging_band && (priority == applications_aging_band))) {
2035 assert(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS);
2036 }
2037
2038 #if DEVELOPMENT || DEBUG
2039 if (priority == JETSAM_PRIORITY_IDLE && /* if the process is on its way into the IDLE band */
2040 skip_demotion_check == FALSE && /* and it isn't via the path that will set the INACTIVE memlimits */
2041 (p->p_memstat_dirty & P_DIRTY_TRACK) && /* and it has 'DIRTY' tracking enabled */
2042 ((p->p_memstat_memlimit != p->p_memstat_memlimit_inactive) || /* and we notice that the current limit isn't the right value (inactive) */
2043 ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) ? (!(p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT)) : (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT)))) { /* OR type (fatal vs non-fatal) */
2044 printf("memorystatus_update_priority_locked: on %s with 0x%x, prio: %d and %d\n", p->p_name, p->p_memstat_state, priority, p->p_memstat_memlimit); /* then we must catch this */
2045 }
2046 #endif /* DEVELOPMENT || DEBUG */
2047
2048 TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list);
2049 old_bucket->count--;
2050 if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) {
2051 old_bucket->relaunch_high_count--;
2052 }
2053
2054 new_bucket = &memstat_bucket[priority];
2055 if (head_insert) {
2056 TAILQ_INSERT_HEAD(&new_bucket->list, p, p_memstat_list);
2057 } else {
2058 TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
2059 }
2060 new_bucket->count++;
2061 if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) {
2062 new_bucket->relaunch_high_count++;
2063 }
2064
2065 if (memorystatus_highwater_enabled) {
2066 boolean_t is_fatal;
2067 boolean_t use_active;
2068
2069 /*
2070 * If cached limit data is updated, then the limits
2071 * will be enforced by writing to the ledgers.
2072 */
2073 boolean_t ledger_update_needed = TRUE;
2074
2075 /*
2076 * Here, we must update the cached memory limit if the task
2077 * is transitioning between:
2078 * active <--> inactive
2079 * FG <--> BG
2080 * but:
2081 * dirty <--> clean is ignored
2082 *
2083 * We bypass non-idle processes that have opted into dirty tracking because
2084 * a move between buckets does not imply a transition between the
2085 * dirty <--> clean state.
2086 */
2087
2088 if (p->p_memstat_dirty & P_DIRTY_TRACK) {
2089 if (skip_demotion_check == TRUE && priority == JETSAM_PRIORITY_IDLE) {
2090 CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
2091 use_active = FALSE;
2092 } else {
2093 ledger_update_needed = FALSE;
2094 }
2095 } else if ((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) {
2096 /*
2097 * inactive --> active
2098 * BG --> FG
2099 * assign active state
2100 */
2101 CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
2102 use_active = TRUE;
2103 } else if ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
2104 /*
2105 * active --> inactive
2106 * FG --> BG
2107 * assign inactive state
2108 */
2109 CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
2110 use_active = FALSE;
2111 } else {
2112 /*
2113 * The transition between jetsam priority buckets apparently did
2114 * not affect active/inactive state.
2115 * This is not unusual... especially during startup when
2116 * processes are getting established in their respective bands.
2117 */
2118 ledger_update_needed = FALSE;
2119 }
2120
2121 /*
2122 * Enforce the new limits by writing to the ledger
2123 */
2124 if (ledger_update_needed) {
2125 task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, use_active, is_fatal);
2126
2127 MEMORYSTATUS_DEBUG(3, "memorystatus_update_priority_locked: new limit on pid %d (%dMB %s) priority old --> new (%d --> %d) dirty?=0x%x %s\n",
2128 p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
2129 (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, priority, p->p_memstat_dirty,
2130 (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
2131 }
2132 }
2133
2134 /*
2135 * Record idle start or idle delta.
2136 */
2137 if (p->p_memstat_effectivepriority == priority) {
2138 /*
2139 * This process is not transitioning between
2140 * jetsam priority buckets. Do nothing.
2141 */
2142 } else if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) {
2143 uint64_t now;
2144 /*
2145 * Transitioning out of the idle priority bucket.
2146 * Record idle delta.
2147 */
2148 assert(p->p_memstat_idle_start != 0);
2149 now = mach_absolute_time();
2150 if (now > p->p_memstat_idle_start) {
2151 p->p_memstat_idle_delta = now - p->p_memstat_idle_start;
2152 }
2153
2154 /*
2155 * About to become active and so memory footprint could change.
2156 * So mark it eligible for freeze-considerations next time around.
2157 */
2158 if (p->p_memstat_state & P_MEMSTAT_FREEZE_IGNORE) {
2159 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_IGNORE;
2160 }
2161 } else if (priority == JETSAM_PRIORITY_IDLE) {
2162 /*
2163 * Transitioning into the idle priority bucket.
2164 * Record idle start.
2165 */
2166 p->p_memstat_idle_start = mach_absolute_time();
2167 }
2168
2169 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CHANGE_PRIORITY), p->p_pid, priority, p->p_memstat_effectivepriority, 0, 0);
2170
2171 p->p_memstat_effectivepriority = priority;
2172
2173 #if CONFIG_SECLUDED_MEMORY
2174 if (secluded_for_apps &&
2175 task_could_use_secluded_mem(p->task)) {
2176 task_set_can_use_secluded_mem(
2177 p->task,
2178 (priority >= JETSAM_PRIORITY_FOREGROUND));
2179 }
2180 #endif /* CONFIG_SECLUDED_MEMORY */
2181
2182 memorystatus_check_levels_locked();
2183 }
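/*
 * Distilled sketch of the ledger-update decision made above (illustrative
 * only; the real path also caches the fatal flag via the CACHE_*_LIMITS
 * macros before writing the ledger):
 *
 *     static boolean_t
 *     demo_needs_ledger_update(proc_t p, int new_prio,
 *         boolean_t skip_demotion_check, boolean_t *use_active)
 *     {
 *         if (p->p_memstat_dirty & P_DIRTY_TRACK) {
 *             // dirty-tracked procs: only the explicit trip into IDLE applies limits here
 *             *use_active = FALSE;
 *             return skip_demotion_check && (new_prio == JETSAM_PRIORITY_IDLE);
 *         }
 *         if ((new_prio >= JETSAM_PRIORITY_FOREGROUND) !=
 *             (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
 *             // the move crosses the FG boundary: active <--> inactive
 *             *use_active = (new_prio >= JETSAM_PRIORITY_FOREGROUND);
 *             return TRUE;
 *         }
 *         return FALSE; // no active/inactive transition; nothing to enforce
 *     }
 */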
2184
2185 int
2186 memorystatus_relaunch_flags_update(proc_t p, int relaunch_flags)
2187 {
2188 p->p_memstat_relaunch_flags = relaunch_flags;
2189 KDBG(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_RELAUNCH_FLAGS), p->p_pid, relaunch_flags, 0, 0, 0);
2190 return 0;
2191 }
2192
2193 /*
2194 *
2195 * Description: Update the jetsam priority and memory limit attributes for a given process.
2196 *
2197 * Parameters:
2198 * p The process whose jetsam information is being updated.
2199 * priority The jetsam priority band
2200 * user_data user-specific data, unused by the kernel
2201 * is_assertion When true, a priority update is driven by an assertion.
2202 * effective guards against a race if the process's update has already occurred
2203 * update_memlimit When true, we know this is the init step via the posix_spawn path.
2204 *
2205 * memlimit_active Value in megabytes; the monitored footprint level while the
2206 * process is active. Exceeding it may result in termination
2207 * based on its associated fatal flag.
2208 *
2209 * memlimit_active_is_fatal When a process is active and exceeds its memory footprint,
2210 * this describes whether or not it should be immediately fatal.
2211 *
2212 * memlimit_inactive Value in megabytes; the monitored footprint level while the
2213 * process is inactive. Exceeding it may result in termination
2214 * based on its associated fatal flag.
2215 *
2216 * memlimit_inactive_is_fatal When a process is inactive and exceeds its memory footprint,
2217 * this describes whether or not it should be immediately fatal.
2218 *
2219 * Returns: 0 Success
2220 * non-0 Failure
2221 */
2222
2223 int
2224 memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t is_assertion, boolean_t effective, boolean_t update_memlimit,
2225 int32_t memlimit_active, boolean_t memlimit_active_is_fatal,
2226 int32_t memlimit_inactive, boolean_t memlimit_inactive_is_fatal)
2227 {
2228 int ret;
2229 boolean_t head_insert = false;
2230
2231 MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing (%s) pid %d: priority %d, user_data 0x%llx\n", (*p->p_name ? p->p_name : "unknown"), p->p_pid, priority, user_data);
2232
2233 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);
2234
2235 if (priority == -1) {
2236 /* Use as shorthand for default priority */
2237 priority = JETSAM_PRIORITY_DEFAULT;
2238 } else if ((priority == system_procs_aging_band) || (priority == applications_aging_band)) {
2239 /* Both the aging bands are reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */
2240 priority = JETSAM_PRIORITY_IDLE;
2241 } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) {
2242 /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */
2243 priority = JETSAM_PRIORITY_IDLE;
2244 head_insert = TRUE;
2245 } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) {
2246 /* Sanity check */
2247 ret = EINVAL;
2248 goto out;
2249 }
2250
2251 proc_list_lock();
2252
2253 assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));
2254
2255 if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) {
2256 ret = EALREADY;
2257 proc_list_unlock();
2258 MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid);
2259 goto out;
2260 }
2261
2262 if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || ((p->p_listflag & P_LIST_EXITED) != 0)) {
2263 /*
2264 * This could happen when a process calling posix_spawn() is exiting on the jetsam thread.
2265 */
2266 ret = EBUSY;
2267 proc_list_unlock();
2268 goto out;
2269 }
2270
2271 p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED;
2272 p->p_memstat_userdata = user_data;
2273
2274 if (is_assertion) {
2275 if (priority == JETSAM_PRIORITY_IDLE) {
2276 /*
2277 * Assertions relinquish control when the process is heading to IDLE.
2278 */
2279 if (p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION) {
2280 /*
2281 * Mark the process as no longer being managed by assertions.
2282 */
2283 p->p_memstat_state &= ~P_MEMSTAT_PRIORITY_ASSERTION;
2284 } else {
2285 /*
2286 * Ignore an idle priority transition if the process is not
2287 * already managed by assertions. We won't treat this as
2288 * an error, but we will log the unexpected behavior and bail.
2289 */
2290 os_log(OS_LOG_DEFAULT, "memorystatus: Ignore assertion driven idle priority. Process not previously controlled %s:%d\n",
2291 (*p->p_name ? p->p_name : "unknown"), p->p_pid);
2292
2293 ret = 0;
2294 proc_list_unlock();
2295 goto out;
2296 }
2297 } else {
2298 /*
2299 * Process is now being managed by assertions,
2300 */
2301 p->p_memstat_state |= P_MEMSTAT_PRIORITY_ASSERTION;
2302 }
2303
2304 /* Always update the assertion priority in this path */
2305
2306 p->p_memstat_assertionpriority = priority;
2307
2308 int memstat_dirty_flags = memorystatus_dirty_get(p, TRUE); /* proc_list_lock is held */
2309
2310 if (memstat_dirty_flags != 0) {
2311 /*
2312 * Calculate maximum priority only when dirty tracking processes are involved.
2313 */
2314 int maxpriority;
2315 if (memstat_dirty_flags & PROC_DIRTY_IS_DIRTY) {
2316 maxpriority = MAX(p->p_memstat_assertionpriority, p->p_memstat_requestedpriority);
2317 } else {
2318 /* clean */
2319
2320 if (memstat_dirty_flags & PROC_DIRTY_ALLOWS_IDLE_EXIT) {
2321 /*
2322 * The aging policy must be evaluated and applied here because runningboardd
2323 * has relinquished its hold on the jetsam priority by attempting to move a
2324 * clean process to the idle band.
2325 */
2326
2327 int newpriority = JETSAM_PRIORITY_IDLE;
2328 if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED | P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
2329 newpriority = (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) ? system_procs_aging_band : JETSAM_PRIORITY_IDLE;
2330 }
2331
2332 maxpriority = MAX(p->p_memstat_assertionpriority, newpriority);
2333
2334 if (newpriority == system_procs_aging_band) {
2335 memorystatus_schedule_idle_demotion_locked(p, FALSE);
2336 }
2337 } else {
2338 /*
2339 * Preserves requestedpriority when the process does not support pressured exit.
2340 */
2341 maxpriority = MAX(p->p_memstat_assertionpriority, p->p_memstat_requestedpriority);
2342 }
2343 }
2344 priority = maxpriority;
2345 }
2346 } else {
2347 p->p_memstat_requestedpriority = priority;
2348 }
2349
2350 if (update_memlimit) {
2351 boolean_t is_fatal;
2352 boolean_t use_active;
2353
2354 /*
2355 * Posix_spawn'd processes come through this path to instantiate ledger limits.
2356 * Forked processes do not come through this path, so no ledger limits exist.
2357 * (That's why forked processes can consume unlimited memory.)
2358 */
2359
2360 MEMORYSTATUS_DEBUG(3, "memorystatus_update(enter): pid %d, priority %d, dirty=0x%x, Active(%dMB %s), Inactive(%dMB, %s)\n",
2361 p->p_pid, priority, p->p_memstat_dirty,
2362 memlimit_active, (memlimit_active_is_fatal ? "F " : "NF"),
2363 memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF"));
2364
2365 if (memlimit_active <= 0) {
2366 /*
2367 * This process will have a system_wide task limit when active.
2368 * System_wide task limit is always fatal.
2369 * It's quite common to see a non-fatal flag passed in here.
2370 * It's not an error, we just ignore it.
2371 */
2372
2373 /*
2374 * For backward compatibility with some unexplained launchd behavior,
2375 * we allow a zero sized limit. But we still enforce system_wide limit
2376 * when written to the ledgers.
2377 */
2378
2379 if (memlimit_active < 0) {
2380 memlimit_active = -1; /* enforces system_wide task limit */
2381 }
2382 memlimit_active_is_fatal = TRUE;
2383 }
2384
2385 if (memlimit_inactive <= 0) {
2386 /*
2387 * This process will have a system_wide task limit when inactive.
2388 * System_wide task limit is always fatal.
2389 */
2390
2391 memlimit_inactive = -1;
2392 memlimit_inactive_is_fatal = TRUE;
2393 }
2394
2395 /*
2396 * Initialize the active limit variants for this process.
2397 */
2398 SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal);
2399
2400 /*
2401 * Initialize the inactive limit variants for this process.
2402 */
2403 SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal);
2404
2405 /*
2406 * Initialize the cached limits for target process.
2407 * When the target process is dirty-tracked, it's typically
2408 * in a clean state. Non-dirty-tracked processes are
2409 * typically active (Foreground or above).
2410 * But just in case, we don't make assumptions...
2411 */
2412
2413 if (proc_jetsam_state_is_active_locked(p) == TRUE) {
2414 CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
2415 use_active = TRUE;
2416 } else {
2417 CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
2418 use_active = FALSE;
2419 }
2420
2421 /*
2422 * Enforce the cached limit by writing to the ledger.
2423 */
2424 if (memorystatus_highwater_enabled) {
2425 /* apply now */
2426 task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, use_active, is_fatal);
2427
2428 MEMORYSTATUS_DEBUG(3, "memorystatus_update: init: limit on pid %d (%dMB %s) targeting priority(%d) dirty?=0x%x %s\n",
2429 p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
2430 (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), priority, p->p_memstat_dirty,
2431 (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
2432 }
2433 }
2434
2435 /*
2436 * We can't add to the aging bands buckets here.
2437 * But, we could be removing it from those buckets.
2438 * Check and take appropriate steps if so.
2439 */
2440
2441 if (isProcessInAgingBands(p)) {
2442 if ((jetsam_aging_policy != kJetsamAgingPolicyLegacy) && isApp(p) && (priority > applications_aging_band)) {
2443 /*
2444 * Runningboardd is pulling up an application that is in the aging band.
2445 * We reset the app's state here so that it'll get a fresh stay in the
2446 * aging band on the way back.
2447 *
2448 * We always handled the app 'aging' in the memorystatus_update_priority_locked()
2449 * function. Daemons used to be handled via the dirty 'set/clear/track' path.
2450 * But with extensions (daemon-app hybrid), runningboardd is now going through
2451 * this routine for daemons too and things have gotten a bit tangled. This should
2452 * be simplified/untangled at some point and might require some assistance from
2453 * runningboardd.
2454 */
2455 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2456 } else {
2457 memorystatus_invalidate_idle_demotion_locked(p, FALSE);
2458 }
2459 memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE);
2460 } else {
2461 if (jetsam_aging_policy == kJetsamAgingPolicyLegacy && priority == JETSAM_PRIORITY_IDLE) {
2462 /*
2463 * Daemons with 'inactive' limits will go through the dirty tracking codepath.
2464 * This path deals with apps that may have 'inactive' limits e.g. WebContent processes.
2465 * If this is the legacy aging policy we explicitly need to apply those limits. If it
2466 * is any other aging policy, then we don't need to worry because all processes
2467 * will go through the aging bands and then the demotion thread will take care to
2468 * move them into the IDLE band and apply the required limits.
2469 */
2470 memorystatus_update_priority_locked(p, priority, head_insert, TRUE);
2471 }
2472 }
2473
2474 memorystatus_update_priority_locked(p, priority, head_insert, FALSE);
2475
2476 proc_list_unlock();
2477 ret = 0;
2478
2479 out:
2480 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0);
2481
2482 return ret;
2483 }
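/*
 * Illustrative user-space sketch (not part of this file): the usual route
 * into memorystatus_update() is MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES.
 * Entitlement checks and the launchd/runningboardd plumbing are outside the
 * scope of this sketch.
 *
 *     #include <sys/kern_memorystatus.h>
 *
 *     static int
 *     demo_set_priority(pid_t pid, int32_t priority)
 *     {
 *         memorystatus_priority_properties_t props = {
 *             .priority  = priority, // -1 is shorthand for JETSAM_PRIORITY_DEFAULT above
 *             .user_data = 0,
 *         };
 *         return memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES,
 *             pid, 0, &props, sizeof(props));
 *     }
 */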
2484
2485 int
2486 memorystatus_remove(proc_t p)
2487 {
2488 int ret;
2489 memstat_bucket_t *bucket;
2490 boolean_t reschedule = FALSE;
2491
2492 MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing pid %d\n", p->p_pid);
2493
2494 /*
2495 * Check if this proc is locked (because we're performing a freeze).
2496 * If so, we fail and instruct the caller to try again later.
2497 */
2498 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2499 return EAGAIN;
2500 }
2501
2502 assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));
2503
2504 bucket = &memstat_bucket[p->p_memstat_effectivepriority];
2505
2506 if (isSysProc(p) && system_procs_aging_band && (p->p_memstat_effectivepriority == system_procs_aging_band)) {
2507 assert(bucket->count == memorystatus_scheduled_idle_demotions_sysprocs);
2508 reschedule = TRUE;
2509 } else if (isApp(p) && applications_aging_band && (p->p_memstat_effectivepriority == applications_aging_band)) {
2510 assert(bucket->count == memorystatus_scheduled_idle_demotions_apps);
2511 reschedule = TRUE;
2512 }
2513
2514 /*
2515 * Record idle delta
2516 */
2517
2518 if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) {
2519 uint64_t now = mach_absolute_time();
2520 if (now > p->p_memstat_idle_start) {
2521 p->p_memstat_idle_delta = now - p->p_memstat_idle_start;
2522 }
2523 }
2524
2525 TAILQ_REMOVE(&bucket->list, p, p_memstat_list);
2526 bucket->count--;
2527 if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) {
2528 bucket->relaunch_high_count--;
2529 }
2530
2531 memorystatus_list_count--;
2532
2533 /* If awaiting demotion to the idle band, clean up */
2534 if (reschedule) {
2535 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2536 memorystatus_reschedule_idle_demotion_locked();
2537 }
2538
2539 memorystatus_check_levels_locked();
2540
2541 #if CONFIG_FREEZE
2542 if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) {
2543 if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
2544 p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
2545 memorystatus_refreeze_eligible_count--;
2546 }
2547
2548 memorystatus_frozen_count--;
2549 memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
2550 p->p_memstat_freeze_sharedanon_pages = 0;
2551 }
2552
2553 if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
2554 memorystatus_suspended_count--;
2555 }
2556 #endif
2557
2558 if (p) {
2559 ret = 0;
2560 } else {
2561 ret = ESRCH;
2562 }
2563
2564 return ret;
2565 }
2566
2567 /*
2568 * Validate dirty tracking flags with process state.
2569 *
2570 * Return:
2571 * 0 on success
2572 * non-0 on failure
2573 *
2574 * The proc_list_lock is held by the caller.
2575 */
2576
2577 static int
2578 memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol)
2579 {
2580 /* See that the process isn't marked for termination */
2581 if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) {
2582 return EBUSY;
2583 }
2584
2585 /* Idle exit requires that process be tracked */
2586 if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) &&
2587 !(pcontrol & PROC_DIRTY_TRACK)) {
2588 return EINVAL;
2589 }
2590
2591 /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. */
2592 if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) &&
2593 !(pcontrol & PROC_DIRTY_TRACK)) {
2594 return EINVAL;
2595 }
2596
2597 /* Only one type of DEFER behavior is allowed.*/
2598 if ((pcontrol & PROC_DIRTY_DEFER) &&
2599 (pcontrol & PROC_DIRTY_DEFER_ALWAYS)) {
2600 return EINVAL;
2601 }
2602
2603 /* Deferral is only relevant if idle exit is specified */
2604 if (((pcontrol & PROC_DIRTY_DEFER) ||
2605 (pcontrol & PROC_DIRTY_DEFER_ALWAYS)) &&
2606 !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) {
2607 return EINVAL;
2608 }
2609
2610 return 0;
2611 }
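/*
 * Examples of how the rules above compose (illustrative; assumes the usual
 * flag values):
 *
 *     PROC_DIRTY_TRACK                                      -> 0 (ok)
 *     PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT         -> 0 (ok)
 *     PROC_DIRTY_ALLOW_IDLE_EXIT                            -> EINVAL (idle exit requires tracking)
 *     PROC_DIRTY_TRACK | PROC_DIRTY_DEFER                   -> EINVAL (deferral requires idle exit)
 *     PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT |
 *         PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS        -> EINVAL (only one DEFER flavor)
 */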
2612
2613 static void
2614 memorystatus_update_idle_priority_locked(proc_t p)
2615 {
2616 int32_t priority;
2617
2618 MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty);
2619
2620 assert(isSysProc(p));
2621
2622 if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED | P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
2623 priority = (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) ? system_procs_aging_band : JETSAM_PRIORITY_IDLE;
2624 } else {
2625 priority = p->p_memstat_requestedpriority;
2626 }
2627
2628 if (p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION) {
2629 /*
2630 * This process has a jetsam priority managed by an assertion.
2631 * Policy is to choose the max priority.
2632 */
2633 if (p->p_memstat_assertionpriority > priority) {
2634 os_log(OS_LOG_DEFAULT, "memorystatus: assertion priority %d overrides priority %d for %s:%d\n",
2635 p->p_memstat_assertionpriority, priority,
2636 (*p->p_name ? p->p_name : "unknown"), p->p_pid);
2637 priority = p->p_memstat_assertionpriority;
2638 }
2639 }
2640
2641 if (priority != p->p_memstat_effectivepriority) {
2642 if ((jetsam_aging_policy == kJetsamAgingPolicyLegacy) &&
2643 (priority == JETSAM_PRIORITY_IDLE)) {
2644 /*
2645 * This process is on its way into the IDLE band. The system is
2646 * using 'legacy' jetsam aging policy. That means, this process
2647 * has already used up its idle-deferral aging time that is given
2648 * once per its lifetime. So we need to set the INACTIVE limits
2649 * explicitly because it won't be going through the demotion paths
2650 * that take care to apply the limits appropriately.
2651 */
2652
2653 if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) {
2654 /*
2655 * This process has the 'elevated inactive jetsam band' attribute.
2656 * So, there will be no trip to IDLE after all.
2657 * Instead, we pin the process in the elevated band,
2658 * where its ACTIVE limits will apply.
2659 */
2660
2661 priority = JETSAM_PRIORITY_ELEVATED_INACTIVE;
2662 }
2663
2664 memorystatus_update_priority_locked(p, priority, false, true);
2665 } else {
2666 memorystatus_update_priority_locked(p, priority, false, false);
2667 }
2668 }
2669 }
2670
2671 /*
2672 * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle
2673 * (clean). They may also indicate that they support termination when idle, with the result that they are promoted
2674 * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low
2675 * priority idle band when clean (and killed earlier, protecting higher-priority processes).
2676 *
2677 * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by
2678 * memorystatus_sysprocs_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band
2679 * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to
2680 * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle
2681 * band. The deferral can be cleared early by clearing the appropriate flag.
2682 *
2683 * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process
2684 * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be
2685 * re-enabled or the guard state cleared, depending on whether the guard deadline has passed.
2686 */
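/*
 * Illustrative user-space sketch of the lifecycle described above (assumes
 * the private libproc wrappers proc_track_dirty()/proc_set_dirty(); daemons
 * normally inherit this behavior from launchd rather than calling it
 * directly):
 *
 *     #include <libproc.h>
 *     #include <unistd.h>
 *
 *     static void
 *     demo_idle_exit_lifecycle(void)
 *     {
 *         pid_t self = getpid();
 *         // opt in once at launch: tracking + idle exit + one-time deferral
 *         proc_track_dirty(self, PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
 *         proc_set_dirty(self, true);  // busy: promoted to the requested band
 *         // ... do work ...
 *         proc_set_dirty(self, false); // idle: ages toward JETSAM_PRIORITY_IDLE
 *     }
 */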
2687
2688 int
2689 memorystatus_dirty_track(proc_t p, uint32_t pcontrol)
2690 {
2691 unsigned int old_dirty;
2692 boolean_t reschedule = FALSE;
2693 boolean_t already_deferred = FALSE;
2694 boolean_t defer_now = FALSE;
2695 int ret = 0;
2696
2697 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK),
2698 p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0);
2699
2700 proc_list_lock();
2701
2702 if ((p->p_listflag & P_LIST_EXITED) != 0) {
2703 /*
2704 * Process is on its way out.
2705 */
2706 ret = EBUSY;
2707 goto exit;
2708 }
2709
2710 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
2711 ret = EPERM;
2712 goto exit;
2713 }
2714
2715 if ((ret = memorystatus_validate_track_flags(p, pcontrol)) != 0) {
2716 /* error */
2717 goto exit;
2718 }
2719
2720 old_dirty = p->p_memstat_dirty;
2721
2722 /* These bits are cumulative, as per <rdar://problem/11159924> */
2723 if (pcontrol & PROC_DIRTY_TRACK) {
2724 p->p_memstat_dirty |= P_DIRTY_TRACK;
2725 }
2726
2727 if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) {
2728 p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT;
2729 }
2730
2731 if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) {
2732 p->p_memstat_dirty |= P_DIRTY_LAUNCH_IN_PROGRESS;
2733 }
2734
2735 if (old_dirty & P_DIRTY_AGING_IN_PROGRESS) {
2736 already_deferred = TRUE;
2737 }
2738
2739
2740 /* This can be set and cleared exactly once. */
2741 if (pcontrol & (PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) {
2742 if ((pcontrol & (PROC_DIRTY_DEFER)) &&
2743 !(old_dirty & P_DIRTY_DEFER)) {
2744 p->p_memstat_dirty |= P_DIRTY_DEFER;
2745 }
2746
2747 if ((pcontrol & (PROC_DIRTY_DEFER_ALWAYS)) &&
2748 !(old_dirty & P_DIRTY_DEFER_ALWAYS)) {
2749 p->p_memstat_dirty |= P_DIRTY_DEFER_ALWAYS;
2750 }
2751
2752 defer_now = TRUE;
2753 }
2754
2755 MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for pid %d\n",
2756 ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N",
2757 defer_now ? "Y" : "N",
2758 p->p_memstat_dirty & P_DIRTY ? "Y" : "N",
2759 p->p_pid);
2760
2761 /* Kick off or invalidate the idle exit deferment if there's a state transition. */
2762 if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) {
2763 if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
2764 if (defer_now && !already_deferred) {
2765 /*
2766 * Request to defer a clean process that's idle-exit enabled
2767 * and not already in the jetsam deferred band. Most likely a
2768 * new launch.
2769 */
2770 memorystatus_schedule_idle_demotion_locked(p, TRUE);
2771 reschedule = TRUE;
2772 } else if (!defer_now) {
2773 /*
2774 * The process isn't asking for the 'aging' facility.
2775 * Could be that it is:
2776 */
2777
2778 if (already_deferred) {
2779 /*
2780 * already in the aging bands. Traditionally,
2781 * some processes have tried to use this to
2782 * opt out of the 'aging' facility.
2783 */
2784
2785 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2786 } else {
2787 /*
2788 * agnostic to the 'aging' facility. In that case,
2789 * we'll go ahead and opt it in because this is likely
2790 * a new launch (clean process, dirty tracking enabled)
2791 */
2792
2793 memorystatus_schedule_idle_demotion_locked(p, TRUE);
2794 }
2795
2796 reschedule = TRUE;
2797 }
2798 }
2799 } else {
2800 /*
2801 * We are trying to operate on a dirty process. Dirty processes have to
2802 * be removed from the deferred band. The question is do we reset the
2803 * deferred state or not?
2804 *
2805 * This could be a legal request like:
2806 * - this process had opted into the 'aging' band
2807 * - but it's now dirty and requests to opt out.
2808 * In this case, we remove the process from the band and reset its
2809 * state too. It'll opt back in properly when needed.
2810 *
2811 * OR, this request could be a user-space bug. E.g.:
2812 * - this process had opted into the 'aging' band when clean
2813 * - and, then issues another request to again put it into the band except
2814 * this time the process is dirty.
2815 * The process going dirty, as a transition in memorystatus_dirty_set(), will pull the process out of
2816 * the deferred band with its state intact. So our request below is a no-op.
2817 * But we do it here anyway for coverage.
2818 *
2819 * memorystatus_update_idle_priority_locked()
2820 * single-mindedly treats a dirty process as "cannot be in the aging band".
2821 */
2822
2823 if (!defer_now && already_deferred) {
2824 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2825 reschedule = TRUE;
2826 } else {
2827 boolean_t reset_state = (jetsam_aging_policy != kJetsamAgingPolicyLegacy) ? TRUE : FALSE;
2828
2829 memorystatus_invalidate_idle_demotion_locked(p, reset_state);
2830 reschedule = TRUE;
2831 }
2832 }
2833
2834 memorystatus_update_idle_priority_locked(p);
2835
2836 if (reschedule) {
2837 memorystatus_reschedule_idle_demotion_locked();
2838 }
2839
2840 ret = 0;
2841
2842 exit:
2843 proc_list_unlock();
2844
2845 return ret;
2846 }
2847
2848 int
2849 memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol)
2850 {
2851 int ret;
2852 boolean_t kill = false;
2853 boolean_t reschedule = FALSE;
2854 boolean_t was_dirty = FALSE;
2855 boolean_t now_dirty = FALSE;
2856
2857 MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty);
2858 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_SET), p->p_pid, self, pcontrol, 0, 0);
2859
2860 proc_list_lock();
2861
2862 if ((p->p_listflag & P_LIST_EXITED) != 0) {
2863 /*
2864 * Process is on its way out.
2865 */
2866 ret = EBUSY;
2867 goto exit;
2868 }
2869
2870 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
2871 ret = EPERM;
2872 goto exit;
2873 }
2874
2875 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
2876 was_dirty = TRUE;
2877 }
2878
2879 if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
2880 /* Dirty tracking not enabled */
2881 ret = EINVAL;
2882 } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
2883 /*
2884 * Process is set to be terminated and we're attempting to mark it dirty.
2885 * Set for termination and marking as clean is OK - see <rdar://problem/10594349>.
2886 */
2887 ret = EBUSY;
2888 } else {
2889 int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN;
2890 if (pcontrol && !(p->p_memstat_dirty & flag)) {
2891 /* Mark the process as having been dirtied at some point */
2892 p->p_memstat_dirty |= (flag | P_DIRTY_MARKED);
2893 memorystatus_dirty_count++;
2894 ret = 0;
2895 } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) {
2896 if ((flag == P_DIRTY_SHUTDOWN) && (!(p->p_memstat_dirty & P_DIRTY))) {
2897 /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */
2898 p->p_memstat_dirty |= P_DIRTY_TERMINATED;
2899 kill = true;
2900 } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
2901 /* Kill previously terminated processes if set clean */
2902 kill = true;
2903 }
2904 p->p_memstat_dirty &= ~flag;
2905 memorystatus_dirty_count--;
2906 ret = 0;
2907 } else {
2908 /* Already set */
2909 ret = EALREADY;
2910 }
2911 }
2912
2913 if (ret != 0) {
2914 goto exit;
2915 }
2916
2917 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
2918 now_dirty = TRUE;
2919 }
2920
2921 if ((was_dirty == TRUE && now_dirty == FALSE) ||
2922 (was_dirty == FALSE && now_dirty == TRUE)) {
2923 /* Manage idle exit deferral, if applied */
2924 if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
2925 /*
2926 * Legacy mode: P_DIRTY_AGING_IN_PROGRESS means the process is in the aging band OR it might be heading back
2927 * there once it's clean again. For the legacy case, this only applies if it has some protection window left.
2928 * P_DIRTY_DEFER: one-time protection window given at launch
2929 * P_DIRTY_DEFER_ALWAYS: protection window given for every dirty->clean transition. Like non-legacy mode.
2930 *
2931 * Non-Legacy mode: P_DIRTY_AGING_IN_PROGRESS means the process is in the aging band. It will always stop over
2932 * in that band on its way to IDLE.
2933 */
2934
2935 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
2936 /*
2937 * New dirty process i.e. "was_dirty == FALSE && now_dirty == TRUE"
2938 *
2939 * The process will move from its aging band to its higher requested
2940 * jetsam band.
2941 */
2942 boolean_t reset_state = (jetsam_aging_policy != kJetsamAgingPolicyLegacy) ? TRUE : FALSE;
2943
2944 memorystatus_invalidate_idle_demotion_locked(p, reset_state);
2945 reschedule = TRUE;
2946 } else {
2947 /*
2948 * Process is back from "dirty" to "clean".
2949 */
2950
2951 if (jetsam_aging_policy == kJetsamAgingPolicyLegacy) {
2952 if (((p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) == FALSE) &&
2953 (mach_absolute_time() >= p->p_memstat_idledeadline)) {
2954 /*
2955 * The process hasn't enrolled in the "always defer after dirty"
2956 * mode and its deadline has expired. It currently
2957 * does not reside in any of the aging buckets.
2958 *
2959 * It's on its way to the JETSAM_PRIORITY_IDLE
2960 * bucket via memorystatus_update_idle_priority_locked()
2961 * below.
2962 *
2963 * So all we need to do is reset all the state on the
2964 * process that's related to the aging bucket i.e.
2965 * the AGING_IN_PROGRESS flag and the timer deadline.
2966 */
2967
2968 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
2969 reschedule = TRUE;
2970 } else {
2971 /*
2972 * Process enrolled in "always stop in deferral band after dirty" OR
2973 * it still has some protection window left and so
2974 * we just re-arm the timer without modifying any
2975 * state on the process iff it still wants into that band.
2976 */
2977
2978 if (p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) {
2979 memorystatus_schedule_idle_demotion_locked(p, TRUE);
2980 reschedule = TRUE;
2981 } else if (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) {
2982 memorystatus_schedule_idle_demotion_locked(p, FALSE);
2983 reschedule = TRUE;
2984 }
2985 }
2986 } else {
2987 memorystatus_schedule_idle_demotion_locked(p, TRUE);
2988 reschedule = TRUE;
2989 }
2990 }
2991 }
2992
2993 memorystatus_update_idle_priority_locked(p);
2994
2995 if (memorystatus_highwater_enabled) {
2996 boolean_t ledger_update_needed = TRUE;
2997 boolean_t use_active;
2998 boolean_t is_fatal;
2999 /*
3000 * We are in this path because this process transitioned between
3001 * dirty <--> clean state. Update the cached memory limits.
3002 */
3003
3004 if (proc_jetsam_state_is_active_locked(p) == TRUE) {
3005 /*
3006 * process is pinned in elevated band
3007 * or
3008 * process is dirty
3009 */
3010 CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
3011 use_active = TRUE;
3012 ledger_update_needed = TRUE;
3013 } else {
3014 /*
3015 * process is clean...but if it has opted into pressured-exit
3016 * we don't apply the INACTIVE limit till the process has aged
3017 * out and is entering the IDLE band.
3018 * See memorystatus_update_priority_locked() for that.
3019 */
3020
3021 if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
3022 ledger_update_needed = FALSE;
3023 } else {
3024 CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
3025 use_active = FALSE;
3026 ledger_update_needed = TRUE;
3027 }
3028 }
3029
3030 /*
3031 * Enforce the new limits by writing to the ledger.
3032 *
3033 * This is a hot path, and holding the proc_list_lock while writing to the ledgers
3034 * (where the task lock is taken) is bad. So, we temporarily drop the proc_list_lock.
3035 * We aren't traversing the jetsam bucket list here, so we should be safe.
3036 * See rdar://21394491.
3037 */
3038
3039 if (ledger_update_needed && proc_ref_locked(p) == p) {
3040 int ledger_limit;
3041 if (p->p_memstat_memlimit > 0) {
3042 ledger_limit = p->p_memstat_memlimit;
3043 } else {
3044 ledger_limit = -1;
3045 }
3046 proc_list_unlock();
3047 task_set_phys_footprint_limit_internal(p->task, ledger_limit, NULL, use_active, is_fatal);
3048 proc_list_lock();
3049 proc_rele_locked(p);
3050
3051 MEMORYSTATUS_DEBUG(3, "memorystatus_dirty_set: new limit on pid %d (%dMB %s) priority(%d) dirty?=0x%x %s\n",
3052 p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
3053 (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty,
3054 (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
3055 }
3056 }
3057
3058 /* If the deferral state changed, reschedule the demotion timer */
3059 if (reschedule) {
3060 memorystatus_reschedule_idle_demotion_locked();
3061 }
3062 }
3063
3064 if (kill) {
3065 if (proc_ref_locked(p) == p) {
3066 proc_list_unlock();
3067 psignal(p, SIGKILL);
3068 proc_list_lock();
3069 proc_rele_locked(p);
3070 }
3071 }
3072
3073 exit:
3074 proc_list_unlock();
3075
3076 return ret;
3077 }
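/*
 * Sketch of the lock-ordering pattern used above when a ledger write is
 * needed from a proc_list_lock'd path (illustrative; demo_ledger_write()
 * stands in for task_set_phys_footprint_limit_internal()):
 *
 *     if (proc_ref_locked(p) == p) {  // pin p so it cannot exit underneath us
 *         proc_list_unlock();         // never hold proc_list_lock across the task lock
 *         demo_ledger_write(p->task);
 *         proc_list_lock();
 *         proc_rele_locked(p);        // drop the pin with the lock re-held
 *     }
 */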
3078
3079 int
3080 memorystatus_dirty_clear(proc_t p, uint32_t pcontrol)
3081 {
3082 int ret = 0;
3083
3084 MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_clear(): %d 0x%x 0x%x\n", p->p_pid, pcontrol, p->p_memstat_dirty);
3085
3086 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_CLEAR), p->p_pid, pcontrol, 0, 0, 0);
3087
3088 proc_list_lock();
3089
3090 if ((p->p_listflag & P_LIST_EXITED) != 0) {
3091 /*
3092 * Process is on its way out.
3093 */
3094 ret = EBUSY;
3095 goto exit;
3096 }
3097
3098 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
3099 ret = EPERM;
3100 goto exit;
3101 }
3102
3103 if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
3104 /* Dirty tracking not enabled */
3105 ret = EINVAL;
3106 goto exit;
3107 }
3108
3109 if (!pcontrol || (pcontrol & (PROC_DIRTY_LAUNCH_IN_PROGRESS | PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) == 0) {
3110 ret = EINVAL;
3111 goto exit;
3112 }
3113
3114 if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) {
3115 p->p_memstat_dirty &= ~P_DIRTY_LAUNCH_IN_PROGRESS;
3116 }
3117
3118 /* This can be set and cleared exactly once. */
3119 if (pcontrol & (PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) {
3120 if (p->p_memstat_dirty & P_DIRTY_DEFER) {
3121 p->p_memstat_dirty &= ~(P_DIRTY_DEFER);
3122 }
3123
3124 if (p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) {
3125 p->p_memstat_dirty &= ~(P_DIRTY_DEFER_ALWAYS);
3126 }
3127
3128 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
3129 memorystatus_update_idle_priority_locked(p);
3130 memorystatus_reschedule_idle_demotion_locked();
3131 }
3132
3133 ret = 0;
3134 exit:
3135 proc_list_unlock();
3136
3137 return ret;
3138 }
3139
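/*
 * Illustrative sketch (not part of this file): how a userspace daemon might
 * drive the dirty-tracking lifecycle implemented by memorystatus_dirty_set()
 * and memorystatus_dirty_clear() above. The libproc wrappers named here
 * (proc_track_dirty, proc_set_dirty, proc_clear_dirty) are assumed from the
 * private libproc interface; treat the exact entry points as assumptions.
 *
 *     #include <libproc.h>
 *     #include <unistd.h>
 *
 *     // Opt in: track dirty state, allow idle-exit, defer idle demotion once.
 *     proc_track_dirty(getpid(), PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
 *
 *     proc_set_dirty(getpid(), true);   // doing work: dirty
 *     // ... work ...
 *     proc_set_dirty(getpid(), false);  // quiescent: clean, eligible to age toward idle-exit
 *
 *     // Give up the one-shot deferral early (lands in memorystatus_dirty_clear()).
 *     proc_clear_dirty(getpid(), PROC_DIRTY_DEFER);
 */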
3140 int
3141 memorystatus_dirty_get(proc_t p, boolean_t locked)
3142 {
3143 int ret = 0;
3144
3145 if (!locked) {
3146 proc_list_lock();
3147 }
3148
3149 if (p->p_memstat_dirty & P_DIRTY_TRACK) {
3150 ret |= PROC_DIRTY_TRACKED;
3151 if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
3152 ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT;
3153 }
3154 if (p->p_memstat_dirty & P_DIRTY) {
3155 ret |= PROC_DIRTY_IS_DIRTY;
3156 }
3157 if (p->p_memstat_dirty & P_DIRTY_LAUNCH_IN_PROGRESS) {
3158 ret |= PROC_DIRTY_LAUNCH_IS_IN_PROGRESS;
3159 }
3160 }
3161
3162 if (!locked) {
3163 proc_list_unlock();
3164 }
3165
3166 return ret;
3167 }
3168
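/*
 * Illustrative sketch (not part of this file): decoding the bitmask returned
 * by memorystatus_dirty_get() above. In-kernel callers use the function
 * directly; a userspace caller would sit behind an equivalent libproc
 * wrapper (assumed, not shown).
 *
 *     int flags = memorystatus_dirty_get(p, FALSE);
 *     if (flags & PROC_DIRTY_TRACKED) {
 *         boolean_t is_dirty  = (flags & PROC_DIRTY_IS_DIRTY) != 0;
 *         boolean_t idle_exit = (flags & PROC_DIRTY_ALLOWS_IDLE_EXIT) != 0;
 *         boolean_t launching = (flags & PROC_DIRTY_LAUNCH_IS_IN_PROGRESS) != 0;
 *         // act on the decoded state
 *     }
 */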
3169 int
3170 memorystatus_on_terminate(proc_t p)
3171 {
3172 int sig;
3173
3174 proc_list_lock();
3175
3176 p->p_memstat_dirty |= P_DIRTY_TERMINATED;
3177
3178 if ((p->p_memstat_dirty & (P_DIRTY_TRACK | P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) {
3179 /* Clean; mark as terminated and issue SIGKILL */
3180 sig = SIGKILL;
3181 } else {
3182 /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */
3183 sig = SIGTERM;
3184 }
3185
3186 proc_list_unlock();
3187
3188 return sig;
3189 }
3190
3191 void
3192 memorystatus_on_suspend(proc_t p)
3193 {
3194 #if CONFIG_FREEZE
3195 uint32_t pages;
3196 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
3197 #endif
3198 proc_list_lock();
3199 #if CONFIG_FREEZE
3200 memorystatus_suspended_count++;
3201 #endif
3202 p->p_memstat_state |= P_MEMSTAT_SUSPENDED;
3203 proc_list_unlock();
3204 }
3205
3206 void
3207 memorystatus_on_resume(proc_t p)
3208 {
3209 #if CONFIG_FREEZE
3210 boolean_t frozen;
3211 pid_t pid;
3212 #endif
3213
3214 proc_list_lock();
3215
3216 #if CONFIG_FREEZE
3217 frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN);
3218 if (frozen) {
3219 /*
3220 * Now that we don't _thaw_ a process completely,
3221 * resuming it (and having some on-demand swapins)
3222 * shouldn't preclude it from being counted as frozen.
3223 *
3224 * memorystatus_frozen_count--;
3225 *
3226 * We preserve the P_MEMSTAT_FROZEN state since the process
3227 * could have state on disk AND so will deserve some protection
3228 * in the jetsam bands.
3229 */
3230 if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) {
3231 p->p_memstat_state |= P_MEMSTAT_REFREEZE_ELIGIBLE;
3232 memorystatus_refreeze_eligible_count++;
3233 }
3234 p->p_memstat_thaw_count++;
3235
3236 memorystatus_thaw_count++;
3237 }
3238
3239 memorystatus_suspended_count--;
3240
3241 pid = p->p_pid;
3242 #endif
3243
3244 /*
3245 * P_MEMSTAT_FROZEN will remain unchanged. This used to be:
3246 * p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN);
3247 */
3248 p->p_memstat_state &= ~P_MEMSTAT_SUSPENDED;
3249
3250 proc_list_unlock();
3251
3252 #if CONFIG_FREEZE
3253 if (frozen) {
3254 memorystatus_freeze_entry_t data = { pid, FALSE, 0 };
3255 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
3256 }
3257 #endif
3258 }
3259
3260 void
3261 memorystatus_on_inactivity(proc_t p)
3262 {
3263 #pragma unused(p)
3264 #if CONFIG_FREEZE
3265 /* Wake the freeze thread */
3266 thread_wakeup((event_t)&memorystatus_freeze_wakeup);
3267 #endif
3268 }
3269
3270 /*
3271 * The proc_list_lock is held by the caller.
3272 */
3273 static uint32_t
3274 memorystatus_build_state(proc_t p)
3275 {
3276 uint32_t snapshot_state = 0;
3277
3278 /* General */
3279 if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
3280 snapshot_state |= kMemorystatusSuspended;
3281 }
3282 if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
3283 snapshot_state |= kMemorystatusFrozen;
3284 }
3285 if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
3286 snapshot_state |= kMemorystatusWasThawed;
3287 }
3288 if (p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION) {
3289 snapshot_state |= kMemorystatusAssertion;
3290 }
3291
3292 /* Tracking */
3293 if (p->p_memstat_dirty & P_DIRTY_TRACK) {
3294 snapshot_state |= kMemorystatusTracked;
3295 }
3296 if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
3297 snapshot_state |= kMemorystatusSupportsIdleExit;
3298 }
3299 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
3300 snapshot_state |= kMemorystatusDirty;
3301 }
3302
3303 return snapshot_state;
3304 }
3305
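/*
 * Illustrative sketch (not part of this file): a consumer of the jetsam
 * snapshot can recover these attributes from an entry's state word, which
 * is filled in from memorystatus_build_state() above:
 *
 *     uint32_t state = entry->state;
 *     boolean_t suspended = (state & kMemorystatusSuspended) != 0;
 *     boolean_t frozen    = (state & kMemorystatusFrozen) != 0;
 *     boolean_t dirty     = (state & kMemorystatusDirty) != 0;
 */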
3306 static boolean_t
3307 kill_idle_exit_proc(void)
3308 {
3309 proc_t p, victim_p = PROC_NULL;
3310 uint64_t current_time, footprint_of_killed_proc;
3311 boolean_t killed = FALSE;
3312 unsigned int i = 0;
3313 os_reason_t jetsam_reason = OS_REASON_NULL;
3314
3315 /* Pick next idle exit victim. */
3316 current_time = mach_absolute_time();
3317
3318 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_IDLE_EXIT);
3319 if (jetsam_reason == OS_REASON_NULL) {
3320 printf("kill_idle_exit_proc: failed to allocate jetsam reason\n");
3321 }
3322
3323 proc_list_lock();
3324
3325 p = memorystatus_get_first_proc_locked(&i, FALSE);
3326 while (p) {
3327 /* No need to look beyond the idle band */
3328 if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) {
3329 break;
3330 }
3331
3332 if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT | P_DIRTY_IS_DIRTY | P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) {
3333 if (current_time >= p->p_memstat_idledeadline) {
3334 p->p_memstat_dirty |= P_DIRTY_TERMINATED;
3335 victim_p = proc_ref_locked(p);
3336 break;
3337 }
3338 }
3339
3340 p = memorystatus_get_next_proc_locked(&i, p, FALSE);
3341 }
3342
3343 proc_list_unlock();
3344
3345 if (victim_p) {
3346 printf("memorystatus: killing_idle_process pid %d [%s] jetsam_reason->osr_code: %llu\n", victim_p->p_pid, (*victim_p->p_name ? victim_p->p_name : "unknown"), jetsam_reason->osr_code);
3347 killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit, jetsam_reason, &footprint_of_killed_proc);
3348 proc_rele(victim_p);
3349 } else {
3350 os_reason_free(jetsam_reason);
3351 }
3352
3353 return killed;
3354 }
3355
3356 static void
3357 memorystatus_thread_wake(void)
3358 {
3359 int thr_id = 0;
3360 int active_thr = atomic_load(&active_jetsam_threads);
3361
3362 /* Wakeup all the jetsam threads */
3363 for (thr_id = 0; thr_id < active_thr; thr_id++) {
3364 thread_wakeup((event_t)&jetsam_threads[thr_id].memorystatus_wakeup);
3365 }
3366 }
3367
3368 #if CONFIG_JETSAM
3369
3370 static void
3371 memorystatus_thread_pool_max(void)
3372 {
3373 /* Increase the jetsam thread pool to max_jetsam_threads */
3374 int max_threads = max_jetsam_threads;
3375 printf("Expanding memorystatus pool to %d!\n", max_threads);
3376 atomic_store(&active_jetsam_threads, max_threads);
3377 }
3378
3379 static void
3380 memorystatus_thread_pool_default(void)
3381 {
3382 /* Restore the jetsam thread pool to a single thread */
3383 printf("Reverting memorystatus pool back to 1\n");
3384 atomic_store(&active_jetsam_threads, 1);
3385 }
3386
3387 #endif /* CONFIG_JETSAM */
3388
3389 extern void vm_pressure_response(void);
3390
3391 static int
3392 memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation)
3393 {
3394 struct jetsam_thread_state *jetsam_thread = jetsam_current_thread();
3395
3396 assert(jetsam_thread != NULL);
3397 if (interval_ms) {
3398 assert_wait_timeout(&jetsam_thread->memorystatus_wakeup, THREAD_UNINT, interval_ms, NSEC_PER_MSEC);
3399 } else {
3400 assert_wait(&jetsam_thread->memorystatus_wakeup, THREAD_UNINT);
3401 }
3402
3403 return thread_block(continuation);
3404 }
3405
3406 static boolean_t
3407 memorystatus_avail_pages_below_pressure(void)
3408 {
3409 #if CONFIG_EMBEDDED
3410 /*
3411 * Instead of CONFIG_EMBEDDED for these *avail_pages* routines, we should
3412 * key off the system having dynamic swap support. With full swap support,
3413 * the system shouldn't really need to worry about various page thresholds.
3414 */
3415 return memorystatus_available_pages <= memorystatus_available_pages_pressure;
3416 #else /* CONFIG_EMBEDDED */
3417 return FALSE;
3418 #endif /* CONFIG_EMBEDDED */
3419 }
3420
3421 static boolean_t
3422 memorystatus_avail_pages_below_critical(void)
3423 {
3424 #if CONFIG_EMBEDDED
3425 return memorystatus_available_pages <= memorystatus_available_pages_critical;
3426 #else /* CONFIG_EMBEDDED */
3427 return FALSE;
3428 #endif /* CONFIG_EMBEDDED */
3429 }
3430
3431 static boolean_t
3432 memorystatus_post_snapshot(int32_t priority, uint32_t cause)
3433 {
3434 boolean_t is_idle_priority;
3435
3436 if (jetsam_aging_policy == kJetsamAgingPolicyLegacy) {
3437 is_idle_priority = (priority == JETSAM_PRIORITY_IDLE);
3438 } else {
3439 is_idle_priority = (priority == JETSAM_PRIORITY_IDLE || priority == JETSAM_PRIORITY_IDLE_DEFERRED);
3440 }
3441 #if CONFIG_EMBEDDED
3442 #pragma unused(cause)
3443 /*
3444 * Don't generate logs for steady-state idle-exit kills,
3445 * unless it is overridden for debug or by the device
3446 * tree.
3447 */
3448
3449 return !is_idle_priority || memorystatus_idle_snapshot;
3450
3451 #else /* CONFIG_EMBEDDED */
3452 /*
3453 * Don't generate logs for steady-state idle-exit kills,
3454 * unless
3455 * - it is overridden for debug or by the device
3456 * tree,
3457 * OR
3458 * - the kill cause is important, i.e. not kMemorystatusKilledIdleExit.
3459 */
3460
3461 boolean_t snapshot_eligible_kill_cause = (is_reason_thrashing(cause) || is_reason_zone_map_exhaustion(cause));
3462 return !is_idle_priority || memorystatus_idle_snapshot || snapshot_eligible_kill_cause;
3463 #endif /* CONFIG_EMBEDDED */
3464 }
3465
3466 static boolean_t
3467 memorystatus_action_needed(void)
3468 {
3469 #if CONFIG_EMBEDDED
3470 return is_reason_thrashing(kill_under_pressure_cause) ||
3471 is_reason_zone_map_exhaustion(kill_under_pressure_cause) ||
3472 memorystatus_available_pages <= memorystatus_available_pages_pressure;
3473 #else /* CONFIG_EMBEDDED */
3474 return is_reason_thrashing(kill_under_pressure_cause) ||
3475 is_reason_zone_map_exhaustion(kill_under_pressure_cause);
3476 #endif /* CONFIG_EMBEDDED */
3477 }
3478
3479 static boolean_t
3480 memorystatus_act_on_hiwat_processes(uint32_t *errors, uint32_t *hwm_kill, boolean_t *post_snapshot, __unused boolean_t *is_critical, uint64_t *memory_reclaimed)
3481 {
3482 boolean_t purged = FALSE, killed = FALSE;
3483
3484 *memory_reclaimed = 0;
3485 killed = memorystatus_kill_hiwat_proc(errors, &purged, memory_reclaimed);
3486
3487 if (killed) {
3488 *hwm_kill = *hwm_kill + 1;
3489 *post_snapshot = TRUE;
3490 return TRUE;
3491 } else {
3492 if (purged == FALSE) {
3493 /* couldn't purge and couldn't kill */
3494 memorystatus_hwm_candidates = FALSE;
3495 }
3496 }
3497
3498 #if CONFIG_JETSAM
3499 /* No highwater processes to kill. Continue or stop for now? */
3500 if (!is_reason_thrashing(kill_under_pressure_cause) &&
3501 !is_reason_zone_map_exhaustion(kill_under_pressure_cause) &&
3502 (memorystatus_available_pages > memorystatus_available_pages_critical)) {
3503 /*
3504 * We are _not_ out of pressure but we are above the critical threshold and there's:
3505 * - no compressor thrashing
3506 * - enough zone memory
3507 * - no more HWM processes left.
3508 * For now, don't kill any other processes.
3509 */
3510
3511 if (*hwm_kill == 0) {
3512 memorystatus_thread_wasted_wakeup++;
3513 }
3514
3515 *is_critical = FALSE;
3516
3517 return TRUE;
3518 }
3519 #endif /* CONFIG_JETSAM */
3520
3521 return FALSE;
3522 }
3523
3524 /*
3525 * kJetsamHighRelaunchCandidatesThreshold defines the percentage of candidates
3526 * in the idle & deferred bands that need to be bad candidates in order to trigger
3527 * aggressive jetsam.
3528 */
3529 #define kJetsamHighRelaunchCandidatesThreshold (100)
3530
3531 /* kJetsamMinCandidatesThreshold defines the minimum number of candidates in the
3532 * idle/deferred bands to trigger aggressive jetsam. This value basically decides
3533 * how much memory the system is ready to hold in the lower bands without triggering
3534 * aggressive jetsam. This number should ideally be tuned based on the memory config
3535 * of the device.
3536 */
3537 #define kJetsamMinCandidatesThreshold (5)
3538
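/*
 * Worked example of the two thresholds above: with 8 candidates in the
 * idle + deferred bands, the minimum-count check passes (8 >= 5), but
 * aggressive jetsam is considered only if all 8 are marked high-relaunch,
 * since (8 * 100) / 8 = 100 >= kJetsamHighRelaunchCandidatesThreshold.
 * With only 4 candidates, aggressive jetsam is never triggered, however
 * bad those candidates look.
 */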
3539 static boolean_t
3540 memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int jld_eval_aggressive_count, __unused int *jld_idle_kills, __unused int jld_idle_kill_candidates, int *total_candidates, int *elevated_bucket_count)
3541 {
3542 boolean_t aggressive_jetsam_needed = false;
3543
3544 /*
3545 * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, we maintain the jetsam
3546 * relaunch behavior for all daemons. Also, daemons and apps are aged in deferred bands on
3547 * every dirty->clean transition. For this aging policy, the best way to determine if
3548 * aggressive jetsam is needed is to see if the kill candidates are mostly bad candidates.
3549 * If yes, then we need to go to higher bands to reclaim memory.
3550 */
3551 proc_list_lock();
3552 /* Get total candidate counts for idle and idle deferred bands */
3553 *total_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].count + memstat_bucket[system_procs_aging_band].count;
3554 /* Get counts of bad kill candidates in idle and idle deferred bands */
3555 int bad_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].relaunch_high_count + memstat_bucket[system_procs_aging_band].relaunch_high_count;
3556
3557 *elevated_bucket_count = memstat_bucket[JETSAM_PRIORITY_ELEVATED_INACTIVE].count;
3558
3559 proc_list_unlock();
3560
3561 /* Check if the share of bad candidates is at least kJetsamHighRelaunchCandidatesThreshold %. Guard the division against an empty candidate list. */
3562 aggressive_jetsam_needed = (*total_candidates > 0) && (((bad_candidates * 100) / *total_candidates) >= kJetsamHighRelaunchCandidatesThreshold);
3563
3564 /*
3565 * Since the new aging policy bases the aggressive jetsam trigger on percentage of
3566 * bad candidates, it is prone to being overly aggressive. In order to mitigate that,
3567 * make sure the system is really under memory pressure before triggering aggressive
3568 * jetsam.
3569 */
3570 if (memorystatus_available_pages > memorystatus_sysproc_aging_aggr_pages) {
3571 aggressive_jetsam_needed = false;
3572 }
3573
3574 #if DEVELOPMENT || DEBUG
3575 printf("memorystatus: aggressive%d: [%s] Bad Candidate Threshold Check (total: %d, bad: %d, threshold: %d %%); Memory Pressure Check (available_pgs: %llu, threshold_pgs: %llu)\n",
3576 jld_eval_aggressive_count, aggressive_jetsam_needed ? "PASSED" : "FAILED", *total_candidates, bad_candidates,
3577 kJetsamHighRelaunchCandidatesThreshold, (uint64_t)memorystatus_available_pages, (uint64_t)memorystatus_sysproc_aging_aggr_pages);
3578 #endif /* DEVELOPMENT || DEBUG */
3579 return aggressive_jetsam_needed;
3580 }
3581
3582 static boolean_t
3583 memorystatus_aggressive_jetsam_needed_default(__unused int jld_eval_aggressive_count, int *jld_idle_kills, int jld_idle_kill_candidates, int *total_candidates, int *elevated_bucket_count)
3584 {
3585 boolean_t aggressive_jetsam_needed = false;
3586 /* Jetsam Loop Detection - locals */
3587 memstat_bucket_t *bucket;
3588 int jld_bucket_count = 0;
3589
3590 proc_list_lock();
3591 switch (jetsam_aging_policy) {
3592 case kJetsamAgingPolicyLegacy:
3593 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
3594 jld_bucket_count = bucket->count;
3595 bucket = &memstat_bucket[JETSAM_PRIORITY_AGING_BAND1];
3596 jld_bucket_count += bucket->count;
3597 break;
3598 case kJetsamAgingPolicyAppsReclaimedFirst:
3599 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
3600 jld_bucket_count = bucket->count;
3601 bucket = &memstat_bucket[system_procs_aging_band];
3602 jld_bucket_count += bucket->count;
3603 bucket = &memstat_bucket[applications_aging_band];
3604 jld_bucket_count += bucket->count;
3605 break;
3606 case kJetsamAgingPolicyNone:
3607 default:
3608 bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
3609 jld_bucket_count = bucket->count;
3610 break;
3611 }
3612
3613 bucket = &memstat_bucket[JETSAM_PRIORITY_ELEVATED_INACTIVE];
3614 *elevated_bucket_count = bucket->count;
3615 *total_candidates = jld_bucket_count;
3616 proc_list_unlock();
3617
3618 aggressive_jetsam_needed = (*jld_idle_kills > jld_idle_kill_candidates);
3619
3620 #if DEVELOPMENT || DEBUG
3621 if (aggressive_jetsam_needed) {
3622 printf("memorystatus: aggressive%d: idle candidates: %d, idle kills: %d\n",
3623 jld_eval_aggressive_count,
3624 jld_idle_kill_candidates,
3625 *jld_idle_kills);
3626 }
3627 #endif /* DEVELOPMENT || DEBUG */
3628 return aggressive_jetsam_needed;
3629 }
3630
3631 static boolean_t
3632 memorystatus_act_aggressive(uint32_t cause, os_reason_t jetsam_reason, int *jld_idle_kills, boolean_t *corpse_list_purged, boolean_t *post_snapshot, uint64_t *memory_reclaimed)
3633 {
3634 boolean_t aggressive_jetsam_needed = false;
3635 boolean_t killed;
3636 uint32_t errors = 0;
3637 uint64_t footprint_of_killed_proc = 0;
3638 int elevated_bucket_count = 0;
3639 int total_candidates = 0;
3640 *memory_reclaimed = 0;
3641
3642 /*
3643 * The aggressive jetsam logic looks at the number of times it has been in the
3644 * aggressive loop to determine the max priority band it should kill up to. The
3645 * static variables below are used to track that property.
3646 *
3647 * To reset those values, the implementation checks if it has been
3648 * memorystatus_jld_eval_period_msecs since the parameters were reset.
3649 */
3650 static int jld_eval_aggressive_count = 0;
3651 static int32_t jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT;
3652 static uint64_t jld_timestamp_msecs = 0;
3653 static int jld_idle_kill_candidates = 0;
3654
3655 if (memorystatus_jld_enabled == FALSE) {
3656 /* If aggressive jetsam is disabled, nothing to do here */
3657 return FALSE;
3658 }
3659
3660 /* Get the current uptime in msecs (second granularity is sufficient here) */
3661 struct timeval jld_now_tstamp = {0, 0};
3662 uint64_t jld_now_msecs = 0;
3663 microuptime(&jld_now_tstamp);
3664 jld_now_msecs = (jld_now_tstamp.tv_sec * 1000);
3665
3666 /*
3667 * The aggressive jetsam logic looks at the number of candidates and their
3668 * properties to decide if aggressive jetsam should be engaged.
3669 */
3670 if (jetsam_aging_policy == kJetsamAgingPolicySysProcsReclaimedFirst) {
3671 /*
3672 * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, the logic looks at the number of
3673 * candidates in the idle and deferred band and how many out of them are marked as high relaunch
3674 * probability.
3675 */
3676 aggressive_jetsam_needed = memorystatus_aggressive_jetsam_needed_sysproc_aging(jld_eval_aggressive_count,
3677 jld_idle_kills, jld_idle_kill_candidates, &total_candidates, &elevated_bucket_count);
3678 } else {
3679 /*
3680 * The other aging policies look at number of candidate processes over a specific time window and
3681 * evaluate if the system is in a jetsam loop. If yes, aggressive jetsam is triggered.
3682 */
3683 aggressive_jetsam_needed = memorystatus_aggressive_jetsam_needed_default(jld_eval_aggressive_count,
3684 jld_idle_kills, jld_idle_kill_candidates, &total_candidates, &elevated_bucket_count);
3685 }
3686
3687 /*
3688 * Check if it has been a long time since the aggressive jetsam evaluation
3689 * parameters have been refreshed. This logic also resets the jld_eval_aggressive_count
3690 * counter to make sure we reset the aggressive jetsam severity.
3691 */
3692 boolean_t param_reval = false;
3693
3694 if ((total_candidates == 0) ||
3695 (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) {
3696 jld_timestamp_msecs = jld_now_msecs;
3697 jld_idle_kill_candidates = total_candidates;
3698 *jld_idle_kills = 0;
3699 jld_eval_aggressive_count = 0;
3700 jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT;
3701 param_reval = true;
3702 }
3703
3704 /*
3705 * If the parameters have been updated, re-evaluate the aggressive_jetsam_needed condition for
3706 * the non-kJetsamAgingPolicySysProcsReclaimedFirst policies, since it's based on jld_idle_kill_candidates etc.
3707 */
3708 if ((param_reval == true) && (jetsam_aging_policy != kJetsamAgingPolicySysProcsReclaimedFirst)) {
3709 aggressive_jetsam_needed = (*jld_idle_kills > jld_idle_kill_candidates);
3710 }
3711
3712 /*
3713 * It is also possible that the system is down to a very small number of processes in the candidate
3714 * bands. In that case, the decisions made by the memorystatus_aggressive_jetsam_needed_* routines
3715 * would not be useful, so do not trigger aggressive jetsam.
3716 */
3717 if (total_candidates < kJetsamMinCandidatesThreshold) {
3718 #if DEVELOPMENT || DEBUG
3719 printf("memorystatus: aggressive: [FAILED] Low Candidate Count (current: %d, threshold: %d)\n", total_candidates, kJetsamMinCandidatesThreshold);
3720 #endif /* DEVELOPMENT || DEBUG */
3721 aggressive_jetsam_needed = false;
3722 }
3723
3724 if (aggressive_jetsam_needed == false) {
3725 /* Either the aging policy or the candidate count decided that aggressive jetsam is not needed. Nothing more to do here. */
3726 return FALSE;
3727 }
3728
3729 /* Looks like aggressive jetsam is needed */
3730 jld_eval_aggressive_count++;
3731
3732 if (jld_eval_aggressive_count == memorystatus_jld_eval_aggressive_count) {
3733 memorystatus_issue_fg_band_notify();
3734
3735 /*
3736 * If we reach this aggressive cycle, corpses might be causing memory pressure.
3737 * So, in an effort to avoid jetsams in the FG band, we will attempt to purge
3738 * corpse memory prior to this final march through JETSAM_PRIORITY_UI_SUPPORT.
3739 */
3740 if (total_corpses_count() > 0 && !*corpse_list_purged) {
3741 task_purge_all_corpses();
3742 *corpse_list_purged = TRUE;
3743 }
3744 } else if (jld_eval_aggressive_count > memorystatus_jld_eval_aggressive_count) {
3745 /*
3746 * Bump up the jetsam priority limit (e.g. the bucket index).
3747 * Enforce bucket index sanity.
3748 */
3749 if ((memorystatus_jld_eval_aggressive_priority_band_max < 0) ||
3750 (memorystatus_jld_eval_aggressive_priority_band_max >= MEMSTAT_BUCKET_COUNT)) {
3751 /*
3752 * Do nothing. Stick with the default level.
3753 */
3754 } else {
3755 jld_priority_band_max = memorystatus_jld_eval_aggressive_priority_band_max;
3756 }
3757 }
3758
3759 /* Visit elevated processes first */
3760 while (elevated_bucket_count) {
3761 elevated_bucket_count--;
3762
3763 /*
3764 * memorystatus_kill_elevated_process() drops a reference,
3765 * so take another one so we can continue to use this exit reason
3766 * even after it returns.
3767 */
3768
3769 os_reason_ref(jetsam_reason);
3770 killed = memorystatus_kill_elevated_process(
3771 cause,
3772 jetsam_reason,
3773 JETSAM_PRIORITY_ELEVATED_INACTIVE,
3774 jld_eval_aggressive_count,
3775 &errors, &footprint_of_killed_proc);
3776 if (killed) {
3777 *post_snapshot = TRUE;
3778 *memory_reclaimed += footprint_of_killed_proc;
3779 if (memorystatus_avail_pages_below_pressure()) {
3780 /*
3781 * Still under pressure.
3782 * Find another pinned process.
3783 */
3784 continue;
3785 } else {
3786 return TRUE;
3787 }
3788 } else {
3789 /*
3790 * No pinned processes left to kill.
3791 * Abandon elevated band.
3792 */
3793 break;
3794 }
3795 }
3796
3797 /*
3798 * memorystatus_kill_processes_aggressive() allocates its own
3799 * jetsam_reason so the kMemorystatusKilledProcThrashing cause
3800 * is consistent throughout the aggressive march.
3801 */
3802 killed = memorystatus_kill_processes_aggressive(
3803 kMemorystatusKilledProcThrashing,
3804 jld_eval_aggressive_count,
3805 jld_priority_band_max,
3806 &errors, &footprint_of_killed_proc);
3807
3808 if (killed) {
3809 /* Always generate logs after aggressive kill */
3810 *post_snapshot = TRUE;
3811 *memory_reclaimed += footprint_of_killed_proc;
3812 *jld_idle_kills = 0;
3813 return TRUE;
3814 }
3815
3816 return FALSE;
3817 }
3818
3819
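/*
 * For orientation, the jetsam thread below escalates in this order:
 * 1. highwater kills - memorystatus_act_on_hiwat_processes()
 * 2. aggressive kills - memorystatus_act_aggressive() (threads not limited to low bands)
 * 3. LRU kill - memorystatus_kill_top_process()
 * 4. corpse purge - task_purge_all_corpses() if still below critical
 * 5. panic - if no victim can be found while below critical
 */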
3820 static void
3821 memorystatus_thread(void *param __unused, wait_result_t wr __unused)
3822 {
3823 boolean_t post_snapshot = FALSE;
3824 uint32_t errors = 0;
3825 uint32_t hwm_kill = 0;
3826 boolean_t sort_flag = TRUE;
3827 boolean_t corpse_list_purged = FALSE;
3828 int jld_idle_kills = 0;
3829 struct jetsam_thread_state *jetsam_thread = jetsam_current_thread();
3830 uint64_t total_memory_reclaimed = 0;
3831
3832 assert(jetsam_thread != NULL);
3833 if (jetsam_thread->inited == FALSE) {
3834 /*
3835 * It's the first time the thread has run, so just mark the thread as privileged and block.
3836 * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>.
3837 */
3838
3839 char name[32];
3840 thread_wire(host_priv_self(), current_thread(), TRUE);
3841 snprintf(name, sizeof(name), "VM_memorystatus_%d", jetsam_thread->index + 1);
3842
3843 /* Limit all but one thread to the lower jetsam bands, as that's where most of the victims are. */
3844 if (jetsam_thread->index == 0) {
3845 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
3846 thread_vm_bind_group_add();
3847 }
3848 jetsam_thread->limit_to_low_bands = FALSE;
3849 } else {
3850 jetsam_thread->limit_to_low_bands = TRUE;
3851 }
3852 thread_set_thread_name(current_thread(), name);
3853 jetsam_thread->inited = TRUE;
3854 memorystatus_thread_block(0, memorystatus_thread);
3855 }
3856
3857 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START,
3858 memorystatus_available_pages, memorystatus_jld_enabled, memorystatus_jld_eval_period_msecs, memorystatus_jld_eval_aggressive_count, 0);
3859
3860 /*
3861 * Jetsam aware version.
3862 *
3863 * The VM pressure notification thread is working its way through clients in parallel.
3864 *
3865 * So, while the pressure notification thread is targeting processes in order of
3866 * increasing jetsam priority, we can hopefully reduce / stop its work by killing
3867 * any processes that have exceeded their highwater mark.
3868 *
3869 * If we run out of HWM processes and our available pages drops below the critical threshold, then,
3870 * we target the least recently used process in order of increasing jetsam priority (exception: the FG band).
3871 */
3872 while (memorystatus_action_needed()) {
3873 boolean_t killed;
3874 int32_t priority;
3875 uint32_t cause;
3876 uint64_t memory_reclaimed = 0;
3877 uint64_t jetsam_reason_code = JETSAM_REASON_INVALID;
3878 os_reason_t jetsam_reason = OS_REASON_NULL;
3879
3880 cause = kill_under_pressure_cause;
3881 switch (cause) {
3882 case kMemorystatusKilledFCThrashing:
3883 jetsam_reason_code = JETSAM_REASON_MEMORY_FCTHRASHING;
3884 break;
3885 case kMemorystatusKilledVMCompressorThrashing:
3886 jetsam_reason_code = JETSAM_REASON_MEMORY_VMCOMPRESSOR_THRASHING;
3887 break;
3888 case kMemorystatusKilledVMCompressorSpaceShortage:
3889 jetsam_reason_code = JETSAM_REASON_MEMORY_VMCOMPRESSOR_SPACE_SHORTAGE;
3890 break;
3891 case kMemorystatusKilledZoneMapExhaustion:
3892 jetsam_reason_code = JETSAM_REASON_ZONE_MAP_EXHAUSTION;
3893 break;
3894 case kMemorystatusKilledVMPageShortage:
3895 /* falls through */
3896 default:
3897 jetsam_reason_code = JETSAM_REASON_MEMORY_VMPAGESHORTAGE;
3898 cause = kMemorystatusKilledVMPageShortage;
3899 break;
3900 }
3901
3902 /* Highwater */
3903 boolean_t is_critical = TRUE;
3904 if (memorystatus_act_on_hiwat_processes(&errors, &hwm_kill, &post_snapshot, &is_critical, &memory_reclaimed)) {
3905 total_memory_reclaimed += memory_reclaimed;
3906 if (is_critical == FALSE) {
3907 /*
3908 * For now, don't kill any other processes.
3909 */
3910 break;
3911 } else {
3912 goto done;
3913 }
3914 }
3915
3916 jetsam_reason = os_reason_create(OS_REASON_JETSAM, jetsam_reason_code);
3917 if (jetsam_reason == OS_REASON_NULL) {
3918 printf("memorystatus_thread: failed to allocate jetsam reason\n");
3919 }
3920
3921 /* Only unlimited jetsam threads should act aggressive */
3922 if (!jetsam_thread->limit_to_low_bands &&
3923 memorystatus_act_aggressive(cause, jetsam_reason, &jld_idle_kills, &corpse_list_purged, &post_snapshot, &memory_reclaimed)) {
3924 total_memory_reclaimed += memory_reclaimed;
3925 goto done;
3926 }
3927
3928 /*
3929 * memorystatus_kill_top_process() drops a reference,
3930 * so take another one so we can continue to use this exit reason
3931 * even after it returns
3932 */
3933 os_reason_ref(jetsam_reason);
3934
3935 /* LRU */
3936 killed = memorystatus_kill_top_process(TRUE, sort_flag, cause, jetsam_reason, &priority, &errors, &memory_reclaimed);
3937 sort_flag = FALSE;
3938
3939 if (killed) {
3940 total_memory_reclaimed += memory_reclaimed;
3941 if (memorystatus_post_snapshot(priority, cause) == TRUE) {
3942 post_snapshot = TRUE;
3943 }
3944
3945 /* Jetsam Loop Detection */
3946 if (memorystatus_jld_enabled == TRUE) {
3947 if ((priority == JETSAM_PRIORITY_IDLE) || (priority == system_procs_aging_band) || (priority == applications_aging_band)) {
3948 jld_idle_kills++;
3949 } else {
3950 /*
3951 * We've reached into bands beyond idle deferred.
3952 * We make no attempt to monitor them.
3953 */
3954 }
3955 }
3956
3957 /*
3958 * If we have jetsammed a process in or above JETSAM_PRIORITY_UI_SUPPORT
3959 * then we attempt to relieve pressure by purging corpse memory and notifying
3960 * anybody wanting to know this.
3961 */
3962 if (priority >= JETSAM_PRIORITY_UI_SUPPORT) {
3963 memorystatus_issue_fg_band_notify();
3964 if (total_corpses_count() > 0 && !corpse_list_purged) {
3965 task_purge_all_corpses();
3966 corpse_list_purged = TRUE;
3967 }
3968 }
3969 goto done;
3970 }
3971
3972 if (memorystatus_avail_pages_below_critical()) {
3973 /*
3974 * Still under pressure and unable to kill a process - purge corpse memory
3975 */
3976 if (total_corpses_count() > 0) {
3977 task_purge_all_corpses();
3978 corpse_list_purged = TRUE;
3979 }
3980
3981 if (!jetsam_thread->limit_to_low_bands && memorystatus_avail_pages_below_critical()) {
3982 /*
3983 * Still under pressure and unable to kill a process - panic
3984 */
3985 panic("memorystatus_jetsam_thread: no victim! available pages:%llu\n", (uint64_t)memorystatus_available_pages);
3986 }
3987 }
3988
3989 done:
3990
3991 /*
3992 * We do not want to over-kill when thrashing has been detected.
3993 * To avoid that, we reset the flag here and notify the
3994 * compressor.
3995 */
3996 if (is_reason_thrashing(kill_under_pressure_cause)) {
3997 kill_under_pressure_cause = 0;
3998 #if CONFIG_JETSAM
3999 vm_thrashing_jetsam_done();
4000 #endif /* CONFIG_JETSAM */
4001 } else if (is_reason_zone_map_exhaustion(kill_under_pressure_cause)) {
4002 kill_under_pressure_cause = 0;
4003 }
4004
4005 os_reason_free(jetsam_reason);
4006 }
4007
4008 kill_under_pressure_cause = 0;
4009
4010 if (errors) {
4011 memorystatus_clear_errors();
4012 }
4013
4014 if (post_snapshot) {
4015 proc_list_lock();
4016 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
4017 sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
4018 uint64_t timestamp_now = mach_absolute_time();
4019 memorystatus_jetsam_snapshot->notification_time = timestamp_now;
4020 memorystatus_jetsam_snapshot->js_gencount++;
4021 if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
4022 timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
4023 proc_list_unlock();
4024 int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
4025 if (!ret) {
4026 proc_list_lock();
4027 memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
4028 proc_list_unlock();
4029 }
4030 } else {
4031 proc_list_unlock();
4032 }
4033 }
4034
4035 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END,
4036 memorystatus_available_pages, total_memory_reclaimed, 0, 0, 0);
4037
4038 memorystatus_thread_block(0, memorystatus_thread);
4039 }
4040
4041 /*
4042 * Returns TRUE:
4043 * when an idle-exitable proc was killed
4044 * Returns FALSE:
4045 * when there are no more idle-exitable procs found
4046 * when the attempt to kill an idle-exitable proc failed
4047 */
4048 boolean_t
4049 memorystatus_idle_exit_from_VM(void)
4050 {
4051 /*
4052 * This routine should no longer be needed since we are
4053 * now using jetsam bands on all platforms and so will deal
4054 * with IDLE processes within the memorystatus thread itself.
4055 *
4056 * But we still use it because we observed that macOS systems
4057 * started heavy compression/swapping with a bunch of
4058 * idle-exitable processes alive and doing nothing. We decided
4059 * to kill those processes rather than start swapping earlier.
4060 */
4061
4062 return kill_idle_exit_proc();
4063 }
4064
4065 /*
4066 * Callback invoked when allowable physical memory footprint exceeded
4067 * (dirty pages + IOKit mappings)
4068 *
4069 * This is invoked for both advisory, non-fatal per-task high watermarks,
4070 * as well as the fatal task memory limits.
4071 */
4072 void
4073 memorystatus_on_ledger_footprint_exceeded(boolean_t warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal)
4074 {
4075 os_reason_t jetsam_reason = OS_REASON_NULL;
4076
4077 proc_t p = current_proc();
4078
4079 #if VM_PRESSURE_EVENTS
4080 if (warning == TRUE) {
4081 /*
4082 * This is a warning path which implies that the current process is close, but has
4083 * not yet exceeded its per-process memory limit.
4084 */
4085 if (memorystatus_warn_process(p->p_pid, memlimit_is_active, memlimit_is_fatal, FALSE /* not exceeded */) != TRUE) {
4086 /* Print warning, since it's possible that task has not registered for pressure notifications */
4087 os_log(OS_LOG_DEFAULT, "memorystatus_on_ledger_footprint_exceeded: failed to warn the current task (%d exiting, or no handler registered?).\n", p->p_pid);
4088 }
4089 return;
4090 }
4091 #endif /* VM_PRESSURE_EVENTS */
4092
4093 if (memlimit_is_fatal) {
4094 /*
4095 * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task
4096 * has violated either the system-wide per-task memory limit OR its own task limit.
4097 */
4098 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_PERPROCESSLIMIT);
4099 if (jetsam_reason == NULL) {
4100 printf("task_exceeded footprint: failed to allocate jetsam reason\n");
4101 } else if (corpse_for_fatal_memkill != 0 && proc_send_synchronous_EXC_RESOURCE(p) == FALSE) {
4102 /* Set OS_REASON_FLAG_GENERATE_CRASH_REPORT to generate corpse */
4103 jetsam_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
4104 }
4105
4106 if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit, jetsam_reason) != TRUE) {
4107 printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n");
4108 }
4109 } else {
4110 /*
4111 * HWM offender exists. Done without locks or synchronization.
4112 * See comment near its declaration for more details.
4113 */
4114 memorystatus_hwm_candidates = TRUE;
4115
4116 #if VM_PRESSURE_EVENTS
4117 /*
4118 * The current process is not in the warning path.
4119 * This path implies the current process has exceeded a non-fatal (soft) memory limit.
4120 * Failure to send note is ignored here.
4121 */
4122 (void)memorystatus_warn_process(p->p_pid, memlimit_is_active, memlimit_is_fatal, TRUE /* exceeded */);
4123
4124 #endif /* VM_PRESSURE_EVENTS */
4125 }
4126 }
4127
4128 void
4129 memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal)
4130 {
4131 proc_t p = current_proc();
4132
4133 /*
4134 * The limit violation is logged here, but only once per process per limit.
4135 * Soft memory limit is a non-fatal high-water-mark
4136 * Hard memory limit is a fatal custom-task-limit or system-wide per-task memory limit.
4137 */
4138
4139 os_log_with_startup_serial(OS_LOG_DEFAULT, "EXC_RESOURCE -> %s[%d] exceeded mem limit: %s%s %d MB (%s)\n",
4140 ((p && *p->p_name) ? p->p_name : "unknown"), (p ? p->p_pid : -1), (memlimit_is_active ? "Active" : "Inactive"),
4141 (memlimit_is_fatal ? "Hard" : "Soft"), max_footprint_mb,
4142 (memlimit_is_fatal ? "fatal" : "non-fatal"));
4143
4144 return;
4145 }
4146
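/*
 * For reference, the log line above renders like the following
 * (hypothetical process name and pid):
 *
 *     EXC_RESOURCE -> MyDaemon[123] exceeded mem limit: ActiveSoft 200 MB (non-fatal)
 */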
4147
4148 /*
4149 * Description:
4150 * Evaluates process state to determine which limit
4151 * should be applied (active vs. inactive limit).
4152 *
4153 * Processes that have the 'elevated inactive jetsam band' attribute
4154 * are first evaluated based on their current priority band.
4155 * presently elevated ==> active
4156 *
4157 * Processes that opt into dirty tracking are evaluated
4158 * based on clean vs dirty state.
4159 * dirty ==> active
4160 * clean ==> inactive
4161 *
4162 * Processes that do not opt into dirty tracking are
4163 * evaluated based on priority level.
4164 * Foreground or above ==> active
4165 * Below Foreground ==> inactive
4166 *
4167 * Return: TRUE if active
4168 * FALSE if inactive
4169 */
4170
4171 static boolean_t
4172 proc_jetsam_state_is_active_locked(proc_t p)
4173 {
4174 if ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) &&
4175 (p->p_memstat_effectivepriority == JETSAM_PRIORITY_ELEVATED_INACTIVE)) {
4176 /*
4177 * process has the 'elevated inactive jetsam band' attribute
4178 * and process is present in the elevated band
4179 * implies active state
4180 */
4181 return TRUE;
4182 } else if (p->p_memstat_dirty & P_DIRTY_TRACK) {
4183 /*
4184 * process has opted into dirty tracking
4185 * active state is based on dirty vs. clean
4186 */
4187 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
4188 /*
4189 * process is dirty
4190 * implies active state
4191 */
4192 return TRUE;
4193 } else {
4194 /*
4195 * process is clean
4196 * implies inactive state
4197 */
4198 return FALSE;
4199 }
4200 } else if (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND) {
4201 /*
4202 * process is Foreground or higher
4203 * implies active state
4204 */
4205 return TRUE;
4206 } else {
4207 /*
4208 * process found below Foreground
4209 * implies inactive state
4210 */
4211 return FALSE;
4212 }
4213 }
4214
4215 static boolean_t
4216 memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason)
4217 {
4218 boolean_t res;
4219
4220 uint32_t errors = 0;
4221 uint64_t memory_reclaimed = 0;
4222
4223 if (victim_pid == -1) {
4224 /* No pid, so kill first process */
4225 res = memorystatus_kill_top_process(TRUE, TRUE, cause, jetsam_reason, NULL, &errors, &memory_reclaimed);
4226 } else {
4227 res = memorystatus_kill_specific_process(victim_pid, cause, jetsam_reason);
4228 }
4229
4230 if (errors) {
4231 memorystatus_clear_errors();
4232 }
4233
4234 if (res == TRUE) {
4235 /* Fire off snapshot notification */
4236 proc_list_lock();
4237 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
4238 sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
4239 uint64_t timestamp_now = mach_absolute_time();
4240 memorystatus_jetsam_snapshot->notification_time = timestamp_now;
4241 if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
4242 timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
4243 proc_list_unlock();
4244 int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
4245 if (!ret) {
4246 proc_list_lock();
4247 memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
4248 proc_list_unlock();
4249 }
4250 } else {
4251 proc_list_unlock();
4252 }
4253 }
4254
4255 return res;
4256 }
4257
4258 /*
4259 * Jetsam a specific process.
4260 */
4261 static boolean_t
4262 memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason)
4263 {
4264 boolean_t killed;
4265 proc_t p;
4266 uint64_t killtime = 0;
4267 uint64_t footprint_of_killed_proc;
4268 clock_sec_t tv_sec;
4269 clock_usec_t tv_usec;
4270 uint32_t tv_msec;
4271
4272 /* TODO - add a victim queue and push this into the main jetsam thread */
4273
4274 p = proc_find(victim_pid);
4275 if (!p) {
4276 os_reason_free(jetsam_reason);
4277 return FALSE;
4278 }
4279
4280 proc_list_lock();
4281
4282 if (memorystatus_jetsam_snapshot_count == 0) {
4283 memorystatus_init_jetsam_snapshot_locked(NULL, 0);
4284 }
4285
4286 killtime = mach_absolute_time();
4287 absolutetime_to_microtime(killtime, &tv_sec, &tv_usec);
4288 tv_msec = tv_usec / 1000;
4289
4290 memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime);
4291
4292 proc_list_unlock();
4293
4294 killed = memorystatus_do_kill(p, cause, jetsam_reason, &footprint_of_killed_proc);
4295
4296 os_log_with_startup_serial(OS_LOG_DEFAULT, "%lu.%03d memorystatus: killing_specific_process pid %d [%s] (%s %d) %lluKB - memorystatus_available_pages: %llu\n",
4297 (unsigned long)tv_sec, tv_msec, victim_pid, ((p && *p->p_name) ? p->p_name : "unknown"),
4298 memorystatus_kill_cause_name[cause], (p ? p->p_memstat_effectivepriority: -1),
4299 footprint_of_killed_proc >> 10, (uint64_t)memorystatus_available_pages);
4300
4301 proc_rele(p);
4302
4303 return killed;
4304 }
4305
4306
4307 /*
4308 * Toggle the P_MEMSTAT_TERMINATED state.
4309 * Takes the proc_list_lock.
4310 */
4311 void
4312 proc_memstat_terminated(proc_t p, boolean_t set)
4313 {
4314 #if DEVELOPMENT || DEBUG
4315 if (p) {
4316 proc_list_lock();
4317 if (set == TRUE) {
4318 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
4319 } else {
4320 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
4321 }
4322 proc_list_unlock();
4323 }
4324 #else
4325 #pragma unused(p, set)
4326 /*
4327 * do nothing
4328 */
4329 #endif /* DEVELOPMENT || DEBUG */
4330 return;
4331 }
4332
4333
4334 #if CONFIG_JETSAM
4335 /*
4336 * This is invoked when cpulimits have been exceeded while in fatal mode.
4337 * The jetsam_flags do not apply as those are for memory related kills.
4338 * We call this routine so that the offending process is killed with
4339 * a non-zero exit status.
4340 */
4341 void
4342 jetsam_on_ledger_cpulimit_exceeded(void)
4343 {
4344 int retval = 0;
4345 int jetsam_flags = 0; /* make it obvious */
4346 proc_t p = current_proc();
4347 os_reason_t jetsam_reason = OS_REASON_NULL;
4348
4349 printf("task_exceeded_cpulimit: killing pid %d [%s]\n",
4350 p->p_pid, (*p->p_name ? p->p_name : "(unknown)"));
4351
4352 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_CPULIMIT);
4353 if (jetsam_reason == OS_REASON_NULL) {
4354 printf("task_exceeded_cpulimit: unable to allocate memory for jetsam reason\n");
4355 }
4356
4357 retval = jetsam_do_kill(p, jetsam_flags, jetsam_reason);
4358
4359 if (retval) {
4360 printf("task_exceeded_cpulimit: failed to kill current task (exiting?).\n");
4361 }
4362 }
4363
4364 #endif /* CONFIG_JETSAM */
4365
4366 static void
4367 memorystatus_get_task_memory_region_count(task_t task, uint64_t *count)
4368 {
4369 assert(task);
4370 assert(count);
4371
4372 *count = get_task_memory_region_count(task);
4373 }
4374
4375
4376 #define MEMORYSTATUS_VM_MAP_FORK_ALLOWED 0x100000000
4377 #define MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED 0x200000000
4378
4379 #if DEVELOPMENT || DEBUG
4380
4381 /*
4382 * Sysctl only used to test memorystatus_allowed_vm_map_fork() path.
4383 * set a new pidwatch value
4384 * or
4385 * get the current pidwatch value
4386 *
4387 * The pidwatch_val starts out with a PID to watch for in the map_fork path.
4388 * Its value is:
4389 * - OR'd with MEMORYSTATUS_VM_MAP_FORK_ALLOWED if we allow the map_fork.
4390 * - OR'd with MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED if we disallow the map_fork.
4391 * - set to -1ull if the map_fork() is aborted for other reasons.
4392 */
4393
4394 uint64_t memorystatus_vm_map_fork_pidwatch_val = 0;
4395
4396 static int sysctl_memorystatus_vm_map_fork_pidwatch SYSCTL_HANDLER_ARGS {
4397 #pragma unused(oidp, arg1, arg2)
4398
4399 uint64_t new_value = 0;
4400 uint64_t old_value = 0;
4401 int error = 0;
4402
4403 /*
4404 * The pid is held in the low 32 bits.
4405 * The 'allowed' flags are in the upper 32 bits.
4406 */
4407 old_value = memorystatus_vm_map_fork_pidwatch_val;
4408
4409 error = sysctl_io_number(req, old_value, sizeof(old_value), &new_value, NULL);
4410
4411 if (error || !req->newptr) {
4412 /*
4413 * No new value passed in.
4414 */
4415 return error;
4416 }
4417
4418 /*
4419 * A new pid was passed in via req->newptr.
4420 * Ignore any attempt to set the higher order bits.
4421 */
4422 memorystatus_vm_map_fork_pidwatch_val = new_value & 0xFFFFFFFF;
4423 printf("memorystatus: pidwatch old_value = 0x%llx, new_value = 0x%llx \n", old_value, new_value);
4424
4425 return error;
4426 }
4427
4428 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_map_fork_pidwatch, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED,
4429 0, 0, sysctl_memorystatus_vm_map_fork_pidwatch, "Q", "get/set pid watched for in vm_map_fork");
4430
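/*
 * Illustrative sketch (not part of this file): exercising the pidwatch sysctl
 * from a DEVELOPMENT || DEBUG test harness. Only the low 32 bits (the pid)
 * are accepted on write; reading the value back after a corpse fork shows
 * which MEMORYSTATUS_VM_MAP_FORK_* flag was OR'd in. target_pid is a
 * hypothetical test pid.
 *
 *     #include <sys/sysctl.h>
 *
 *     uint64_t val = (uint64_t)target_pid;
 *     sysctlbyname("kern.memorystatus_vm_map_fork_pidwatch", NULL, NULL, &val, sizeof(val));
 *
 *     // ... induce an EXC_RESOURCE / corpse fork for target_pid ...
 *
 *     size_t len = sizeof(val);
 *     sysctlbyname("kern.memorystatus_vm_map_fork_pidwatch", &val, &len, NULL, 0);
 *     if (val & MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED) {
 *         // the watched pid was denied the vm_map_fork
 *     }
 */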
4431
4432 /*
4433 * Record if a watched process fails to qualify for a vm_map_fork().
4434 */
4435 void
4436 memorystatus_abort_vm_map_fork(task_t task)
4437 {
4438 if (memorystatus_vm_map_fork_pidwatch_val != 0) {
4439 proc_t p = get_bsdtask_info(task);
4440 if (p != NULL && memorystatus_vm_map_fork_pidwatch_val == (uint64_t)p->p_pid) {
4441 memorystatus_vm_map_fork_pidwatch_val = -1ull;
4442 }
4443 }
4444 }
4445
4446 static void
4447 set_vm_map_fork_pidwatch(task_t task, uint64_t x)
4448 {
4449 if (memorystatus_vm_map_fork_pidwatch_val != 0) {
4450 proc_t p = get_bsdtask_info(task);
4451 if (p && (memorystatus_vm_map_fork_pidwatch_val == (uint64_t)p->p_pid)) {
4452 memorystatus_vm_map_fork_pidwatch_val |= x;
4453 }
4454 }
4455 }
4456
4457 #else /* DEVELOPMENT || DEBUG */
4458
4459
4460 static void
4461 set_vm_map_fork_pidwatch(task_t task, uint64_t x)
4462 {
4463 #pragma unused(task)
4464 #pragma unused(x)
4465 }
4466
4467 #endif /* DEVELOPMENT || DEBUG */
4468
4469 /*
4470 * Called during EXC_RESOURCE handling when a process exceeds a soft
4471 * memory limit. This is the corpse fork path and here we decide if
4472 * vm_map_fork will be allowed when creating the corpse.
4473 * The task being considered is suspended.
4474 *
4475 * By default, a vm_map_fork is allowed to proceed.
4476 *
4477 * A few simple policy assumptions:
4478 * Desktop platform is not considered in this path.
4479 * The vm_map_fork is always allowed.
4480 *
4481 * If the device has a zero system-wide task limit,
4482 * then the vm_map_fork is allowed.
4483 *
4484 * And if a process's memory footprint calculates to less
4485 * than or equal to a quarter of the system-wide task limit,
4486 * then the vm_map_fork is allowed. This calculation
4487 * is based on the assumption that a process can
4488 * munch memory up to the system-wide task limit.
4489 */
4490 boolean_t
4491 memorystatus_allowed_vm_map_fork(task_t task)
4492 {
4493 boolean_t is_allowed = TRUE; /* default */
4494
4495 #if CONFIG_EMBEDDED
4496
4497 uint64_t footprint_in_bytes;
4498 uint64_t max_allowed_bytes;
4499
4500 if (max_task_footprint_mb == 0) {
4501 set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_ALLOWED);
4502 return is_allowed;
4503 }
4504
4505 footprint_in_bytes = get_task_phys_footprint(task);
4506
4507 /*
4508 * Maximum is 1/4 of the system-wide task limit.
4509 */
4510 max_allowed_bytes = ((uint64_t)max_task_footprint_mb * 1024 * 1024) >> 2;
4511
4512 if (footprint_in_bytes > max_allowed_bytes) {
4513 printf("memorystatus disallowed vm_map_fork %lld %lld\n", footprint_in_bytes, max_allowed_bytes);
4514 set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED);
4515 return !is_allowed;
4516 }
4517 #endif /* CONFIG_EMBEDDED */
4518
4519 set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_ALLOWED);
4520 return is_allowed;
4521 }
4522
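/*
 * Worked example of the check above, assuming an illustrative
 * max_task_footprint_mb of 2048: max_allowed_bytes = (2048 MB) >> 2 = 512 MB.
 * A suspended task with a 600 MB physical footprint is denied the corpse-fork
 * vm_map_fork, while one at 400 MB is allowed.
 */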
4523 void
4524 memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages)
4525 {
4526 assert(task);
4527 assert(footprint);
4528
4529 uint64_t pages;
4530
4531 pages = (get_task_phys_footprint(task) / PAGE_SIZE_64);
4532 assert(((uint32_t)pages) == pages);
4533 *footprint = (uint32_t)pages;
4534
4535 if (max_footprint_lifetime) {
4536 pages = (get_task_phys_footprint_lifetime_max(task) / PAGE_SIZE_64);
4537 assert(((uint32_t)pages) == pages);
4538 *max_footprint_lifetime = (uint32_t)pages;
4539 }
4540 if (purgeable_pages) {
4541 pages = (get_task_purgeable_size(task) / PAGE_SIZE_64);
4542 assert(((uint32_t)pages) == pages);
4543 *purgeable_pages = (uint32_t)pages;
4544 }
4545 }
4546
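/*
 * Example of the byte -> page conversion above: on a platform with 16 KB
 * pages (PAGE_SIZE_64 == 16384), a 64 MB physical footprint reports as
 * 67108864 / 16384 = 4096 pages. The page size varies by platform, so
 * consumers should not assume a fixed value.
 */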
4547 static void
4548 memorystatus_get_task_phys_footprint_page_counts(task_t task,
4549 uint64_t *internal_pages, uint64_t *internal_compressed_pages,
4550 uint64_t *purgeable_nonvolatile_pages, uint64_t *purgeable_nonvolatile_compressed_pages,
4551 uint64_t *alternate_accounting_pages, uint64_t *alternate_accounting_compressed_pages,
4552 uint64_t *iokit_mapped_pages, uint64_t *page_table_pages)
4553 {
4554 assert(task);
4555
4556 if (internal_pages) {
4557 *internal_pages = (get_task_internal(task) / PAGE_SIZE_64);
4558 }
4559
4560 if (internal_compressed_pages) {
4561 *internal_compressed_pages = (get_task_internal_compressed(task) / PAGE_SIZE_64);
4562 }
4563
4564 if (purgeable_nonvolatile_pages) {
4565 *purgeable_nonvolatile_pages = (get_task_purgeable_nonvolatile(task) / PAGE_SIZE_64);
4566 }
4567
4568 if (purgeable_nonvolatile_compressed_pages) {
4569 *purgeable_nonvolatile_compressed_pages = (get_task_purgeable_nonvolatile_compressed(task) / PAGE_SIZE_64);
4570 }
4571
4572 if (alternate_accounting_pages) {
4573 *alternate_accounting_pages = (get_task_alternate_accounting(task) / PAGE_SIZE_64);
4574 }
4575
4576 if (alternate_accounting_compressed_pages) {
4577 *alternate_accounting_compressed_pages = (get_task_alternate_accounting_compressed(task) / PAGE_SIZE_64);
4578 }
4579
4580 if (iokit_mapped_pages) {
4581 *iokit_mapped_pages = (get_task_iokit_mapped(task) / PAGE_SIZE_64);
4582 }
4583
4584 if (page_table_pages) {
4585 *page_table_pages = (get_task_page_table(task) / PAGE_SIZE_64);
4586 }
4587 }
4588
4589 /*
4590 * This routine only acts on the global jetsam event snapshot.
4591 * Updating the process's entry can race when the memorystatus_thread
4592 * has chosen to kill a process that is racing to exit on another core.
4593 */
4594 static void
4595 memorystatus_update_jetsam_snapshot_entry_locked(proc_t p, uint32_t kill_cause, uint64_t killtime)
4596 {
4597 memorystatus_jetsam_snapshot_entry_t *entry = NULL;
4598 memorystatus_jetsam_snapshot_t *snapshot = NULL;
4599 memorystatus_jetsam_snapshot_entry_t *snapshot_list = NULL;
4600
4601 unsigned int i;
4602
4603 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
4604
4605 if (memorystatus_jetsam_snapshot_count == 0) {
4606 /*
4607 * No active snapshot.
4608 * Nothing to do.
4609 */
4610 return;
4611 }
4612
4613 /*
4614 * Sanity check as this routine should only be called
4615 * from a jetsam kill path.
4616 */
4617 assert(kill_cause != 0 && killtime != 0);
4618
4619 snapshot = memorystatus_jetsam_snapshot;
4620 snapshot_list = memorystatus_jetsam_snapshot->entries;
4621
4622 for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) {
4623 if (snapshot_list[i].pid == p->p_pid) {
4624 entry = &snapshot_list[i];
4625
4626 if (entry->killed || entry->jse_killtime) {
4627 /*
4628 * We apparently raced on the exit path
4629 * for this process, as it's snapshot entry
4630 * has already recorded a kill.
4631 */
4632 assert(entry->killed && entry->jse_killtime);
4633 break;
4634 }
4635
4636 /*
4637 * Update the entry we just found in the snapshot.
4638 */
4639
4640 entry->killed = kill_cause;
4641 entry->jse_killtime = killtime;
4642 entry->jse_gencount = snapshot->js_gencount;
4643 entry->jse_idle_delta = p->p_memstat_idle_delta;
4644 #if CONFIG_FREEZE
4645 entry->jse_thaw_count = p->p_memstat_thaw_count;
4646 #else /* CONFIG_FREEZE */
4647 entry->jse_thaw_count = 0;
4648 #endif /* CONFIG_FREEZE */
4649
4650 /*
4651 * If a process has moved between bands since snapshot was
4652 * initialized, then likely these fields changed too.
4653 */
4654 if (entry->priority != p->p_memstat_effectivepriority) {
4655 strlcpy(entry->name, p->p_name, sizeof(entry->name));
4656 entry->priority = p->p_memstat_effectivepriority;
4657 entry->state = memorystatus_build_state(p);
4658 entry->user_data = p->p_memstat_userdata;
4659 entry->fds = p->p_fd->fd_nfiles;
4660 }
4661
4662 /*
4663 * Always update the page counts on a kill.
4664 */
4665
4666 uint32_t pages = 0;
4667 uint32_t max_pages_lifetime = 0;
4668 uint32_t purgeable_pages = 0;
4669
4670 memorystatus_get_task_page_counts(p->task, &pages, &max_pages_lifetime, &purgeable_pages);
4671 entry->pages = (uint64_t)pages;
4672 entry->max_pages_lifetime = (uint64_t)max_pages_lifetime;
4673 entry->purgeable_pages = (uint64_t)purgeable_pages;
4674
4675 uint64_t internal_pages = 0;
4676 uint64_t internal_compressed_pages = 0;
4677 uint64_t purgeable_nonvolatile_pages = 0;
4678 uint64_t purgeable_nonvolatile_compressed_pages = 0;
4679 uint64_t alternate_accounting_pages = 0;
4680 uint64_t alternate_accounting_compressed_pages = 0;
4681 uint64_t iokit_mapped_pages = 0;
4682 uint64_t page_table_pages = 0;
4683
4684 memorystatus_get_task_phys_footprint_page_counts(p->task, &internal_pages, &internal_compressed_pages,
4685 &purgeable_nonvolatile_pages, &purgeable_nonvolatile_compressed_pages,
4686 &alternate_accounting_pages, &alternate_accounting_compressed_pages,
4687 &iokit_mapped_pages, &page_table_pages);
4688
4689 entry->jse_internal_pages = internal_pages;
4690 entry->jse_internal_compressed_pages = internal_compressed_pages;
4691 entry->jse_purgeable_nonvolatile_pages = purgeable_nonvolatile_pages;
4692 entry->jse_purgeable_nonvolatile_compressed_pages = purgeable_nonvolatile_compressed_pages;
4693 entry->jse_alternate_accounting_pages = alternate_accounting_pages;
4694 entry->jse_alternate_accounting_compressed_pages = alternate_accounting_compressed_pages;
4695 entry->jse_iokit_mapped_pages = iokit_mapped_pages;
4696 entry->jse_page_table_pages = page_table_pages;
4697
4698 uint64_t region_count = 0;
4699 memorystatus_get_task_memory_region_count(p->task, &region_count);
4700 entry->jse_memory_region_count = region_count;
4701
4702 goto exit;
4703 }
4704 }
4705
4706 if (entry == NULL) {
4707 /*
4708 * The entry was not found in the snapshot, so the process must have
4709 * launched after the snapshot was initialized.
4710 * Let's try to append the new entry.
4711 */
4712 if (memorystatus_jetsam_snapshot_count < memorystatus_jetsam_snapshot_max) {
4713 /*
4714 * A populated snapshot buffer exists
4715 * and there is room to init a new entry.
4716 */
4717 assert(memorystatus_jetsam_snapshot_count == snapshot->entry_count);
4718
4719 unsigned int next = memorystatus_jetsam_snapshot_count;
4720
4721 if (memorystatus_init_jetsam_snapshot_entry_locked(p, &snapshot_list[next], (snapshot->js_gencount)) == TRUE) {
4722 entry = &snapshot_list[next];
4723 entry->killed = kill_cause;
4724 entry->jse_killtime = killtime;
4725
4726 snapshot->entry_count = ++next;
4727 memorystatus_jetsam_snapshot_count = next;
4728
4729 if (memorystatus_jetsam_snapshot_count >= memorystatus_jetsam_snapshot_max) {
4730 /*
4731 * We just used the last slot in the snapshot buffer.
4732 * We only want to log it once... so we do it here
4733 * when we notice we've hit the max.
4734 */
4735 printf("memorystatus: WARNING snapshot buffer is full, count %d\n",
4736 memorystatus_jetsam_snapshot_count);
4737 }
4738 }
4739 }
4740 }
4741
4742 exit:
4743 if (entry == NULL) {
4744 /*
4745 * If we reach here, the snapshot buffer could not be updated.
4746 * Most likely, the buffer is full, in which case we would have
4747 * logged a warning in the previous call.
4748 *
4749 * For now, we will stop appending snapshot entries.
4750 * When the buffer is consumed, the snapshot state will reset.
4751 */
4752
4753 MEMORYSTATUS_DEBUG(4, "memorystatus_update_jetsam_snapshot_entry_locked: failed to update pid %d, priority %d, count %d\n",
4754 p->p_pid, p->p_memstat_effectivepriority, memorystatus_jetsam_snapshot_count);
4755 }
4756
4757 return;
4758 }
4759
4760 #if CONFIG_JETSAM
4761 void
4762 memorystatus_pages_update(unsigned int pages_avail)
4763 {
4764 memorystatus_available_pages = pages_avail;
4765
4766 #if VM_PRESSURE_EVENTS
4767 /*
4768 * Since memorystatus_available_pages changes, we should
4769 * re-evaluate the pressure levels on the system and
4770 * check if we need to wake the pressure thread.
4771 * We also update memorystatus_level in that routine.
4772 */
4773 vm_pressure_response();
4774
4775 if (memorystatus_available_pages <= memorystatus_available_pages_pressure) {
4776 if (memorystatus_hwm_candidates || (memorystatus_available_pages <= memorystatus_available_pages_critical)) {
4777 memorystatus_thread_wake();
4778 }
4779 }
4780 #if CONFIG_FREEZE
4781 /*
4782 * We can't grab the freezer_mutex here, even though that synchronization would be correct for inspecting
4783 * the # of frozen processes and waking up the freezer thread. The reason is that we can arrive in this
4784 * code with the page-queue locks (possibly) held and preemption disabled, so trying to grab a mutex here
4785 * would trigger the "mutex with preemption disabled" panic.
4786 */
4787
4788 if (memorystatus_freeze_thread_should_run() == TRUE) {
4789 /*
4790 * The freezer thread is usually woken up by a user-space call, e.g. pid_hibernate(any process).
4791 * That trigger isn't invoked often enough, so we add this explicit wakeup here.
4792 */
4793 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
4794 thread_wakeup((event_t)&memorystatus_freeze_wakeup);
4795 }
4796 }
4797 #endif /* CONFIG_FREEZE */
4798
4799 #else /* VM_PRESSURE_EVENTS */
4800
4801 boolean_t critical, delta;
4802
4803 if (!memorystatus_delta) {
4804 return;
4805 }
4806
4807 critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE;
4808 delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta))
4809 || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE;
4810
4811 if (critical || delta) {
4812 unsigned int total_pages;
4813
4814 total_pages = (unsigned int) atop_64(max_mem);
4815 #if CONFIG_SECLUDED_MEMORY
4816 total_pages -= vm_page_secluded_count;
4817 #endif /* CONFIG_SECLUDED_MEMORY */
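/*
 * Illustrative example (values assumed, not from this build): with 4 KB pages
 * and max_mem = 2 GB, total_pages = atop_64(max_mem) = 524288, so 52429
 * available pages yields memorystatus_level = 52429 * 100 / 524288 = 10 (percent).
 */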
4818 memorystatus_level = memorystatus_available_pages * 100 / total_pages;
4819 memorystatus_thread_wake();
4820 }
4821 #endif /* VM_PRESSURE_EVENTS */
4822 }
4823 #endif /* CONFIG_JETSAM */
4824
4825 static boolean_t
4826 memorystatus_init_jetsam_snapshot_entry_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry, uint64_t gencount)
4827 {
4828 clock_sec_t tv_sec;
4829 clock_usec_t tv_usec;
4830 uint32_t pages = 0;
4831 uint32_t max_pages_lifetime = 0;
4832 uint32_t purgeable_pages = 0;
4833 uint64_t internal_pages = 0;
4834 uint64_t internal_compressed_pages = 0;
4835 uint64_t purgeable_nonvolatile_pages = 0;
4836 uint64_t purgeable_nonvolatile_compressed_pages = 0;
4837 uint64_t alternate_accounting_pages = 0;
4838 uint64_t alternate_accounting_compressed_pages = 0;
4839 uint64_t iokit_mapped_pages = 0;
4840 uint64_t page_table_pages = 0;
4841 uint64_t region_count = 0;
4842 uint64_t cids[COALITION_NUM_TYPES];
4843
4844 memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t));
4845
4846 entry->pid = p->p_pid;
4847 strlcpy(&entry->name[0], p->p_name, sizeof(entry->name));
4848 entry->priority = p->p_memstat_effectivepriority;
4849
4850 memorystatus_get_task_page_counts(p->task, &pages, &max_pages_lifetime, &purgeable_pages);
4851 entry->pages = (uint64_t)pages;
4852 entry->max_pages_lifetime = (uint64_t)max_pages_lifetime;
4853 entry->purgeable_pages = (uint64_t)purgeable_pages;
4854
4855 memorystatus_get_task_phys_footprint_page_counts(p->task, &internal_pages, &internal_compressed_pages,
4856 &purgeable_nonvolatile_pages, &purgeable_nonvolatile_compressed_pages,
4857 &alternate_accounting_pages, &alternate_accounting_compressed_pages,
4858 &iokit_mapped_pages, &page_table_pages);
4859
4860 entry->jse_internal_pages = internal_pages;
4861 entry->jse_internal_compressed_pages = internal_compressed_pages;
4862 entry->jse_purgeable_nonvolatile_pages = purgeable_nonvolatile_pages;
4863 entry->jse_purgeable_nonvolatile_compressed_pages = purgeable_nonvolatile_compressed_pages;
4864 entry->jse_alternate_accounting_pages = alternate_accounting_pages;
4865 entry->jse_alternate_accounting_compressed_pages = alternate_accounting_compressed_pages;
4866 entry->jse_iokit_mapped_pages = iokit_mapped_pages;
4867 entry->jse_page_table_pages = page_table_pages;
4868
4869 memorystatus_get_task_memory_region_count(p->task, &region_count);
4870 entry->jse_memory_region_count = region_count;
4871
4872 entry->state = memorystatus_build_state(p);
4873 entry->user_data = p->p_memstat_userdata;
4874 memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid));
4875 entry->fds = p->p_fd->fd_nfiles;
4876
4877 absolutetime_to_microtime(get_task_cpu_time(p->task), &tv_sec, &tv_usec);
4878 entry->cpu_time.tv_sec = (int64_t)tv_sec;
4879 entry->cpu_time.tv_usec = (int64_t)tv_usec;
4880
4881 assert(p->p_stats != NULL);
4882 entry->jse_starttime = p->p_stats->ps_start; /* abstime process started */
4883 entry->jse_killtime = 0; /* abstime jetsam chose to kill process */
4884 entry->killed = 0; /* the jetsam kill cause */
4885 entry->jse_gencount = gencount; /* indicates a pass through jetsam thread, when process was targeted to be killed */
4886
4887 entry->jse_idle_delta = p->p_memstat_idle_delta; /* Most recent timespan spent in idle-band */
4888
4889 #if CONFIG_FREEZE
4890 entry->jse_thaw_count = p->p_memstat_thaw_count;
4891 #else /* CONFIG_FREEZE */
4892 entry->jse_thaw_count = 0;
4893 #endif /* CONFIG_FREEZE */
4894
4895 proc_coalitionids(p, cids);
4896 entry->jse_coalition_jetsam_id = cids[COALITION_TYPE_JETSAM];
4897
4898 return TRUE;
4899 }
4900
4901 static void
4902 memorystatus_init_snapshot_vmstats(memorystatus_jetsam_snapshot_t *snapshot)
4903 {
4904 kern_return_t kr = KERN_SUCCESS;
4905 mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
4906 vm_statistics64_data_t vm_stat;
4907
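/*
 * Pull aggregate VM statistics from the Mach host; on failure, publish
 * zeroed stats rather than failing snapshot initialization.
 */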
4908 if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count)) != KERN_SUCCESS) {
4909 printf("memorystatus_init_jetsam_snapshot_stats: host_statistics64 failed with %d\n", kr);
4910 memset(&snapshot->stats, 0, sizeof(snapshot->stats));
4911 } else {
4912 snapshot->stats.free_pages = vm_stat.free_count;
4913 snapshot->stats.active_pages = vm_stat.active_count;
4914 snapshot->stats.inactive_pages = vm_stat.inactive_count;
4915 snapshot->stats.throttled_pages = vm_stat.throttled_count;
4916 snapshot->stats.purgeable_pages = vm_stat.purgeable_count;
4917 snapshot->stats.wired_pages = vm_stat.wire_count;
4918
4919 snapshot->stats.speculative_pages = vm_stat.speculative_count;
4920 snapshot->stats.filebacked_pages = vm_stat.external_page_count;
4921 snapshot->stats.anonymous_pages = vm_stat.internal_page_count;
4922 snapshot->stats.compressions = vm_stat.compressions;
4923 snapshot->stats.decompressions = vm_stat.decompressions;
4924 snapshot->stats.compressor_pages = vm_stat.compressor_page_count;
4925 snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor;
4926 }
4927
4928 get_zone_map_size(&snapshot->stats.zone_map_size, &snapshot->stats.zone_map_capacity);
4929
4930 bzero(snapshot->stats.largest_zone_name, sizeof(snapshot->stats.largest_zone_name));
4931 get_largest_zone_info(snapshot->stats.largest_zone_name, sizeof(snapshot->stats.largest_zone_name),
4932 &snapshot->stats.largest_zone_size);
4933 }
4934
4935 /*
4936 * Collect vm statistics at boot.
4937 * Called only once (see kern_exec.c)
4938 * Data can be consumed at any time.
4939 */
4940 void
4941 memorystatus_init_at_boot_snapshot(void)
4942 {
4943 memorystatus_init_snapshot_vmstats(&memorystatus_at_boot_snapshot);
4944 memorystatus_at_boot_snapshot.entry_count = 0;
4945 memorystatus_at_boot_snapshot.notification_time = 0; /* updated when consumed */
4946 memorystatus_at_boot_snapshot.snapshot_time = mach_absolute_time();
4947 }
4948
4949 static void
4950 memorystatus_init_jetsam_snapshot_locked(memorystatus_jetsam_snapshot_t *od_snapshot, uint32_t ods_list_count)
4951 {
4952 proc_t p, next_p;
4953 unsigned int b = 0, i = 0;
4954
4955 memorystatus_jetsam_snapshot_t *snapshot = NULL;
4956 memorystatus_jetsam_snapshot_entry_t *snapshot_list = NULL;
4957 unsigned int snapshot_max = 0;
4958
4959 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
4960
4961 if (od_snapshot) {
4962 /*
4963 * This is an on_demand snapshot
4964 */
4965 snapshot = od_snapshot;
4966 snapshot_list = od_snapshot->entries;
4967 snapshot_max = ods_list_count;
4968 } else {
4969 /*
4970 * This is a jetsam event snapshot
4971 */
4972 snapshot = memorystatus_jetsam_snapshot;
4973 snapshot_list = memorystatus_jetsam_snapshot->entries;
4974 snapshot_max = memorystatus_jetsam_snapshot_max;
4975 }
4976
4977 /*
4978 * Init the snapshot header information
4979 */
4980 memorystatus_init_snapshot_vmstats(snapshot);
4981 snapshot->snapshot_time = mach_absolute_time();
4982 snapshot->notification_time = 0;
4983 snapshot->js_gencount = 0;
4984
4985 next_p = memorystatus_get_first_proc_locked(&b, TRUE);
4986 while (next_p) {
4987 p = next_p;
4988 next_p = memorystatus_get_next_proc_locked(&b, p, TRUE);
4989
4990 if (FALSE == memorystatus_init_jetsam_snapshot_entry_locked(p, &snapshot_list[i], snapshot->js_gencount)) {
4991 continue;
4992 }
4993
4994 MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
4995 p->p_pid,
4996 p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7],
4997 p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]);
4998
4999 if (++i == snapshot_max) {
5000 break;
5001 }
5002 }
5003
5004 snapshot->entry_count = i;
5005
5006 if (!od_snapshot) {
5007 /* update the system buffer count */
5008 memorystatus_jetsam_snapshot_count = i;
5009 }
5010 }
5011
5012 #if DEVELOPMENT || DEBUG
5013
5014 #if CONFIG_JETSAM
5015 static int
5016 memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size)
5017 {
5018 int ret;
5019 memorystatus_jetsam_panic_options_t debug;
5020
5021 if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) {
5022 return EINVAL;
5023 }
5024
5025 ret = copyin(buffer, &debug, buffer_size);
5026 if (ret) {
5027 return ret;
5028 }
5029
5030 /* Panic bits match kMemorystatusKilled* enum */
5031 memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask);
5032
5033 /* Copyout new value */
5034 debug.data = memorystatus_jetsam_panic_debug;
5035 ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t));
5036
5037 return ret;
5038 }
5039 #endif /* CONFIG_JETSAM */
5040
5041 /*
5042 * Triggers a sort_order on a specified jetsam priority band.
5043 * This is for testing only, used to force a path through the sort
5044 * function.
5045 */
5046 static int
5047 memorystatus_cmd_test_jetsam_sort(int priority, int sort_order)
5048 {
5049 int error = 0;
5050
5051 unsigned int bucket_index = 0;
5052
5053 if (priority == -1) {
5054 /* Use as shorthand for default priority */
5055 bucket_index = JETSAM_PRIORITY_DEFAULT;
5056 } else {
5057 bucket_index = (unsigned int)priority;
5058 }
5059
5060 error = memorystatus_sort_bucket(bucket_index, sort_order);
5061
5062 return error;
5063 }
5064
5065 #endif /* DEVELOPMENT || DEBUG */
5066
5067 /*
5068 * Prepare the process to be killed (set state, update snapshot) and kill it.
5069 */
5070 static uint64_t memorystatus_purge_before_jetsam_success = 0;
5071
5072 static boolean_t
5073 memorystatus_kill_proc(proc_t p, uint32_t cause, os_reason_t jetsam_reason, boolean_t *killed, uint64_t *footprint_of_killed_proc)
5074 {
5075 pid_t aPid = 0;
5076 uint32_t aPid_ep = 0;
5077
5078 uint64_t killtime = 0;
5079 clock_sec_t tv_sec;
5080 clock_usec_t tv_usec;
5081 uint32_t tv_msec;
5082 boolean_t retval = FALSE;
5083
5084 aPid = p->p_pid;
5085 aPid_ep = p->p_memstat_effectivepriority;
5086
5087 if (cause != kMemorystatusKilledVnodes && cause != kMemorystatusKilledZoneMapExhaustion) {
5088 /*
5089 * Genuine memory pressure and not other (vnode/zone) resource exhaustion.
5090 */
5091 boolean_t success = FALSE;
5092 uint64_t num_pages_purged;
5093 uint64_t num_pages_reclaimed = 0;
5094 uint64_t num_pages_unsecluded = 0;
5095
5096 networking_memstatus_callout(p, cause);
5097 num_pages_purged = vm_purgeable_purge_task_owned(p->task);
5098 num_pages_reclaimed += num_pages_purged;
5099 #if CONFIG_SECLUDED_MEMORY
5100 if (cause == kMemorystatusKilledVMPageShortage &&
5101 vm_page_secluded_count > 0 &&
5102 task_can_use_secluded_mem(p->task, FALSE)) {
5103 /*
5104 * We're about to kill a process that has access
5105 * to the secluded pool. Drain that pool into the
5106 * free or active queues to make these pages re-appear
5107 * as "available", which might make us no longer need
5108 * to kill that process.
5109 * Since the secluded pool does not get refilled while
5110 * a process has access to it, it should remain
5111 * drained.
5112 */
5113 num_pages_unsecluded = vm_page_secluded_drain();
5114 num_pages_reclaimed += num_pages_unsecluded;
5115 }
5116 #endif /* CONFIG_SECLUDED_MEMORY */
5117
5118 if (num_pages_reclaimed) {
5119 /*
5120 * We actually reclaimed something and so let's
5121 * check if we need to continue with the kill.
5122 */
5123 if (cause == kMemorystatusKilledHiwat) {
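/*
 * p_memstat_memlimit is expressed in MB; e.g. a 300 MB limit becomes
 * 300 * 1024 * 1024 = 314572800 bytes. The purge counts as a success
 * if the footprint is now back under the limit.
 */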
5124 uint64_t footprint_in_bytes = get_task_phys_footprint(p->task);
5125 uint64_t memlimit_in_bytes = (((uint64_t)p->p_memstat_memlimit) * 1024ULL * 1024ULL); /* convert MB to bytes */
5126 success = (footprint_in_bytes <= memlimit_in_bytes);
5127 } else {
5128 success = (memorystatus_avail_pages_below_pressure() == FALSE);
5129 #if CONFIG_SECLUDED_MEMORY
5130 if (!success && num_pages_unsecluded) {
5131 /*
5132 * We just drained the secluded pool
5133 * because we're about to kill a
5134 * process that has access to it.
5135 * This is an important process and
5136 * we'd rather not kill it unless
5137 * absolutely necessary, so declare
5138 * success even if draining the pool
5139 * did not quite get us out of the
5140 * "pressure" level but still got
5141 * us out of the "critical" level.
5142 */
5143 success = (memorystatus_avail_pages_below_critical() == FALSE);
5144 }
5145 #endif /* CONFIG_SECLUDED_MEMORY */
5146 }
5147
5148 if (success) {
5149 memorystatus_purge_before_jetsam_success++;
5150
5151 os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: reclaimed %llu pages (%llu purged, %llu unsecluded) from pid %d [%s] and avoided %s\n",
5152 num_pages_reclaimed, num_pages_purged, num_pages_unsecluded, aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_kill_cause_name[cause]);
5153
5154 *killed = FALSE;
5155
5156 return TRUE;
5157 }
5158 }
5159 }
5160
5161 #if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
5162 MEMORYSTATUS_DEBUG(1, "jetsam: killing pid %d [%s] - %lld MB > limit (%d MB)\n",
5163 aPid, (*p->p_name ? p->p_name : "unknown"),
5164 (get_task_phys_footprint(p->task) / (1024ULL * 1024ULL)), /* footprint, bytes converted to MB */
5165 p->p_memstat_memlimit);
5166 #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */
5167
5168 killtime = mach_absolute_time();
5169 absolutetime_to_microtime(killtime, &tv_sec, &tv_usec);
5170 tv_msec = tv_usec / 1000;
5171
5172 proc_list_lock();
5173 memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime);
5174 proc_list_unlock();
5175
5176 char kill_reason_string[128];
5177
5178 if (cause == kMemorystatusKilledHiwat) {
5179 strlcpy(kill_reason_string, "killing_highwater_process", 128);
5180 } else {
5181 if (aPid_ep == JETSAM_PRIORITY_IDLE) {
5182 strlcpy(kill_reason_string, "killing_idle_process", 128);
5183 } else {
5184 strlcpy(kill_reason_string, "killing_top_process", 128);
5185 }
5186 }
5187
5188 /*
5189 * memorystatus_do_kill drops a reference, so take another one so we can
5190 * continue to use this exit reason even after memorystatus_do_kill()
5191 * returns
5192 */
5193 os_reason_ref(jetsam_reason);
5194
5195 retval = memorystatus_do_kill(p, cause, jetsam_reason, footprint_of_killed_proc);
5196 *killed = retval;
5197
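/* (*footprint_of_killed_proc) >> 10 scales the footprint from bytes to KB for the log below */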
5198 os_log_with_startup_serial(OS_LOG_DEFAULT, "%lu.%03d memorystatus: %s pid %d [%s] (%s %d) %lluKB - memorystatus_available_pages: %llu",
5199 (unsigned long)tv_sec, tv_msec, kill_reason_string,
5200 aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
5201 memorystatus_kill_cause_name[cause], aPid_ep,
5202 (*footprint_of_killed_proc) >> 10, (uint64_t)memorystatus_available_pages);
5203
5204 return retval;
5205 }
5206
5207 /*
5208 * Jetsam the first process in the queue.
5209 */
5210 static boolean_t
5211 memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, os_reason_t jetsam_reason,
5212 int32_t *priority, uint32_t *errors, uint64_t *memory_reclaimed)
5213 {
5214 pid_t aPid;
5215 proc_t p = PROC_NULL, next_p = PROC_NULL;
5216 boolean_t new_snapshot = FALSE, force_new_snapshot = FALSE, killed = FALSE, freed_mem = FALSE;
5217 unsigned int i = 0;
5218 uint32_t aPid_ep;
5219 int32_t local_max_kill_prio = JETSAM_PRIORITY_IDLE;
5220 uint64_t footprint_of_killed_proc = 0;
5221
5222 #ifndef CONFIG_FREEZE
5223 #pragma unused(any)
5224 #endif
5225
5226 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
5227 memorystatus_available_pages, 0, 0, 0, 0);
5228
5229
5230 #if CONFIG_JETSAM
5231 if (sort_flag == TRUE) {
5232 (void)memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_DEFAULT);
5233 }
5234
5235 local_max_kill_prio = max_kill_priority;
5236
5237 force_new_snapshot = FALSE;
5238
5239 #else /* CONFIG_JETSAM */
5240
5241 if (sort_flag == TRUE) {
5242 (void)memorystatus_sort_bucket(JETSAM_PRIORITY_IDLE, JETSAM_SORT_DEFAULT);
5243 }
5244
5245 /*
5246 * On macOS, we currently have only 2 reasons to be here:
5247 *
5248 * kMemorystatusKilledZoneMapExhaustion
5249 * AND
5250 * kMemorystatusKilledVMCompressorSpaceShortage
5251 *
5252 * If we are here because of kMemorystatusKilledZoneMapExhaustion, we will consider
5253 * any and all processes as eligible kill candidates since we need to avoid a panic.
5254 *
5255 * Since this function can be called asynchronously, it is harder to toggle the max_kill_priority
5256 * value before and after a call. So we use this local variable to set the upper bound
5257 * on the eligible kill bands.
5258 */
5259 if (cause == kMemorystatusKilledZoneMapExhaustion) {
5260 local_max_kill_prio = JETSAM_PRIORITY_MAX;
5261 } else {
5262 local_max_kill_prio = max_kill_priority;
5263 }
5264
5265 /*
5266 * And, because we are here under extreme circumstances, we force a snapshot even for
5267 * IDLE kills.
5268 */
5269 force_new_snapshot = TRUE;
5270
5271 #endif /* CONFIG_JETSAM */
5272
5273 if (cause != kMemorystatusKilledZoneMapExhaustion &&
5274 jetsam_current_thread() != NULL &&
5275 jetsam_current_thread()->limit_to_low_bands &&
5276 local_max_kill_prio > JETSAM_PRIORITY_BACKGROUND) {
5277 local_max_kill_prio = JETSAM_PRIORITY_BACKGROUND;
5278 }
5279
5280 proc_list_lock();
5281
5282 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5283 while (next_p && (next_p->p_memstat_effectivepriority <= local_max_kill_prio)) {
5284 p = next_p;
5285 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
5286
5287
5288 aPid = p->p_pid;
5289 aPid_ep = p->p_memstat_effectivepriority;
5290
5291 if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
5292 continue; /* with lock held */
5293 }
5294
5295 if (cause == kMemorystatusKilledVnodes) {
5296 /*
5297 * If the system runs out of vnodes, we systematically jetsam
5298 * processes in hopes of stumbling onto a vnode gain that helps
5299 * the system recover. The process that happens to trigger
5300 * this path has no known relationship to the vnode shortage.
5301 * Deadlock avoidance: attempt to safeguard the caller.
5302 */
5303
5304 if (p == current_proc()) {
5305 /* do not jetsam the current process */
5306 continue;
5307 }
5308 }
5309
5310 #if CONFIG_FREEZE
5311 boolean_t skip;
5312 boolean_t reclaim_proc = !(p->p_memstat_state & P_MEMSTAT_LOCKED);
5313 if (any || reclaim_proc) {
5314 skip = FALSE;
5315 } else {
5316 skip = TRUE;
5317 }
5318
5319 if (skip) {
5320 continue;
5321 } else
5322 #endif
5323 {
5324 if (proc_ref_locked(p) == p) {
5325 /*
5326 * Mark as terminated so that if exit1() indicates success, but the process (for example)
5327 * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
5328 * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
5329 * acquisition of the proc lock.
5330 */
5331 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
5332 } else {
5333 /*
5334 * We need to restart the search again because
5335 * proc_ref_locked _can_ drop the proc_list lock
5336 * and we could have lost our stored next_p via
5337 * an exit() on another core.
5338 */
5339 i = 0;
5340 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5341 continue;
5342 }
5343
5344 /*
5345 * Capture a snapshot if none exists and:
5346 * - we are forcing a new snapshot creation, either because:
5347 * - on a particular platform we need these snapshots every time, OR
5348 * - a boot-arg/embedded device tree property has been set.
5349 * - priority was not requested (this is something other than an ambient kill)
5350 * - the priority was requested *and* the targeted process is not at idle priority
5351 */
5352 if ((memorystatus_jetsam_snapshot_count == 0) &&
5353 (force_new_snapshot || memorystatus_idle_snapshot || ((!priority) || (priority && (aPid_ep != JETSAM_PRIORITY_IDLE))))) {
5354 memorystatus_init_jetsam_snapshot_locked(NULL, 0);
5355 new_snapshot = TRUE;
5356 }
5357
5358 proc_list_unlock();
5359
5360 freed_mem = memorystatus_kill_proc(p, cause, jetsam_reason, &killed, &footprint_of_killed_proc); /* purged and/or killed 'p' */
5361 /* Success? */
5362 if (freed_mem) {
5363 if (killed) {
5364 *memory_reclaimed = footprint_of_killed_proc;
5365 if (priority) {
5366 *priority = aPid_ep;
5367 }
5368 } else {
5369 /* purged */
5370 proc_list_lock();
5371 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
5372 proc_list_unlock();
5373 }
5374 proc_rele(p);
5375 goto exit;
5376 }
5377
5378 /*
5379 * Failure - first unwind the state,
5380 * then fall through to restart the search.
5381 */
5382 proc_list_lock();
5383 proc_rele_locked(p);
5384 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
5385 p->p_memstat_state |= P_MEMSTAT_ERROR;
5386 *errors += 1;
5387
5388 i = 0;
5389 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5390 }
5391 }
5392
5393 proc_list_unlock();
5394
5395 exit:
5396 os_reason_free(jetsam_reason);
5397
5398 if (!killed) {
5399 *memory_reclaimed = 0;
5400
5401 /* Clear snapshot if freshly captured and no target was found */
5402 if (new_snapshot) {
5403 proc_list_lock();
5404 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
5405 proc_list_unlock();
5406 }
5407 }
5408
5409 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
5410 memorystatus_available_pages, killed ? aPid : 0, killed, *memory_reclaimed, 0);
5411
5412 return killed;
5413 }
5414
5415 /*
5416 * Jetsam aggressively
5417 */
5418 static boolean_t
5419 memorystatus_kill_processes_aggressive(uint32_t cause, int aggr_count,
5420 int32_t priority_max, uint32_t *errors, uint64_t *memory_reclaimed)
5421 {
5422 pid_t aPid;
5423 proc_t p = PROC_NULL, next_p = PROC_NULL;
5424 boolean_t new_snapshot = FALSE, killed = FALSE;
5425 int kill_count = 0;
5426 unsigned int i = 0;
5427 int32_t aPid_ep = 0;
5428 unsigned int memorystatus_level_snapshot = 0;
5429 uint64_t killtime = 0;
5430 clock_sec_t tv_sec;
5431 clock_usec_t tv_usec;
5432 uint32_t tv_msec;
5433 os_reason_t jetsam_reason = OS_REASON_NULL;
5434 uint64_t footprint_of_killed_proc = 0;
5435
5436 *memory_reclaimed = 0;
5437
5438 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
5439 memorystatus_available_pages, priority_max, 0, 0, 0);
5440
5441 if (priority_max >= JETSAM_PRIORITY_FOREGROUND) {
5442 /*
5443 * Check if aggressive jetsam has been asked to kill up to or beyond the
5444 * JETSAM_PRIORITY_FOREGROUND bucket. If yes, sort the FG band based on
5445 * coalition footprint.
5446 */
5447 memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_DEFAULT);
5448 }
5449
5450 jetsam_reason = os_reason_create(OS_REASON_JETSAM, cause);
5451 if (jetsam_reason == OS_REASON_NULL) {
5452 printf("memorystatus_kill_processes_aggressive: failed to allocate exit reason\n");
5453 }
5454
5455 proc_list_lock();
5456
5457 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5458 while (next_p) {
5459 if (((next_p->p_listflag & P_LIST_EXITED) != 0) ||
5460 ((unsigned int)(next_p->p_memstat_effectivepriority) != i)) {
5461 /*
5462 * We have raced with next_p running on another core.
5463 * It may be exiting or it may have moved to a different
5464 * jetsam priority band. This means we have lost our
5465 * place in line while traversing the jetsam list. We
5466 * attempt to recover by rewinding to the beginning of the band
5467 * we were already traversing. By doing this, we do not guarantee
5468 * that no process escapes this aggressive march, but we can make
5469 * skipping an entire range of processes less likely. (PR-21069019)
5470 */
5471
5472 MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: rewinding band %d, %s(%d) moved or exiting.\n",
5473 aggr_count, i, (*next_p->p_name ? next_p->p_name : "unknown"), next_p->p_pid);
5474
5475 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5476 continue;
5477 }
5478
5479 p = next_p;
5480 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
5481
5482 if (p->p_memstat_effectivepriority > priority_max) {
5483 /*
5484 * Bail out of this killing spree if we have
5485 * reached beyond the priority_max jetsam band.
5486 * That is, we kill up to and through the
5487 * priority_max jetsam band.
5488 */
5489 proc_list_unlock();
5490 goto exit;
5491 }
5492
5493 aPid = p->p_pid;
5494 aPid_ep = p->p_memstat_effectivepriority;
5495
5496 if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
5497 continue;
5498 }
5499
5500 /*
5501 * Capture a snapshot if none exists.
5502 */
5503 if (memorystatus_jetsam_snapshot_count == 0) {
5504 memorystatus_init_jetsam_snapshot_locked(NULL, 0);
5505 new_snapshot = TRUE;
5506 }
5507
5508 /*
5509 * Mark as terminated so that if exit1() indicates success, but the process (for example)
5510 * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
5511 * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
5512 * acquisition of the proc lock.
5513 */
5514 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
5515
5516 killtime = mach_absolute_time();
5517 absolutetime_to_microtime(killtime, &tv_sec, &tv_usec);
5518 tv_msec = tv_usec / 1000;
5519
5520 /* Shift queue, update stats */
5521 memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime);
5522
5523 /*
5524 * In order to kill the target process, we will drop the proc_list_lock.
5525 * To guarantee that p and next_p don't disappear out from under the lock,
5526 * we must take a ref on both.
5527 * If we cannot get a reference, then it's likely we've raced with
5528 * that process exiting on another core.
5529 */
5530 if (proc_ref_locked(p) == p) {
5531 if (next_p) {
5532 while (next_p && (proc_ref_locked(next_p) != next_p)) {
5533 proc_t temp_p;
5534
5535 /*
5536 * We must have raced with next_p exiting on another core.
5537 * Recover by getting the next eligible process in the band.
5538 */
5539
5540 MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: skipping %d [%s] (exiting?)\n",
5541 aggr_count, next_p->p_pid, (*next_p->p_name ? next_p->p_name : "(unknown)"));
5542
5543 temp_p = next_p;
5544 next_p = memorystatus_get_next_proc_locked(&i, temp_p, TRUE);
5545 }
5546 }
5547 proc_list_unlock();
5548
5549 printf("%lu.%03d memorystatus: %s%d pid %d [%s] (%s %d) - memorystatus_available_pages: %llu\n",
5550 (unsigned long)tv_sec, tv_msec,
5551 ((aPid_ep == JETSAM_PRIORITY_IDLE) ? "killing_idle_process_aggressive" : "killing_top_process_aggressive"),
5552 aggr_count, aPid, (*p->p_name ? p->p_name : "unknown"),
5553 memorystatus_kill_cause_name[cause], aPid_ep, (uint64_t)memorystatus_available_pages);
5554
5555 memorystatus_level_snapshot = memorystatus_level;
5556
5557 /*
5558 * memorystatus_do_kill() drops a reference, so take another one so we can
5559 * continue to use this exit reason even after memorystatus_do_kill()
5560 * returns.
5561 */
5562 os_reason_ref(jetsam_reason);
5563 killed = memorystatus_do_kill(p, cause, jetsam_reason, &footprint_of_killed_proc);
5564
5565 /* Success? */
5566 if (killed) {
5567 *memory_reclaimed += footprint_of_killed_proc;
5568 proc_rele(p);
5569 kill_count++;
5570 p = NULL;
5571 killed = FALSE;
5572
5573 /*
5574 * Continue the killing spree.
5575 */
5576 proc_list_lock();
5577 if (next_p) {
5578 proc_rele_locked(next_p);
5579 }
5580
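/*
 * memorystatus_level is a percentage of available memory; if killing this
 * foreground process raised it by at least
 * AGGRESSIVE_JETSAM_LENIENT_MODE_THRESHOLD points, we have recovered
 * enough to turn lenient mode back off.
 */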
5581 if (aPid_ep == JETSAM_PRIORITY_FOREGROUND && memorystatus_aggressive_jetsam_lenient == TRUE) {
5582 if (memorystatus_level > memorystatus_level_snapshot && ((memorystatus_level - memorystatus_level_snapshot) >= AGGRESSIVE_JETSAM_LENIENT_MODE_THRESHOLD)) {
5583 #if DEVELOPMENT || DEBUG
5584 printf("Disabling Lenient mode after one-time deployment.\n");
5585 #endif /* DEVELOPMENT || DEBUG */
5586 memorystatus_aggressive_jetsam_lenient = FALSE;
5587 break;
5588 }
5589 }
5590
5591 continue;
5592 }
5593
5594 /*
5595 * Failure - first unwind the state,
5596 * then fall through to restart the search.
5597 */
5598 proc_list_lock();
5599 proc_rele_locked(p);
5600 if (next_p) {
5601 proc_rele_locked(next_p);
5602 }
5603 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
5604 p->p_memstat_state |= P_MEMSTAT_ERROR;
5605 *errors += 1;
5606 p = NULL;
5607 }
5608
5609 /*
5610 * Failure - restart the search at the beginning of
5611 * the band we were already traversing.
5612 *
5613 * We might have raced with "p" exiting on another core, resulting in no
5614 * ref on "p". Or, we may have failed to kill "p".
5615 *
5616 * Either way, we fall thru to here, leaving the proc in the
5617 * P_MEMSTAT_TERMINATED or P_MEMSTAT_ERROR state.
5618 *
5619 * And we hold the proc_list_lock at this point.
5620 */
5621
5622 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5623 }
5624
5625 proc_list_unlock();
5626
5627 exit:
5628 os_reason_free(jetsam_reason);
5629
5630 /* Clear snapshot if freshly captured and no target was found */
5631 if (new_snapshot && (kill_count == 0)) {
5632 proc_list_lock();
5633 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
5634 proc_list_unlock();
5635 }
5636
5637 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
5638 memorystatus_available_pages, 0, kill_count, *memory_reclaimed, 0);
5639
5640 if (kill_count > 0) {
5641 return TRUE;
5642 } else {
5643 return FALSE;
5644 }
5645 }
5646
5647 static boolean_t
5648 memorystatus_kill_hiwat_proc(uint32_t *errors, boolean_t *purged, uint64_t *memory_reclaimed)
5649 {
5650 pid_t aPid = 0;
5651 proc_t p = PROC_NULL, next_p = PROC_NULL;
5652 boolean_t new_snapshot = FALSE, killed = FALSE, freed_mem = FALSE;
5653 unsigned int i = 0;
5654 uint32_t aPid_ep;
5655 os_reason_t jetsam_reason = OS_REASON_NULL;
5656 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
5657 memorystatus_available_pages, 0, 0, 0, 0);
5658
5659 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_HIGHWATER);
5660 if (jetsam_reason == OS_REASON_NULL) {
5661 printf("memorystatus_kill_hiwat_proc: failed to allocate exit reason\n");
5662 }
5663
5664 proc_list_lock();
5665
5666 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5667 while (next_p) {
5668 uint64_t footprint_in_bytes = 0;
5669 uint64_t memlimit_in_bytes = 0;
5670 boolean_t skip = FALSE;
5671
5672 p = next_p;
5673 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
5674
5675 aPid = p->p_pid;
5676 aPid_ep = p->p_memstat_effectivepriority;
5677
5678 if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
5679 continue;
5680 }
5681
5682 /* skip if no limit set */
5683 if (p->p_memstat_memlimit <= 0) {
5684 continue;
5685 }
5686
5687 footprint_in_bytes = get_task_phys_footprint(p->task);
5688 memlimit_in_bytes = (((uint64_t)p->p_memstat_memlimit) * 1024ULL * 1024ULL); /* convert MB to bytes */
5689 skip = (footprint_in_bytes <= memlimit_in_bytes);
5690
5691 #if CONFIG_FREEZE
5692 if (!skip) {
5693 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
5694 skip = TRUE;
5695 } else {
5696 skip = FALSE;
5697 }
5698 }
5699 #endif
5700
5701 if (skip) {
5702 continue;
5703 } else {
5704 if (memorystatus_jetsam_snapshot_count == 0) {
5705 memorystatus_init_jetsam_snapshot_locked(NULL, 0);
5706 new_snapshot = TRUE;
5707 }
5708
5709 if (proc_ref_locked(p) == p) {
5710 /*
5711 * Mark as terminated so that if exit1() indicates success, but the process (for example)
5712 * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
5713 * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
5714 * acquisition of the proc lock.
5715 */
5716 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
5717
5718 proc_list_unlock();
5719 } else {
5720 /*
5721 * We need to restart the search again because
5722 * proc_ref_locked _can_ drop the proc_list lock
5723 * and we could have lost our stored next_p via
5724 * an exit() on another core.
5725 */
5726 i = 0;
5727 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5728 continue;
5729 }
5730
5731 footprint_in_bytes = 0;
5732 freed_mem = memorystatus_kill_proc(p, kMemorystatusKilledHiwat, jetsam_reason, &killed, &footprint_in_bytes); /* purged and/or killed 'p' */
5733
5734 /* Success? */
5735 if (freed_mem) {
5736 if (killed == FALSE) {
5737 /* purged 'p'; don't reset the HWM candidate count */
5738 *purged = TRUE;
5739
5740 proc_list_lock();
5741 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
5742 proc_list_unlock();
5743 } else {
5744 *memory_reclaimed = footprint_in_bytes;
5745 }
5746 proc_rele(p);
5747 goto exit;
5748 }
5749 /*
5750 * Failure - first unwind the state,
5751 * then fall through to restart the search.
5752 */
5753 proc_list_lock();
5754 proc_rele_locked(p);
5755 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
5756 p->p_memstat_state |= P_MEMSTAT_ERROR;
5757 *errors += 1;
5758
5759 i = 0;
5760 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
5761 }
5762 }
5763
5764 proc_list_unlock();
5765
5766 exit:
5767 os_reason_free(jetsam_reason);
5768
5769 if (!killed) {
5770 *memory_reclaimed = 0;
5771
5772 /* Clear snapshot if freshly captured and no target was found */
5773 if (new_snapshot) {
5774 proc_list_lock();
5775 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
5776 proc_list_unlock();
5777 }
5778 }
5779
5780 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END,
5781 memorystatus_available_pages, killed ? aPid : 0, killed, *memory_reclaimed, 0);
5782
5783 return killed;
5784 }
5785
5786 /*
5787 * Jetsam a process pinned in the elevated band.
5788 *
5789 * Return: true -- a pinned process was jetsammed
5790 * false -- no pinned process was jetsammed
5791 */
5792 boolean_t
5793 memorystatus_kill_elevated_process(uint32_t cause, os_reason_t jetsam_reason, unsigned int band, int aggr_count, uint32_t *errors, uint64_t *memory_reclaimed)
5794 {
5795 pid_t aPid = 0;
5796 proc_t p = PROC_NULL, next_p = PROC_NULL;
5797 boolean_t new_snapshot = FALSE, killed = FALSE;
5798 int kill_count = 0;
5799 uint32_t aPid_ep;
5800 uint64_t killtime = 0;
5801 clock_sec_t tv_sec;
5802 clock_usec_t tv_usec;
5803 uint32_t tv_msec;
5804 uint64_t footprint_of_killed_proc = 0;
5805
5806
5807 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
5808 memorystatus_available_pages, 0, 0, 0, 0);
5809
5810 #if CONFIG_FREEZE
5811 boolean_t consider_frozen_only = FALSE;
5812
5813 if (band == (unsigned int) memorystatus_freeze_jetsam_band) {
5814 consider_frozen_only = TRUE;
5815 }
5816 #endif /* CONFIG_FREEZE */
5817
5818 proc_list_lock();
5819
5820 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
5821 while (next_p) {
5822 p = next_p;
5823 next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);
5824
5825 aPid = p->p_pid;
5826 aPid_ep = p->p_memstat_effectivepriority;
5827
5828 /*
5829 * Only pick a process pinned in this elevated band
5830 */
5831 if (!(p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND)) {
5832 continue;
5833 }
5834
5835 if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
5836 continue;
5837 }
5838
5839 #if CONFIG_FREEZE
5840 if (consider_frozen_only && !(p->p_memstat_state & P_MEMSTAT_FROZEN)) {
5841 continue;
5842 }
5843
5844 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
5845 continue;
5846 }
5847 #endif /* CONFIG_FREEZE */
5848
5849 #if DEVELOPMENT || DEBUG
5850 MEMORYSTATUS_DEBUG(1, "jetsam: elevated%d process pid %d [%s] - memorystatus_available_pages: %d\n",
5851 aggr_count,
5852 aPid, (*p->p_name ? p->p_name : "unknown"),
5853 memorystatus_available_pages);
5854 #endif /* DEVELOPMENT || DEBUG */
5855
5856 if (memorystatus_jetsam_snapshot_count == 0) {
5857 memorystatus_init_jetsam_snapshot_locked(NULL, 0);
5858 new_snapshot = TRUE;
5859 }
5860
5861 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
5862
5863 killtime = mach_absolute_time();
5864 absolutetime_to_microtime(killtime, &tv_sec, &tv_usec);
5865 tv_msec = tv_usec / 1000;
5866
5867 memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime);
5868
5869 if (proc_ref_locked(p) == p) {
5870 proc_list_unlock();
5871
5872 /*
5873 * memorystatus_do_kill drops a reference, so take another one so we can
5874 * continue to use this exit reason even after memorystatus_do_kill()
5875 * returns
5876 */
5877 os_reason_ref(jetsam_reason);
5878 killed = memorystatus_do_kill(p, cause, jetsam_reason, &footprint_of_killed_proc);
5879
5880 os_log_with_startup_serial(OS_LOG_DEFAULT, "%lu.%03d memorystatus: killing_top_process_elevated%d pid %d [%s] (%s %d) %lluKB - memorystatus_available_pages: %llu\n",
5881 (unsigned long)tv_sec, tv_msec,
5882 aggr_count,
5883 aPid, ((p && *p->p_name) ? p->p_name : "unknown"),
5884 memorystatus_kill_cause_name[cause], aPid_ep,
5885 footprint_of_killed_proc >> 10, (uint64_t)memorystatus_available_pages);
5886
5887 /* Success? */
5888 if (killed) {
5889 *memory_reclaimed = footprint_of_killed_proc;
5890 proc_rele(p);
5891 kill_count++;
5892 goto exit;
5893 }
5894
5895 /*
5896 * Failure - first unwind the state,
5897 * then fall through to restart the search.
5898 */
5899 proc_list_lock();
5900 proc_rele_locked(p);
5901 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
5902 p->p_memstat_state |= P_MEMSTAT_ERROR;
5903 *errors += 1;
5904 }
5905
5906 /*
5907 * Failure - restart the search.
5908 *
5909 * We might have raced with "p" exiting on another core, resulting in no
5910 * ref on "p". Or, we may have failed to kill "p".
5911 *
5912 * Either way, we fall thru to here, leaving the proc in the
5913 * P_MEMSTAT_TERMINATED state or P_MEMSTAT_ERROR state.
5914 *
5915 * And we hold the proc_list_lock at this point.
5916 */
5917
5918 next_p = memorystatus_get_first_proc_locked(&band, FALSE);
5919 }
5920
5921 proc_list_unlock();
5922
5923 exit:
5924 os_reason_free(jetsam_reason);
5925
5926 if (kill_count == 0) {
5927 *memory_reclaimed = 0;
5928
5929 /* Clear snapshot if freshly captured and no target was found */
5930 if (new_snapshot) {
5931 proc_list_lock();
5932 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
5933 proc_list_unlock();
5934 }
5935 }
5936
5937 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
5938 memorystatus_available_pages, killed ? aPid : 0, kill_count, *memory_reclaimed, 0);
5939
5940 return killed;
5941 }
5942
5943 static boolean_t
5944 memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause)
5945 {
5946 /*
5947 * TODO: allow a general async path
5948 *
5949 * NOTE: If a new async kill cause is added, make sure to update memorystatus_thread() to
5950 * add the appropriate exit reason code mapping.
5951 */
5952 if ((victim_pid != -1) ||
5953 (cause != kMemorystatusKilledVMPageShortage &&
5954 cause != kMemorystatusKilledVMCompressorThrashing &&
5955 cause != kMemorystatusKilledVMCompressorSpaceShortage &&
5956 cause != kMemorystatusKilledFCThrashing &&
5957 cause != kMemorystatusKilledZoneMapExhaustion)) {
5958 return FALSE;
5959 }
5960
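/*
 * Record the cause for the memorystatus thread to consume, then wake it;
 * the actual kill happens asynchronously on that thread.
 */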
5961 kill_under_pressure_cause = cause;
5962 memorystatus_thread_wake();
5963 return TRUE;
5964 }
5965
5966 boolean_t
5967 memorystatus_kill_on_VM_compressor_space_shortage(boolean_t async)
5968 {
5969 if (async) {
5970 return memorystatus_kill_process_async(-1, kMemorystatusKilledVMCompressorSpaceShortage);
5971 } else {
5972 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_VMCOMPRESSOR_SPACE_SHORTAGE);
5973 if (jetsam_reason == OS_REASON_NULL) {
5974 printf("memorystatus_kill_on_VM_compressor_space_shortage -- sync: failed to allocate jetsam reason\n");
5975 }
5976
5977 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMCompressorSpaceShortage, jetsam_reason);
5978 }
5979 }
5980
5981 #if CONFIG_JETSAM
5982 boolean_t
5983 memorystatus_kill_on_VM_compressor_thrashing(boolean_t async)
5984 {
5985 if (async) {
5986 return memorystatus_kill_process_async(-1, kMemorystatusKilledVMCompressorThrashing);
5987 } else {
5988 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_VMCOMPRESSOR_THRASHING);
5989 if (jetsam_reason == OS_REASON_NULL) {
5990 printf("memorystatus_kill_on_VM_compressor_thrashing -- sync: failed to allocate jetsam reason\n");
5991 }
5992
5993 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMCompressorThrashing, jetsam_reason);
5994 }
5995 }
5996
5997 boolean_t
5998 memorystatus_kill_on_VM_page_shortage(boolean_t async)
5999 {
6000 if (async) {
6001 return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage);
6002 } else {
6003 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_VMPAGESHORTAGE);
6004 if (jetsam_reason == OS_REASON_NULL) {
6005 printf("memorystatus_kill_on_VM_page_shortage -- sync: failed to allocate jetsam reason\n");
6006 }
6007
6008 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage, jetsam_reason);
6009 }
6010 }
6011
6012 boolean_t
6013 memorystatus_kill_on_FC_thrashing(boolean_t async)
6014 {
6015 if (async) {
6016 return memorystatus_kill_process_async(-1, kMemorystatusKilledFCThrashing);
6017 } else {
6018 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_FCTHRASHING);
6019 if (jetsam_reason == OS_REASON_NULL) {
6020 printf("memorystatus_kill_on_FC_thrashing -- sync: failed to allocate jetsam reason\n");
6021 }
6022
6023 return memorystatus_kill_process_sync(-1, kMemorystatusKilledFCThrashing, jetsam_reason);
6024 }
6025 }
6026
6027 boolean_t
6028 memorystatus_kill_on_vnode_limit(void)
6029 {
6030 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_VNODE);
6031 if (jetsam_reason == OS_REASON_NULL) {
6032 printf("memorystatus_kill_on_vnode_limit: failed to allocate jetsam reason\n");
6033 }
6034
6035 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes, jetsam_reason);
6036 }
6037
6038 #endif /* CONFIG_JETSAM */
6039
6040 boolean_t
6041 memorystatus_kill_on_zone_map_exhaustion(pid_t pid)
6042 {
6043 boolean_t res = FALSE;
6044 if (pid == -1) {
6045 res = memorystatus_kill_process_async(-1, kMemorystatusKilledZoneMapExhaustion);
6046 } else {
6047 os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_ZONE_MAP_EXHAUSTION);
6048 if (jetsam_reason == OS_REASON_NULL) {
6049 printf("memorystatus_kill_on_zone_map_exhaustion: failed to allocate jetsam reason\n");
6050 }
6051
6052 res = memorystatus_kill_process_sync(pid, kMemorystatusKilledZoneMapExhaustion, jetsam_reason);
6053 }
6054 return res;
6055 }
6056
6057 void
6058 memorystatus_on_pageout_scan_end(void)
6059 {
6060 /* No-op */
6061 }
6062
6063 /* Return both allocated and actual size, since there's a race between allocation and list compilation */
6064 static int
6065 memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only)
6066 {
6067 uint32_t list_count, i = 0;
6068 memorystatus_priority_entry_t *list_entry;
6069 proc_t p;
6070
6071 list_count = memorystatus_list_count;
6072 *list_size = sizeof(memorystatus_priority_entry_t) * list_count;
6073
6074 /* Just a size check? */
6075 if (size_only) {
6076 return 0;
6077 }
6078
6079 /* Otherwise, validate the size of the buffer */
6080 if (*buffer_size < *list_size) {
6081 return EINVAL;
6082 }
6083
6084 *list_ptr = (memorystatus_priority_entry_t*)kalloc(*list_size);
6085 if (!*list_ptr) {
6086 return ENOMEM;
6087 }
6088
6089 memset(*list_ptr, 0, *list_size);
6090
6091 *buffer_size = *list_size;
6092 *list_size = 0;
6093
6094 list_entry = *list_ptr;
6095
6096 proc_list_lock();
6097
6098 p = memorystatus_get_first_proc_locked(&i, TRUE);
6099 while (p && (*list_size < *buffer_size)) {
6100 list_entry->pid = p->p_pid;
6101 list_entry->priority = p->p_memstat_effectivepriority;
6102 list_entry->user_data = p->p_memstat_userdata;
6103
6104 if (p->p_memstat_memlimit <= 0) {
6105 task_get_phys_footprint_limit(p->task, &list_entry->limit);
6106 } else {
6107 list_entry->limit = p->p_memstat_memlimit;
6108 }
6109
6110 list_entry->state = memorystatus_build_state(p);
6111 list_entry++;
6112
6113 *list_size += sizeof(memorystatus_priority_entry_t);
6114
6115 p = memorystatus_get_next_proc_locked(&i, p, TRUE);
6116 }
6117
6118 proc_list_unlock();
6119
6120 MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size);
6121
6122 return 0;
6123 }
6124
6125 static int
6126 memorystatus_get_priority_pid(pid_t pid, user_addr_t buffer, size_t buffer_size)
6127 {
6128 int error = 0;
6129 memorystatus_priority_entry_t mp_entry;
6130 kern_return_t ret;
6131
6132 /* Validate inputs */
6133 if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_priority_entry_t))) {
6134 return EINVAL;
6135 }
6136
6137 proc_t p = proc_find(pid);
6138 if (!p) {
6139 return ESRCH;
6140 }
6141
6142 memset(&mp_entry, 0, sizeof(memorystatus_priority_entry_t));
6143
6144 mp_entry.pid = p->p_pid;
6145 mp_entry.priority = p->p_memstat_effectivepriority;
6146 mp_entry.user_data = p->p_memstat_userdata;
6147 if (p->p_memstat_memlimit <= 0) {
6148 ret = task_get_phys_footprint_limit(p->task, &mp_entry.limit);
6149 if (ret != KERN_SUCCESS) {
6150 proc_rele(p);
6151 return EINVAL;
6152 }
6153 } else {
6154 mp_entry.limit = p->p_memstat_memlimit;
6155 }
6156 mp_entry.state = memorystatus_build_state(p);
6157
6158 proc_rele(p);
6159
6160 error = copyout(&mp_entry, buffer, buffer_size);
6161
6162 return error;
6163 }
6164
6165 static int
6166 memorystatus_cmd_get_priority_list(pid_t pid, user_addr_t buffer, size_t buffer_size, int32_t *retval)
6167 {
6168 int error = 0;
6169 boolean_t size_only;
6170 size_t list_size;
6171
6172 /*
6173 * When a non-zero pid is provided, the 'list' has only one entry.
6174 */
6175
6176 size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);
6177
6178 if (pid != 0) {
6179 list_size = sizeof(memorystatus_priority_entry_t) * 1;
6180 if (!size_only) {
6181 error = memorystatus_get_priority_pid(pid, buffer, buffer_size);
6182 }
6183 } else {
6184 memorystatus_priority_entry_t *list = NULL;
6185 error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only);
6186
6187 if (error == 0) {
6188 if (!size_only) {
6189 error = copyout(list, buffer, list_size);
6190 }
6191 }
6192
6193 if (list) {
6194 kfree(list, buffer_size);
6195 }
6196 }
6197
6198 if (error == 0) {
6199 *retval = list_size;
6200 }
6201
6202 return error;
6203 }
6204
6205 static void
6206 memorystatus_clear_errors(void)
6207 {
6208 proc_t p;
6209 unsigned int i = 0;
6210
6211 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6212
6213 proc_list_lock();
6214
6215 p = memorystatus_get_first_proc_locked(&i, TRUE);
6216 while (p) {
6217 if (p->p_memstat_state & P_MEMSTAT_ERROR) {
6218 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
6219 }
6220 p = memorystatus_get_next_proc_locked(&i, p, TRUE);
6221 }
6222
6223 proc_list_unlock();
6224
6225 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6226 }
6227
6228 #if CONFIG_JETSAM
6229 static void
6230 memorystatus_update_levels_locked(boolean_t critical_only)
6231 {
6232 memorystatus_available_pages_critical = memorystatus_available_pages_critical_base;
6233
6234 /*
6235 * If there's an entry in the first bucket, we have idle processes.
6236 */
6237
6238 memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
6239 if (first_bucket->count) {
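/*
 * Idle processes are the cheapest kill candidates, so raise the critical
 * threshold by the idle offset and let jetsam engage earlier while they exist.
 */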
6240 memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset;
6241
6242 if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure) {
6243 /*
6244 * The critical threshold must never exceed the pressure threshold
6245 */
6246 memorystatus_available_pages_critical = memorystatus_available_pages_pressure;
6247 }
6248 }
6249
6250 if (memorystatus_jetsam_policy & kPolicyMoreFree) {
6251 memorystatus_available_pages_critical += memorystatus_policy_more_free_offset_pages;
6252 }
6253
6254 if (critical_only) {
6255 return;
6256 }
6257
6258 #if VM_PRESSURE_EVENTS
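/*
 * memorystatus_delta corresponds to delta_percentage percent of memory,
 * so scaling it by (pressure_threshold_percentage / delta_percentage)
 * yields a threshold of pressure_threshold_percentage percent.
 */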
6259 memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
6260 #endif
6261 }
6262
6263 void
6264 memorystatus_fast_jetsam_override(boolean_t enable_override)
6265 {
6266 /* If fast jetsam is not enabled, simply return */
6267 if (!fast_jetsam_enabled) {
6268 return;
6269 }
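/*
 * Enabling the override applies kPolicyMoreFree, which raises the critical
 * threshold by memorystatus_policy_more_free_offset_pages (see
 * memorystatus_update_levels_locked) and moves the jetsam thread pool to
 * its maximum size; disabling it reverts both.
 */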
6270
6271 if (enable_override) {
6272 if ((memorystatus_jetsam_policy & kPolicyMoreFree) == kPolicyMoreFree) {
6273 return;
6274 }
6275 proc_list_lock();
6276 memorystatus_jetsam_policy |= kPolicyMoreFree;
6277 memorystatus_thread_pool_max();
6278 memorystatus_update_levels_locked(TRUE);
6279 proc_list_unlock();
6280 } else {
6281 if ((memorystatus_jetsam_policy & kPolicyMoreFree) == 0) {
6282 return;
6283 }
6284 proc_list_lock();
6285 memorystatus_jetsam_policy &= ~kPolicyMoreFree;
6286 memorystatus_thread_pool_default();
6287 memorystatus_update_levels_locked(TRUE);
6288 proc_list_unlock();
6289 }
6290 }
6291
6292
6293 static int
6294 sysctl_kern_memorystatus_policy_more_free SYSCTL_HANDLER_ARGS
6295 {
6296 #pragma unused(arg1, arg2, oidp)
6297 int error = 0, more_free = 0;
6298
6299 /*
6300 * TODO: Enable this privilege check?
6301 *
6302 * error = priv_check_cred(kauth_cred_get(), PRIV_VM_JETSAM, 0);
6303 * if (error)
6304 * return (error);
6305 */
6306
6307 error = sysctl_handle_int(oidp, &more_free, 0, req);
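/* A NULL newptr means this was a read; there is no new value to apply. */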
6308 if (error || !req->newptr) {
6309 return error;
6310 }
6311
6312 if (more_free) {
6313 memorystatus_fast_jetsam_override(true);
6314 } else {
6315 memorystatus_fast_jetsam_override(false);
6316 }
6317
6318 return 0;
6319 }
6320 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_policy_more_free, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
6321 0, 0, &sysctl_kern_memorystatus_policy_more_free, "I", "");
6322
6323 #endif /* CONFIG_JETSAM */
6324
6325 /*
6326 * Get the at_boot snapshot
6327 */
6328 static int
6329 memorystatus_get_at_boot_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only)
6330 {
6331 size_t input_size = *snapshot_size;
6332
6333 /*
6334 * The at_boot snapshot has no entry list.
6335 */
6336 *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t);
6337
6338 if (size_only) {
6339 return 0;
6340 }
6341
6342 /*
6343 * Validate the size of the snapshot buffer
6344 */
6345 if (input_size < *snapshot_size) {
6346 return EINVAL;
6347 }
6348
6349 /*
6350 * Update the notification_time only
6351 */
6352 memorystatus_at_boot_snapshot.notification_time = mach_absolute_time();
6353 *snapshot = &memorystatus_at_boot_snapshot;
6354
6355 MEMORYSTATUS_DEBUG(7, "memorystatus_get_at_boot_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%d)\n",
6356 (long)input_size, (long)*snapshot_size, 0);
6357 return 0;
6358 }
6359
6360 /*
6361 * Get the previous fully populated snapshot
6362 */
6363 static int
6364 memorystatus_get_jetsam_snapshot_copy(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only)
6365 {
6366 size_t input_size = *snapshot_size;
6367
6368 if (memorystatus_jetsam_snapshot_copy_count > 0) {
6369 *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_copy_count));
6370 } else {
6371 *snapshot_size = 0;
6372 }
6373
6374 if (size_only) {
6375 return 0;
6376 }
6377
6378 if (input_size < *snapshot_size) {
6379 return EINVAL;
6380 }
6381
6382 *snapshot = memorystatus_jetsam_snapshot_copy;
6383
6384 MEMORYSTATUS_DEBUG(7, "memorystatus_get_jetsam_snapshot_copy: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n",
6385 (long)input_size, (long)*snapshot_size, (long)memorystatus_jetsam_snapshot_copy_count);
6386
6387 return 0;
6388 }
6389
6390 static int
6391 memorystatus_get_on_demand_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only)
6392 {
6393 size_t input_size = *snapshot_size;
6394 uint32_t ods_list_count = memorystatus_list_count;
6395 memorystatus_jetsam_snapshot_t *ods = NULL; /* The on_demand snapshot buffer */
6396
6397 *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (ods_list_count));
6398
6399 if (size_only) {
6400 return 0;
6401 }
6402
6403 /*
6404 * Validate the size of the snapshot buffer.
6405 * This is inherently racy. We may want to revisit
6406 * this error condition and trim the output when
6407 * it doesn't fit.
6408 */
6409 if (input_size < *snapshot_size) {
6410 return EINVAL;
6411 }
6412
6413 /*
6414 * Allocate and initialize a snapshot buffer.
6415 */
6416 ods = (memorystatus_jetsam_snapshot_t *)kalloc(*snapshot_size);
6417 if (!ods) {
6418 return ENOMEM;
6419 }
6420
6421 memset(ods, 0, *snapshot_size);
6422
6423 proc_list_lock();
6424 memorystatus_init_jetsam_snapshot_locked(ods, ods_list_count);
6425 proc_list_unlock();
6426
6427 /*
6428 * Return the kernel allocated, on_demand buffer.
6429 * The caller of this routine will copy the data out
6430 * to user space and then free the kernel allocated
6431 * buffer.
6432 */
6433 *snapshot = ods;
6434
6435 MEMORYSTATUS_DEBUG(7, "memorystatus_get_on_demand_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n",
6436 (long)input_size, (long)*snapshot_size, (long)ods_list_count);
6437
6438 return 0;
6439 }
6440
6441 static int
6442 memorystatus_get_jetsam_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only)
6443 {
6444 size_t input_size = *snapshot_size;
6445
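/*
 * The snapshot blob is a fixed-size header immediately followed by
 * entry_count entries, hence header size + count * entry size.
 */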
6446 if (memorystatus_jetsam_snapshot_count > 0) {
6447 *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count));
6448 } else {
6449 *snapshot_size = 0;
6450 }
6451
6452 if (size_only) {
6453 return 0;
6454 }
6455
6456 if (input_size < *snapshot_size) {
6457 return EINVAL;
6458 }
6459
6460 *snapshot = memorystatus_jetsam_snapshot;
6461
6462 MEMORYSTATUS_DEBUG(7, "memorystatus_get_jetsam_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n",
6463 (long)input_size, (long)*snapshot_size, (long)memorystatus_jetsam_snapshot_count);
6464
6465 return 0;
6466 }
6467
6468
6469 static int
6470 memorystatus_cmd_get_jetsam_snapshot(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval)
6471 {
6472 int error = EINVAL;
6473 boolean_t size_only;
6474 boolean_t is_default_snapshot = FALSE;
6475 boolean_t is_on_demand_snapshot = FALSE;
6476 boolean_t is_at_boot_snapshot = FALSE;
6477 memorystatus_jetsam_snapshot_t *snapshot;
6478
6479 size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);
6480
6481 if (flags == 0) {
6482 /* Default */
6483 is_default_snapshot = TRUE;
6484 error = memorystatus_get_jetsam_snapshot(&snapshot, &buffer_size, size_only);
6485 } else {
6486 if (flags & ~(MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT | MEMORYSTATUS_SNAPSHOT_COPY)) {
6487 /*
6488 * Unsupported bit set in flag.
6489 */
6490 return EINVAL;
6491 }
6492
6493 if (flags & (flags - 0x1)) {
6494 /*
6495 * Can't have multiple flags set at the same time: (flags & (flags - 1)) is non-zero iff more than one bit is set.
6496 */
6497 return EINVAL;
6498 }
6499
6500 if (flags & MEMORYSTATUS_SNAPSHOT_ON_DEMAND) {
6501 is_on_demand_snapshot = TRUE;
6502 /*
6503 * When not requesting the size only, the following call will allocate
6504 * an on_demand snapshot buffer, which is freed below.
6505 */
6506 error = memorystatus_get_on_demand_snapshot(&snapshot, &buffer_size, size_only);
6507 } else if (flags & MEMORYSTATUS_SNAPSHOT_AT_BOOT) {
6508 is_at_boot_snapshot = TRUE;
6509 error = memorystatus_get_at_boot_snapshot(&snapshot, &buffer_size, size_only);
6510 } else if (flags & MEMORYSTATUS_SNAPSHOT_COPY) {
6511 error = memorystatus_get_jetsam_snapshot_copy(&snapshot, &buffer_size, size_only);
6512 } else {
6513 /*
6514 * Invalid flag setting.
6515 */
6516 return EINVAL;
6517 }
6518 }
6519
6520 if (error) {
6521 goto out;
6522 }
6523
/*
 * Copy the data out to user space and clear the snapshot buffer.
 * If working with the jetsam snapshot, clearing the buffer means
 * resetting the count. If working with an on_demand snapshot,
 * clearing the buffer means freeing it. If working with the at_boot
 * snapshot or a snapshot copy, there is nothing to clear or update.
 */
6535 if (!size_only) {
6536 if ((error = copyout(snapshot, buffer, buffer_size)) == 0) {
6537 if (is_default_snapshot) {
6538 /*
6539 * The jetsam snapshot is never freed, its count is simply reset.
6540 * However, we make a copy for any parties that might be interested
6541 * in the previous fully populated snapshot.
6542 */
6543 proc_list_lock();
6544 memcpy(memorystatus_jetsam_snapshot_copy, memorystatus_jetsam_snapshot, memorystatus_jetsam_snapshot_size);
6545 memorystatus_jetsam_snapshot_copy_count = memorystatus_jetsam_snapshot_count;
6546 snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
6547 memorystatus_jetsam_snapshot_last_timestamp = 0;
6548 proc_list_unlock();
6549 }
6550 }
6551
6552 if (is_on_demand_snapshot) {
6553 /*
6554 * The on_demand snapshot is always freed,
6555 * even if the copyout failed.
6556 */
6557 if (snapshot) {
6558 kfree(snapshot, buffer_size);
6559 }
6560 }
6561 }
6562
6563 if (error == 0) {
6564 *retval = buffer_size;
6565 }
6566 out:
6567 return error;
6568 }
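
/*
 * Illustrative user-space sketch (hypothetical, not compiled here) of the
 * two-step protocol above: probe for the required size with a NULL buffer,
 * then fetch into a buffer of that size. Names are the memorystatus SPI
 * from <sys/kern_memorystatus.h>; error handling is elided.
 *
 *	int size = memorystatus_control(MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT,
 *	    0, 0, NULL, 0);                      // size-only probe
 *	memorystatus_jetsam_snapshot_t *snap = malloc(size);
 *	size = memorystatus_control(MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT,
 *	    0, 0, snap, size);                   // fetch; resets kernel count
 *	for (uint32_t i = 0; i < snap->entry_count; i++) {
 *		printf("%d\n", snap->entries[i].pid);
 *	}
 *	free(snap);
 */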
6569
6570 /*
6571 * Routine: memorystatus_cmd_grp_set_priorities
6572 * Purpose: Update priorities for a group of processes.
6573 *
6574 * [priority]
6575 * Move each process out of its effective priority
6576 * band and into a new priority band.
 * Maintains relative order from lowest to highest priority.
 * Within a single band, maintains relative order from head to tail.
6579 *
6580 * eg: before [effectivepriority | pid]
6581 * [18 | p101 ]
6582 * [17 | p55, p67, p19 ]
6583 * [12 | p103 p10 ]
6584 * [ 7 | p25 ]
6585 * [ 0 | p71, p82, ]
6586 *
6587 * after [ new band | pid]
6588 * [ xxx | p71, p82, p25, p103, p10, p55, p67, p19, p101]
6589 *
6590 * Returns: 0 on success, else non-zero.
6591 *
6592 * Caveat: We know there is a race window regarding recycled pids.
6593 * A process could be killed before the kernel can act on it here.
6594 * If a pid cannot be found in any of the jetsam priority bands,
6595 * then we simply ignore it. No harm.
6596 * But, if the pid has been recycled then it could be an issue.
6597 * In that scenario, we might move an unsuspecting process to the new
6598 * priority band. It's not clear how the kernel can safeguard
6599 * against this, but it would be an extremely rare case anyway.
 * The caller of this API might avoid such race conditions by
 * ensuring that the processes passed in the pid list are suspended.
6602 */
6603
6604
6605 static int
6606 memorystatus_cmd_grp_set_priorities(user_addr_t buffer, size_t buffer_size)
6607 {
/*
 * We only handle setting priority on a per-process basis.
 */
6612
6613 int error = 0;
6614 memorystatus_properties_entry_v1_t *entries = NULL;
6615 uint32_t entry_count = 0;
6616
6617 /* This will be the ordered proc list */
6618 typedef struct memorystatus_internal_properties {
6619 proc_t proc;
6620 int32_t priority;
6621 } memorystatus_internal_properties_t;
6622
6623 memorystatus_internal_properties_t *table = NULL;
6624 size_t table_size = 0;
6625 uint32_t table_count = 0;
6626
6627 uint32_t i = 0;
6628 uint32_t bucket_index = 0;
6629 boolean_t head_insert;
6630 int32_t new_priority;
6631
6632 proc_t p;
6633
6634 /* Verify inputs */
6635 if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
6636 error = EINVAL;
6637 goto out;
6638 }
6639
6640 entry_count = (buffer_size / sizeof(memorystatus_properties_entry_v1_t));
6641 if ((entries = (memorystatus_properties_entry_v1_t *)kalloc(buffer_size)) == NULL) {
6642 error = ENOMEM;
6643 goto out;
6644 }
6645
6646 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY, entry_count, 0, 0, 0);
6647
6648 if ((error = copyin(buffer, entries, buffer_size)) != 0) {
6649 goto out;
6650 }
6651
6652 /* Verify sanity of input priorities */
6653 if (entries[0].version == MEMORYSTATUS_MPE_VERSION_1) {
6654 if ((buffer_size % MEMORYSTATUS_MPE_VERSION_1_SIZE) != 0) {
6655 error = EINVAL;
6656 goto out;
6657 }
6658 } else {
6659 error = EINVAL;
6660 goto out;
6661 }
6662
6663 for (i = 0; i < entry_count; i++) {
6664 if (entries[i].priority == -1) {
6665 /* Use as shorthand for default priority */
6666 entries[i].priority = JETSAM_PRIORITY_DEFAULT;
6667 } else if ((entries[i].priority == system_procs_aging_band) || (entries[i].priority == applications_aging_band)) {
6668 /* Both the aging bands are reserved for internal use;
6669 * if requested, adjust to JETSAM_PRIORITY_IDLE. */
6670 entries[i].priority = JETSAM_PRIORITY_IDLE;
6671 } else if (entries[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
6672 /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle
6673 * queue */
6674 /* Deal with this later */
6675 } else if ((entries[i].priority < 0) || (entries[i].priority >= MEMSTAT_BUCKET_COUNT)) {
6676 /* Sanity check */
6677 error = EINVAL;
6678 goto out;
6679 }
6680 }
6681
6682 table_size = sizeof(memorystatus_internal_properties_t) * entry_count;
6683 if ((table = (memorystatus_internal_properties_t *)kalloc(table_size)) == NULL) {
6684 error = ENOMEM;
6685 goto out;
6686 }
6687 memset(table, 0, table_size);
6688
6689
6690 /*
6691 * For each jetsam bucket entry, spin through the input property list.
6692 * When a matching pid is found, populate an adjacent table with the
6693 * appropriate proc pointer and new property values.
6694 * This traversal automatically preserves order from lowest
6695 * to highest priority.
6696 */
6697
6698 bucket_index = 0;
6699
6700 proc_list_lock();
6701
6702 /* Create the ordered table */
6703 p = memorystatus_get_first_proc_locked(&bucket_index, TRUE);
6704 while (p && (table_count < entry_count)) {
6705 for (i = 0; i < entry_count; i++) {
6706 if (p->p_pid == entries[i].pid) {
6707 /* Build the table data */
6708 table[table_count].proc = p;
6709 table[table_count].priority = entries[i].priority;
6710 table_count++;
6711 break;
6712 }
6713 }
6714 p = memorystatus_get_next_proc_locked(&bucket_index, p, TRUE);
6715 }
6716
6717 /* We now have ordered list of procs ready to move */
6718 for (i = 0; i < table_count; i++) {
6719 p = table[i].proc;
6720 assert(p != NULL);
6721
/* Allow head inserts -- but relative order is no longer preserved for them */
6723 if (table[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
6724 new_priority = JETSAM_PRIORITY_IDLE;
6725 head_insert = true;
6726 } else {
6727 new_priority = table[i].priority;
6728 head_insert = false;
6729 }
6730
6731 /* Not allowed */
6732 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
6733 continue;
6734 }
6735
6736 /*
6737 * Take appropriate steps if moving proc out of
6738 * either of the aging bands.
6739 */
6740 if ((p->p_memstat_effectivepriority == system_procs_aging_band) || (p->p_memstat_effectivepriority == applications_aging_band)) {
6741 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
6742 }
6743
6744 memorystatus_update_priority_locked(p, new_priority, head_insert, false);
6745 }
6746
6747 proc_list_unlock();
6748
6749 /*
6750 * if (table_count != entry_count)
6751 * then some pids were not found in a jetsam band.
6752 * harmless but interesting...
6753 */
6754 out:
6755 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY, entry_count, table_count, 0, 0);
6756
6757 if (entries) {
6758 kfree(entries, buffer_size);
6759 }
6760 if (table) {
6761 kfree(table, table_size);
6762 }
6763
6764 return error;
6765 }
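
/*
 * Hypothetical user-space sketch of the group move above: one
 * memorystatus_properties_entry_v1_t per pid, submitted in a single call.
 * Relative ordering in the new band comes from the processes' current
 * bands, not from the array order. Error handling is elided.
 *
 *	memorystatus_properties_entry_v1_t entries[2] = { 0 };
 *	entries[0].version = MEMORYSTATUS_MPE_VERSION_1;
 *	entries[0].pid = some_background_pid;
 *	entries[0].priority = JETSAM_PRIORITY_IDLE;
 *	entries[1].version = MEMORYSTATUS_MPE_VERSION_1;
 *	entries[1].pid = some_app_pid;
 *	entries[1].priority = JETSAM_PRIORITY_DEFAULT;
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0,
 *	    MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY, entries, sizeof(entries));
 */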
6766
6767 memorystatus_internal_probabilities_t *memorystatus_global_probabilities_table = NULL;
6768 size_t memorystatus_global_probabilities_size = 0;
6769
6770 static int
6771 memorystatus_cmd_grp_set_probabilities(user_addr_t buffer, size_t buffer_size)
6772 {
6773 int error = 0;
6774 memorystatus_properties_entry_v1_t *entries = NULL;
6775 uint32_t entry_count = 0, i = 0;
6776 memorystatus_internal_probabilities_t *tmp_table_new = NULL, *tmp_table_old = NULL;
6777 size_t tmp_table_new_size = 0, tmp_table_old_size = 0;
6778
6779 /* Verify inputs */
6780 if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
6781 error = EINVAL;
6782 goto out;
6783 }
6784
6785 entry_count = (buffer_size / sizeof(memorystatus_properties_entry_v1_t));
6786
6787 if ((entries = (memorystatus_properties_entry_v1_t *) kalloc(buffer_size)) == NULL) {
6788 error = ENOMEM;
6789 goto out;
6790 }
6791
6792 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY, entry_count, 0, 0, 0);
6793
6794 if ((error = copyin(buffer, entries, buffer_size)) != 0) {
6795 goto out;
6796 }
6797
6798 if (entries[0].version == MEMORYSTATUS_MPE_VERSION_1) {
6799 if ((buffer_size % MEMORYSTATUS_MPE_VERSION_1_SIZE) != 0) {
6800 error = EINVAL;
6801 goto out;
6802 }
6803 } else {
6804 error = EINVAL;
6805 goto out;
6806 }
6807
6808 /* Verify sanity of input priorities */
6809 for (i = 0; i < entry_count; i++) {
6810 /*
6811 * 0 - low probability of use.
6812 * 1 - high probability of use.
6813 *
6814 * Keeping this field an int (& not a bool) to allow
6815 * us to experiment with different values/approaches
6816 * later on.
6817 */
6818 if (entries[i].use_probability > 1) {
6819 error = EINVAL;
6820 goto out;
6821 }
6822 }
6823
6824 tmp_table_new_size = sizeof(memorystatus_internal_probabilities_t) * entry_count;
6825
6826 if ((tmp_table_new = (memorystatus_internal_probabilities_t *) kalloc(tmp_table_new_size)) == NULL) {
6827 error = ENOMEM;
6828 goto out;
6829 }
6830 memset(tmp_table_new, 0, tmp_table_new_size);
6831
6832 proc_list_lock();
6833
6834 if (memorystatus_global_probabilities_table) {
6835 tmp_table_old = memorystatus_global_probabilities_table;
6836 tmp_table_old_size = memorystatus_global_probabilities_size;
6837 }
6838
6839 memorystatus_global_probabilities_table = tmp_table_new;
6840 memorystatus_global_probabilities_size = tmp_table_new_size;
6841 tmp_table_new = NULL;
6842
6843 for (i = 0; i < entry_count; i++) {
6844 /* Build the table data */
6845 strlcpy(memorystatus_global_probabilities_table[i].proc_name, entries[i].proc_name, MAXCOMLEN + 1);
6846 memorystatus_global_probabilities_table[i].use_probability = entries[i].use_probability;
6847 }
6848
6849 proc_list_unlock();
6850
6851 out:
6852 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY, entry_count, tmp_table_new_size, 0, 0);
6853
6854 if (entries) {
6855 kfree(entries, buffer_size);
6856 entries = NULL;
6857 }
6858
6859 if (tmp_table_old) {
6860 kfree(tmp_table_old, tmp_table_old_size);
6861 tmp_table_old = NULL;
6862 }
6863
6864 return error;
6865 }
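
/*
 * Hypothetical user-space sketch of publishing a use-probability table
 * via the same v1 entry type (error handling elided):
 *
 *	memorystatus_properties_entry_v1_t entry = { 0 };
 *	entry.version = MEMORYSTATUS_MPE_VERSION_1;
 *	strlcpy(entry.proc_name, "somedaemon", sizeof(entry.proc_name));
 *	entry.use_probability = 1;           // high probability of use
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0,
 *	    MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY, &entry, sizeof(entry));
 */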
6866
6867 static int
6868 memorystatus_cmd_grp_set_properties(int32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval)
6869 {
6870 int error = 0;
6871
6872 if ((flags & MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY) == MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY) {
6873 error = memorystatus_cmd_grp_set_priorities(buffer, buffer_size);
6874 } else if ((flags & MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY) == MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY) {
6875 error = memorystatus_cmd_grp_set_probabilities(buffer, buffer_size);
6876 } else {
6877 error = EINVAL;
6878 }
6879
6880 return error;
6881 }
6882
6883 /*
6884 * This routine is used to update a process's jetsam priority position and stored user_data.
6885 * It is not used for the setting of memory limits, which is why the last 6 args to the
6886 * memorystatus_update() call are 0 or FALSE.
6887 *
6888 * Flags passed into this call are used to distinguish the motivation behind a jetsam priority
6889 * transition. By default, the kernel updates the process's original requested priority when
6890 * no flag is passed. But when the MEMORYSTATUS_SET_PRIORITY_ASSERTION flag is used, the kernel
6891 * updates the process's assertion driven priority.
6892 *
6893 * The assertion flag was introduced for use by the device's assertion mediator (eg: runningboardd).
6894 * When an assertion is controlling a process's jetsam priority, it may conflict with that process's
6895 * dirty/clean (active/inactive) jetsam state. The kernel attempts to resolve a priority transition
6896 * conflict by reviewing the process state and then choosing the maximum jetsam band at play,
6897 * eg: requested priority versus assertion priority.
6898 */
6899
6900 static int
6901 memorystatus_cmd_set_priority_properties(pid_t pid, uint32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval)
6902 {
6903 int error = 0;
6904 boolean_t is_assertion = FALSE; /* priority is driven by an assertion */
6905 memorystatus_priority_properties_t mpp_entry;
6906
6907 /* Validate inputs */
6908 if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_priority_properties_t))) {
6909 return EINVAL;
6910 }
6911
6912 /* Validate flags */
6913 if (flags == 0) {
6914 /*
6915 * Default. This path updates requestedpriority.
6916 */
6917 } else {
6918 if (flags & ~(MEMORYSTATUS_SET_PRIORITY_ASSERTION)) {
6919 /*
6920 * Unsupported bit set in flag.
6921 */
6922 return EINVAL;
6923 } else if (flags & MEMORYSTATUS_SET_PRIORITY_ASSERTION) {
6924 is_assertion = TRUE;
6925 }
6926 }
6927
6928 error = copyin(buffer, &mpp_entry, buffer_size);
6929
6930 if (error == 0) {
6931 proc_t p;
6932
6933 p = proc_find(pid);
6934 if (!p) {
6935 return ESRCH;
6936 }
6937
6938 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
6939 proc_rele(p);
6940 return EPERM;
6941 }
6942
6943 if (is_assertion) {
6944 os_log(OS_LOG_DEFAULT, "memorystatus: set assertion priority(%d) target %s:%d\n",
6945 mpp_entry.priority, (*p->p_name ? p->p_name : "unknown"), p->p_pid);
6946 }
6947
6948 error = memorystatus_update(p, mpp_entry.priority, mpp_entry.user_data, is_assertion, FALSE, FALSE, 0, 0, FALSE, FALSE);
6949 proc_rele(p);
6950 }
6951
6952 return error;
6953 }
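
/*
 * Hypothetical sketch of the two call shapes handled above, as a
 * user-space client would issue them (error handling elided):
 *
 *	memorystatus_priority_properties_t props = { 0 };
 *	props.priority = JETSAM_PRIORITY_FOREGROUND;
 *
 *	// Default path: updates the requested priority.
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES,
 *	    pid, 0, &props, sizeof(props));
 *
 *	// Assertion path (e.g. an assertion mediator): updates the
 *	// assertion-driven priority instead.
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES,
 *	    pid, MEMORYSTATUS_SET_PRIORITY_ASSERTION, &props, sizeof(props));
 */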
6954
6955 static int
6956 memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval)
6957 {
6958 int error = 0;
6959 memorystatus_memlimit_properties_t mmp_entry;
6960
6961 /* Validate inputs */
6962 if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_memlimit_properties_t))) {
6963 return EINVAL;
6964 }
6965
6966 error = copyin(buffer, &mmp_entry, buffer_size);
6967
6968 if (error == 0) {
6969 error = memorystatus_set_memlimit_properties(pid, &mmp_entry);
6970 }
6971
6972 return error;
6973 }
6974
6975 static void
6976 memorystatus_get_memlimit_properties_internal(proc_t p, memorystatus_memlimit_properties_t* p_entry)
6977 {
6978 memset(p_entry, 0, sizeof(memorystatus_memlimit_properties_t));
6979
6980 if (p->p_memstat_memlimit_active > 0) {
6981 p_entry->memlimit_active = p->p_memstat_memlimit_active;
6982 } else {
6983 task_convert_phys_footprint_limit(-1, &p_entry->memlimit_active);
6984 }
6985
6986 if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) {
6987 p_entry->memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
6988 }
6989
6990 /*
6991 * Get the inactive limit and attributes
6992 */
6993 if (p->p_memstat_memlimit_inactive <= 0) {
6994 task_convert_phys_footprint_limit(-1, &p_entry->memlimit_inactive);
6995 } else {
6996 p_entry->memlimit_inactive = p->p_memstat_memlimit_inactive;
6997 }
6998 if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) {
6999 p_entry->memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
7000 }
7001 }
7002
7003 /*
7004 * When getting the memlimit settings, we can't simply call task_get_phys_footprint_limit().
7005 * That gets the proc's cached memlimit and there is no guarantee that the active/inactive
7006 * limits will be the same in the no-limit case. Instead we convert limits <= 0 using
7007 * task_convert_phys_footprint_limit(). It computes the same limit value that would be written
7008 * to the task's ledgers via task_set_phys_footprint_limit().
7009 */
7010 static int
7011 memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval)
7012 {
7013 memorystatus_memlimit_properties2_t mmp_entry;
7014
7015 /* Validate inputs */
7016 if ((pid == 0) || (buffer == USER_ADDR_NULL) ||
7017 ((buffer_size != sizeof(memorystatus_memlimit_properties_t)) &&
7018 (buffer_size != sizeof(memorystatus_memlimit_properties2_t)))) {
7019 return EINVAL;
7020 }
7021
7022 memset(&mmp_entry, 0, sizeof(memorystatus_memlimit_properties2_t));
7023
7024 proc_t p = proc_find(pid);
7025 if (!p) {
7026 return ESRCH;
7027 }
7028
7029 /*
7030 * Get the active limit and attributes.
7031 * No locks taken since we hold a reference to the proc.
7032 */
7033
7034 memorystatus_get_memlimit_properties_internal(p, &mmp_entry.v1);
7035
7036 #if CONFIG_JETSAM
7037 #if DEVELOPMENT || DEBUG
7038 /*
7039 * Get the limit increased via SPI
7040 */
7041 mmp_entry.memlimit_increase = roundToNearestMB(p->p_memlimit_increase);
7042 mmp_entry.memlimit_increase_bytes = p->p_memlimit_increase;
7043 #endif /* DEVELOPMENT || DEBUG */
7044 #endif /* CONFIG_JETSAM */
7045
7046 proc_rele(p);
7047
7048 int error = copyout(&mmp_entry, buffer, buffer_size);
7049
7050 return error;
7051 }
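
/*
 * Hypothetical user-space sketch: the buffer size selects the variant --
 * pass the v1 struct for the limits alone, or the v2 struct to also get
 * the DEVELOPMENT/DEBUG limit-increase fields (error handling elided):
 *
 *	memorystatus_memlimit_properties2_t mmp = { 0 };
 *	if (memorystatus_control(MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES,
 *	    pid, 0, &mmp, sizeof(mmp)) == 0) {
 *		printf("active %d MB%s, inactive %d MB%s\n",
 *		    mmp.v1.memlimit_active,
 *		    (mmp.v1.memlimit_active_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) ? " (fatal)" : "",
 *		    mmp.v1.memlimit_inactive,
 *		    (mmp.v1.memlimit_inactive_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) ? " (fatal)" : "");
 *	}
 */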
7052
7053
7054 /*
7055 * SPI for kbd - pr24956468
7056 * This is a very simple snapshot that calculates how much a
7057 * process's phys_footprint exceeds a specific memory limit.
7058 * Only the inactive memory limit is supported for now.
7059 * The delta is returned as bytes in excess or zero.
7060 */
7061 static int
7062 memorystatus_cmd_get_memlimit_excess_np(pid_t pid, uint32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval)
7063 {
7064 int error = 0;
7065 uint64_t footprint_in_bytes = 0;
7066 uint64_t delta_in_bytes = 0;
7067 int32_t memlimit_mb = 0;
7068 uint64_t memlimit_bytes = 0;
7069
7070 /* Validate inputs */
7071 if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(uint64_t)) || (flags != 0)) {
7072 return EINVAL;
7073 }
7074
7075 proc_t p = proc_find(pid);
7076 if (!p) {
7077 return ESRCH;
7078 }
7079
7080 /*
7081 * Get the inactive limit.
7082 * No locks taken since we hold a reference to the proc.
7083 */
7084
7085 if (p->p_memstat_memlimit_inactive <= 0) {
7086 task_convert_phys_footprint_limit(-1, &memlimit_mb);
7087 } else {
7088 memlimit_mb = p->p_memstat_memlimit_inactive;
7089 }
7090
7091 footprint_in_bytes = get_task_phys_footprint(p->task);
7092
7093 proc_rele(p);
7094
memlimit_bytes = ((uint64_t)memlimit_mb) * 1024 * 1024;      /* MB to bytes; widen first to avoid 32-bit overflow */
7096
7097 /*
7098 * Computed delta always returns >= 0 bytes
7099 */
7100 if (footprint_in_bytes > memlimit_bytes) {
7101 delta_in_bytes = footprint_in_bytes - memlimit_bytes;
7102 }
7103
7104 error = copyout(&delta_in_bytes, buffer, sizeof(delta_in_bytes));
7105
7106 return error;
7107 }
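
/*
 * Hypothetical sketch of the caller side described above: the result is
 * a single uint64_t, zero when the footprint is at or under the inactive
 * limit (error handling elided):
 *
 *	uint64_t excess_bytes = 0;
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_GET_MEMLIMIT_EXCESS,
 *	    pid, 0, &excess_bytes, sizeof(excess_bytes));
 */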
7108
7109
7110 static int
7111 memorystatus_cmd_get_pressure_status(int32_t *retval)
7112 {
7113 int error;
7114
7115 /* Need privilege for check */
7116 error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
7117 if (error) {
7118 return error;
7119 }
7120
7121 /* Inherently racy, so it's not worth taking a lock here */
7122 *retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;
7123
7124 return error;
7125 }
7126
int
memorystatus_get_pressure_status_kdp(void)
{
	return (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;
}
7132
7133 /*
7134 * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM.
7135 *
 * This call is inflexible -- it does not distinguish between active/inactive
 * or fatal/non-fatal limits. So, with the 2-level HWM scheme, preserving
 * previous behavior maps as follows:
7138 * - treat the limit passed in as both an active and inactive limit.
7139 * - treat the is_fatal_limit flag as though it applies to both active and inactive limits.
7140 *
7141 * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK
7142 * - the is_fatal_limit is FALSE, meaning the active and inactive limits are non-fatal/soft
7143 * - so mapping is (active/non-fatal, inactive/non-fatal)
7144 *
7145 * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT
7146 * - the is_fatal_limit is TRUE, meaning the process's active and inactive limits are fatal/hard
7147 * - so mapping is (active/fatal, inactive/fatal)
7148 */
7149
7150 #if CONFIG_JETSAM
7151 static int
7152 memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit)
7153 {
7154 int error = 0;
7155 memorystatus_memlimit_properties_t entry;
7156
7157 entry.memlimit_active = high_water_mark;
7158 entry.memlimit_active_attr = 0;
7159 entry.memlimit_inactive = high_water_mark;
7160 entry.memlimit_inactive_attr = 0;
7161
7162 if (is_fatal_limit == TRUE) {
7163 entry.memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
7164 entry.memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
7165 }
7166
7167 error = memorystatus_set_memlimit_properties(pid, &entry);
7168 return error;
7169 }
7170 #endif /* CONFIG_JETSAM */
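
/*
 * Hypothetical sketch of the two entry points mapped above; the limit,
 * in MB, travels in the flags argument (error handling elided):
 *
 *	// (active/non-fatal, inactive/non-fatal) -- soft high water mark
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK,
 *	    pid, 100, NULL, 0);
 *
 *	// (active/fatal, inactive/fatal) -- hard task limit
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT,
 *	    pid, 100, NULL, 0);
 */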
7171
7172 static int
7173 memorystatus_set_memlimit_properties_internal(proc_t p, memorystatus_memlimit_properties_t *p_entry)
7174 {
7175 int error = 0;
7176
7177 LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);
7178
7179 /*
7180 * Store the active limit variants in the proc.
7181 */
7182 SET_ACTIVE_LIMITS_LOCKED(p, p_entry->memlimit_active, p_entry->memlimit_active_attr);
7183
7184 /*
7185 * Store the inactive limit variants in the proc.
7186 */
7187 SET_INACTIVE_LIMITS_LOCKED(p, p_entry->memlimit_inactive, p_entry->memlimit_inactive_attr);
7188
7189 /*
7190 * Enforce appropriate limit variant by updating the cached values
7191 * and writing the ledger.
7192 * Limit choice is based on process active/inactive state.
7193 */
7194
7195 if (memorystatus_highwater_enabled) {
7196 boolean_t is_fatal;
7197 boolean_t use_active;
7198
7199 if (proc_jetsam_state_is_active_locked(p) == TRUE) {
7200 CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
7201 use_active = TRUE;
7202 } else {
7203 CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
7204 use_active = FALSE;
7205 }
7206
7207 /* Enforce the limit by writing to the ledgers */
7208 error = (task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, use_active, is_fatal) == 0) ? 0 : EINVAL;
7209
7210 MEMORYSTATUS_DEBUG(3, "memorystatus_set_memlimit_properties: new limit on pid %d (%dMB %s) current priority (%d) dirty_state?=0x%x %s\n",
7211 p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
7212 (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty,
7213 (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
7214 DTRACE_MEMORYSTATUS2(memorystatus_set_memlimit, proc_t, p, int32_t, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1));
7215 }
7216
7217 return error;
7218 }
7219
7220 static int
7221 memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry)
7222 {
7223 memorystatus_memlimit_properties_t set_entry;
7224
7225 proc_t p = proc_find(pid);
7226 if (!p) {
7227 return ESRCH;
7228 }
7229
7230 /*
7231 * Check for valid attribute flags.
7232 */
7233 const uint32_t valid_attrs = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
7234 if ((entry->memlimit_active_attr & (~valid_attrs)) != 0) {
7235 proc_rele(p);
7236 return EINVAL;
7237 }
7238 if ((entry->memlimit_inactive_attr & (~valid_attrs)) != 0) {
7239 proc_rele(p);
7240 return EINVAL;
7241 }
7242
7243 /*
7244 * Setup the active memlimit properties
7245 */
7246 set_entry.memlimit_active = entry->memlimit_active;
7247 set_entry.memlimit_active_attr = entry->memlimit_active_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
7248
7249 /*
7250 * Setup the inactive memlimit properties
7251 */
7252 set_entry.memlimit_inactive = entry->memlimit_inactive;
7253 set_entry.memlimit_inactive_attr = entry->memlimit_inactive_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
7254
/*
 * Setting a limit of <= 0 implies that the process has no
 * high-water-mark and no per-task-limit. That means the
 * system-wide task limit is in place which, by the way,
 * is always fatal.
 */
7261
7262 if (set_entry.memlimit_active <= 0) {
7263 /*
7264 * Enforce the fatal system_wide task limit while process is active.
7265 */
7266 set_entry.memlimit_active = -1;
7267 set_entry.memlimit_active_attr = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
7268 }
7269 #if CONFIG_JETSAM
7270 #if DEVELOPMENT || DEBUG
7271 else {
7272 /* add the current increase to it, for roots */
7273 set_entry.memlimit_active += roundToNearestMB(p->p_memlimit_increase);
7274 }
7275 #endif /* DEVELOPMENT || DEBUG */
7276 #endif /* CONFIG_JETSAM */
7277
7278 if (set_entry.memlimit_inactive <= 0) {
7279 /*
7280 * Enforce the fatal system_wide task limit while process is inactive.
7281 */
7282 set_entry.memlimit_inactive = -1;
7283 set_entry.memlimit_inactive_attr = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
7284 }
7285 #if CONFIG_JETSAM
7286 #if DEVELOPMENT || DEBUG
7287 else {
7288 /* add the current increase to it, for roots */
7289 set_entry.memlimit_inactive += roundToNearestMB(p->p_memlimit_increase);
7290 }
7291 #endif /* DEVELOPMENT || DEBUG */
7292 #endif /* CONFIG_JETSAM */
7293
7294 proc_list_lock();
7295
7296 int error = memorystatus_set_memlimit_properties_internal(p, &set_entry);
7297
7298 proc_list_unlock();
7299 proc_rele(p);
7300
7301 return error;
7302 }
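
/*
 * Hypothetical sketch exercising the attribute mapping above: a fatal
 * 50 MB limit while active, a non-fatal 25 MB limit while inactive
 * (error handling elided):
 *
 *	memorystatus_memlimit_properties_t entry = { 0 };
 *	entry.memlimit_active = 50;
 *	entry.memlimit_active_attr = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
 *	entry.memlimit_inactive = 25;
 *	entry.memlimit_inactive_attr = 0;
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES,
 *	    pid, 0, &entry, sizeof(entry));
 */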
7303
7304 /*
7305 * Returns the jetsam priority (effective or requested) of the process
7306 * associated with this task.
7307 */
7308 int
7309 proc_get_memstat_priority(proc_t p, boolean_t effective_priority)
7310 {
7311 if (p) {
7312 if (effective_priority) {
7313 return p->p_memstat_effectivepriority;
7314 } else {
7315 return p->p_memstat_requestedpriority;
7316 }
7317 }
7318 return 0;
7319 }
7320
7321 static int
7322 memorystatus_get_process_is_managed(pid_t pid, int *is_managed)
7323 {
7324 proc_t p = NULL;
7325
7326 /* Validate inputs */
7327 if (pid == 0) {
7328 return EINVAL;
7329 }
7330
7331 p = proc_find(pid);
7332 if (!p) {
7333 return ESRCH;
7334 }
7335
7336 proc_list_lock();
7337 *is_managed = ((p->p_memstat_state & P_MEMSTAT_MANAGED) ? 1 : 0);
7338 proc_rele_locked(p);
7339 proc_list_unlock();
7340
7341 return 0;
7342 }
7343
7344 static int
7345 memorystatus_set_process_is_managed(pid_t pid, boolean_t set_managed)
7346 {
7347 proc_t p = NULL;
7348
7349 /* Validate inputs */
7350 if (pid == 0) {
7351 return EINVAL;
7352 }
7353
7354 p = proc_find(pid);
7355 if (!p) {
7356 return ESRCH;
7357 }
7358
7359 proc_list_lock();
7360 if (set_managed == TRUE) {
7361 p->p_memstat_state |= P_MEMSTAT_MANAGED;
7362 /*
7363 * The P_MEMSTAT_MANAGED bit is set by assertiond for Apps.
7364 * Also opt them in to being frozen (they might have started
7365 * off with the P_MEMSTAT_FREEZE_DISABLED bit set.)
7366 */
7367 p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED;
7368 } else {
7369 p->p_memstat_state &= ~P_MEMSTAT_MANAGED;
7370 }
7371 proc_rele_locked(p);
7372 proc_list_unlock();
7373
7374 return 0;
7375 }
7376
7377 int
7378 memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret)
7379 {
7380 int error = EINVAL;
7381 boolean_t skip_auth_check = FALSE;
7382 os_reason_t jetsam_reason = OS_REASON_NULL;
7383
7384 #if !CONFIG_JETSAM
7385 #pragma unused(ret)
7386 #pragma unused(jetsam_reason)
7387 #endif
7388
/* We don't need entitlements if we're setting/querying the freeze preference for a process. Skip the check below. */
7390 if (args->command == MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE || args->command == MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE) {
7391 skip_auth_check = TRUE;
7392 }
7393
7394 /* Need to be root or have entitlement. */
7395 if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT) && !skip_auth_check) {
7396 error = EPERM;
7397 goto out;
7398 }
7399
7400 /*
7401 * Sanity check.
7402 * Do not enforce it for snapshots.
7403 */
7404 if (args->command != MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT) {
7405 if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) {
7406 error = EINVAL;
7407 goto out;
7408 }
7409 }
7410
7411 switch (args->command) {
7412 case MEMORYSTATUS_CMD_GET_PRIORITY_LIST:
7413 error = memorystatus_cmd_get_priority_list(args->pid, args->buffer, args->buffersize, ret);
7414 break;
7415 case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES:
7416 error = memorystatus_cmd_set_priority_properties(args->pid, args->flags, args->buffer, args->buffersize, ret);
7417 break;
7418 case MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES:
7419 error = memorystatus_cmd_set_memlimit_properties(args->pid, args->buffer, args->buffersize, ret);
7420 break;
7421 case MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES:
7422 error = memorystatus_cmd_get_memlimit_properties(args->pid, args->buffer, args->buffersize, ret);
7423 break;
7424 case MEMORYSTATUS_CMD_GET_MEMLIMIT_EXCESS:
7425 error = memorystatus_cmd_get_memlimit_excess_np(args->pid, args->flags, args->buffer, args->buffersize, ret);
7426 break;
7427 case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES:
7428 error = memorystatus_cmd_grp_set_properties((int32_t)args->flags, args->buffer, args->buffersize, ret);
7429 break;
7430 case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT:
7431 error = memorystatus_cmd_get_jetsam_snapshot((int32_t)args->flags, args->buffer, args->buffersize, ret);
7432 break;
7433 case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS:
7434 error = memorystatus_cmd_get_pressure_status(ret);
7435 break;
7436 #if CONFIG_JETSAM
7437 case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK:
7438 /*
7439 * This call does not distinguish between active and inactive limits.
7440 * Default behavior in 2-level HWM world is to set both.
7441 * Non-fatal limit is also assumed for both.
7442 */
7443 error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE);
7444 break;
7445 case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT:
7446 /*
7447 * This call does not distinguish between active and inactive limits.
7448 * Default behavior in 2-level HWM world is to set both.
7449 * Fatal limit is also assumed for both.
7450 */
7451 error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE);
7452 break;
7453 #endif /* CONFIG_JETSAM */
7454 /* Test commands */
7455 #if DEVELOPMENT || DEBUG
7456 case MEMORYSTATUS_CMD_TEST_JETSAM:
7457 jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC);
7458 if (jetsam_reason == OS_REASON_NULL) {
7459 printf("memorystatus_control: failed to allocate jetsam reason\n");
7460 }
7461
7462 error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled, jetsam_reason) ? 0 : EINVAL;
7463 break;
7464 case MEMORYSTATUS_CMD_TEST_JETSAM_SORT:
7465 error = memorystatus_cmd_test_jetsam_sort(args->pid, (int32_t)args->flags);
7466 break;
7467 #if CONFIG_JETSAM
7468 case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS:
7469 error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize);
7470 break;
7471 #endif /* CONFIG_JETSAM */
7472 #else /* DEVELOPMENT || DEBUG */
7473 #pragma unused(jetsam_reason)
7474 #endif /* DEVELOPMENT || DEBUG */
7475 case MEMORYSTATUS_CMD_AGGRESSIVE_JETSAM_LENIENT_MODE_ENABLE:
7476 if (memorystatus_aggressive_jetsam_lenient_allowed == FALSE) {
7477 #if DEVELOPMENT || DEBUG
7478 printf("Enabling Lenient Mode\n");
7479 #endif /* DEVELOPMENT || DEBUG */
7480
7481 memorystatus_aggressive_jetsam_lenient_allowed = TRUE;
7482 memorystatus_aggressive_jetsam_lenient = TRUE;
7483 error = 0;
7484 }
7485 break;
7486 case MEMORYSTATUS_CMD_AGGRESSIVE_JETSAM_LENIENT_MODE_DISABLE:
7487 #if DEVELOPMENT || DEBUG
7488 printf("Disabling Lenient mode\n");
7489 #endif /* DEVELOPMENT || DEBUG */
7490 memorystatus_aggressive_jetsam_lenient_allowed = FALSE;
7491 memorystatus_aggressive_jetsam_lenient = FALSE;
7492 error = 0;
7493 break;
7494 case MEMORYSTATUS_CMD_GET_AGGRESSIVE_JETSAM_LENIENT_MODE:
7495 *ret = (memorystatus_aggressive_jetsam_lenient ? 1 : 0);
7496 error = 0;
7497 break;
7498 case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE:
7499 case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE:
7500 error = memorystatus_low_mem_privileged_listener(args->command);
7501 break;
7502
7503 case MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE:
7504 case MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_DISABLE:
7505 error = memorystatus_update_inactive_jetsam_priority_band(args->pid, args->command, JETSAM_PRIORITY_ELEVATED_INACTIVE, args->flags ? TRUE : FALSE);
7506 break;
7507 case MEMORYSTATUS_CMD_SET_PROCESS_IS_MANAGED:
7508 error = memorystatus_set_process_is_managed(args->pid, args->flags);
7509 break;
7510
7511 case MEMORYSTATUS_CMD_GET_PROCESS_IS_MANAGED:
7512 error = memorystatus_get_process_is_managed(args->pid, ret);
7513 break;
7514
7515 #if CONFIG_FREEZE
7516 case MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE:
7517 error = memorystatus_set_process_is_freezable(args->pid, args->flags ? TRUE : FALSE);
7518 break;
7519
7520 case MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE:
7521 error = memorystatus_get_process_is_freezable(args->pid, ret);
7522 break;
7523
7524 #if DEVELOPMENT || DEBUG
7525 case MEMORYSTATUS_CMD_FREEZER_CONTROL:
7526 error = memorystatus_freezer_control(args->flags, args->buffer, args->buffersize, ret);
7527 break;
7528 #endif /* DEVELOPMENT || DEBUG */
7529 #endif /* CONFIG_FREEZE */
7530
7531 #if CONFIG_JETSAM
7532 #if DEVELOPMENT || DEBUG
7533 case MEMORYSTATUS_CMD_INCREASE_JETSAM_TASK_LIMIT:
7534 error = memorystatus_cmd_increase_jetsam_task_limit(args->pid, args->flags);
7535 break;
#endif /* DEVELOPMENT || DEBUG */
7537 #endif /* CONFIG_JETSAM */
7538
7539 default:
7540 break;
7541 }
7542
7543 out:
7544 return error;
7545 }
7546
7547 /* Coalition support */
7548
7549 /* sorting info for a particular priority bucket */
7550 typedef struct memstat_sort_info {
7551 coalition_t msi_coal;
7552 uint64_t msi_page_count;
7553 pid_t msi_pid;
7554 int msi_ntasks;
7555 } memstat_sort_info_t;
7556
7557 /*
7558 * qsort from smallest page count to largest page count
7559 *
7560 * return < 0 for a < b
7561 * 0 for a == b
7562 * > 0 for a > b
7563 */
static int
memstat_asc_cmp(const void *a, const void *b)
{
	const memstat_sort_info_t *msA = (const memstat_sort_info_t *)a;
	const memstat_sort_info_t *msB = (const memstat_sort_info_t *)b;

	/* Compare explicitly: a uint64_t difference truncated to int can wrap. */
	if (msA->msi_page_count < msB->msi_page_count) {
		return -1;
	}
	return (msA->msi_page_count > msB->msi_page_count) ? 1 : 0;
}
7572
7573 /*
7574 * Return the number of pids rearranged during this sort.
7575 */
7576 static int
7577 memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order)
7578 {
7579 #define MAX_SORT_PIDS 80
7580 #define MAX_COAL_LEADERS 10
7581
7582 unsigned int b = bucket_index;
7583 int nleaders = 0;
7584 int ntasks = 0;
7585 proc_t p = NULL;
7586 coalition_t coal = COALITION_NULL;
7587 int pids_moved = 0;
7588 int total_pids_moved = 0;
7589 int i;
7590
7591 /*
7592 * The system is typically under memory pressure when in this
7593 * path, hence, we want to avoid dynamic memory allocation.
7594 */
7595 memstat_sort_info_t leaders[MAX_COAL_LEADERS];
7596 pid_t pid_list[MAX_SORT_PIDS];
7597
7598 if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
7599 return 0;
7600 }
7601
7602 /*
7603 * Clear the array that holds coalition leader information
7604 */
7605 for (i = 0; i < MAX_COAL_LEADERS; i++) {
7606 leaders[i].msi_coal = COALITION_NULL;
7607 leaders[i].msi_page_count = 0; /* will hold total coalition page count */
7608 leaders[i].msi_pid = 0; /* will hold coalition leader pid */
7609 leaders[i].msi_ntasks = 0; /* will hold the number of tasks in a coalition */
7610 }
7611
7612 p = memorystatus_get_first_proc_locked(&b, FALSE);
7613 while (p) {
7614 coal = task_get_coalition(p->task, COALITION_TYPE_JETSAM);
7615 if (coalition_is_leader(p->task, coal)) {
7616 if (nleaders < MAX_COAL_LEADERS) {
7617 int coal_ntasks = 0;
7618 uint64_t coal_page_count = coalition_get_page_count(coal, &coal_ntasks);
7619 leaders[nleaders].msi_coal = coal;
7620 leaders[nleaders].msi_page_count = coal_page_count;
7621 leaders[nleaders].msi_pid = p->p_pid; /* the coalition leader */
7622 leaders[nleaders].msi_ntasks = coal_ntasks;
7623 nleaders++;
7624 } else {
7625 /*
7626 * We've hit MAX_COAL_LEADERS meaning we can handle no more coalitions.
7627 * Abandoned coalitions will linger at the tail of the priority band
7628 * when this sort session ends.
7629 * TODO: should this be an assert?
7630 */
7631 printf("%s: WARNING: more than %d leaders in priority band [%d]\n",
7632 __FUNCTION__, MAX_COAL_LEADERS, bucket_index);
7633 break;
7634 }
7635 }
7636 p = memorystatus_get_next_proc_locked(&b, p, FALSE);
7637 }
7638
7639 if (nleaders == 0) {
7640 /* Nothing to sort */
7641 return 0;
7642 }
7643
/*
 * Sort the coalition leader array from smallest coalition page count
 * to largest. When re-inserted into the priority bucket, the smallest
 * coalition is handled first, so it ends up being jetsammed last.
 */
7649 if (nleaders > 1) {
7650 qsort(leaders, nleaders, sizeof(memstat_sort_info_t), memstat_asc_cmp);
7651 }
7652
7653 #if 0
7654 for (i = 0; i < nleaders; i++) {
7655 printf("%s: coal_leader[%d of %d] pid[%d] pages[%llu] ntasks[%d]\n",
7656 __FUNCTION__, i, nleaders, leaders[i].msi_pid, leaders[i].msi_page_count,
7657 leaders[i].msi_ntasks);
7658 }
7659 #endif
7660
7661 /*
7662 * During coalition sorting, processes in a priority band are rearranged
7663 * by being re-inserted at the head of the queue. So, when handling a
7664 * list, the first process that gets moved to the head of the queue,
7665 * ultimately gets pushed toward the queue tail, and hence, jetsams last.
7666 *
7667 * So, for example, the coalition leader is expected to jetsam last,
7668 * after its coalition members. Therefore, the coalition leader is
7669 * inserted at the head of the queue first.
7670 *
7671 * After processing a coalition, the jetsam order is as follows:
7672 * undefs(jetsam first), extensions, xpc services, leader(jetsam last)
7673 */
7674
7675 /*
7676 * Coalition members are rearranged in the priority bucket here,
7677 * based on their coalition role.
7678 */
7679 total_pids_moved = 0;
7680 for (i = 0; i < nleaders; i++) {
7681 /* a bit of bookkeeping */
7682 pids_moved = 0;
7683
7684 /* Coalition leaders are jetsammed last, so move into place first */
7685 pid_list[0] = leaders[i].msi_pid;
7686 pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, 1);
7687
7688 /* xpc services should jetsam after extensions */
7689 ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_XPC,
7690 coal_sort_order, pid_list, MAX_SORT_PIDS);
7691
7692 if (ntasks > 0) {
7693 pids_moved += memorystatus_move_list_locked(bucket_index, pid_list,
7694 (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
7695 }
7696
7697 /* extensions should jetsam after unmarked processes */
7698 ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_EXT,
7699 coal_sort_order, pid_list, MAX_SORT_PIDS);
7700
7701 if (ntasks > 0) {
7702 pids_moved += memorystatus_move_list_locked(bucket_index, pid_list,
7703 (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
7704 }
7705
7706 /* undefined coalition members should be the first to jetsam */
7707 ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_UNDEF,
7708 coal_sort_order, pid_list, MAX_SORT_PIDS);
7709
7710 if (ntasks > 0) {
7711 pids_moved += memorystatus_move_list_locked(bucket_index, pid_list,
7712 (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
7713 }
7714
7715 #if 0
7716 if (pids_moved == leaders[i].msi_ntasks) {
7717 /*
7718 * All the pids in the coalition were found in this band.
7719 */
7720 printf("%s: pids_moved[%d] equal total coalition ntasks[%d] \n", __FUNCTION__,
7721 pids_moved, leaders[i].msi_ntasks);
7722 } else if (pids_moved > leaders[i].msi_ntasks) {
7723 /*
7724 * Apparently new coalition members showed up during the sort?
7725 */
7726 printf("%s: pids_moved[%d] were greater than expected coalition ntasks[%d] \n", __FUNCTION__,
7727 pids_moved, leaders[i].msi_ntasks);
7728 } else {
7729 /*
7730 * Apparently not all the pids in the coalition were found in this band?
7731 */
7732 printf("%s: pids_moved[%d] were less than expected coalition ntasks[%d] \n", __FUNCTION__,
7733 pids_moved, leaders[i].msi_ntasks);
7734 }
7735 #endif
7736
7737 total_pids_moved += pids_moved;
7738 } /* end for */
7739
7740 return total_pids_moved;
7741 }
7742
7743
7744 /*
7745 * Traverse a list of pids, searching for each within the priority band provided.
7746 * If pid is found, move it to the front of the priority band.
7747 * Never searches outside the priority band provided.
7748 *
7749 * Input:
7750 * bucket_index - jetsam priority band.
7751 * pid_list - pointer to a list of pids.
7752 * list_sz - number of pids in the list.
7753 *
7754 * Pid list ordering is important in that,
7755 * pid_list[n] is expected to jetsam ahead of pid_list[n+1].
7756 * The sort_order is set by the coalition default.
7757 *
7758 * Return:
7759 * the number of pids found and hence moved within the priority band.
7760 */
7761 static int
7762 memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz)
7763 {
7764 memstat_bucket_t *current_bucket;
7765 int i;
7766 int found_pids = 0;
7767
7768 if ((pid_list == NULL) || (list_sz <= 0)) {
7769 return 0;
7770 }
7771
7772 if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
7773 return 0;
7774 }
7775
7776 current_bucket = &memstat_bucket[bucket_index];
7777 for (i = 0; i < list_sz; i++) {
7778 unsigned int b = bucket_index;
7779 proc_t p = NULL;
7780 proc_t aProc = NULL;
7781 pid_t aPid;
7782 int list_index;
7783
7784 list_index = ((list_sz - 1) - i);
7785 aPid = pid_list[list_index];
7786
7787 /* never search beyond bucket_index provided */
7788 p = memorystatus_get_first_proc_locked(&b, FALSE);
7789 while (p) {
7790 if (p->p_pid == aPid) {
7791 aProc = p;
7792 break;
7793 }
7794 p = memorystatus_get_next_proc_locked(&b, p, FALSE);
7795 }
7796
7797 if (aProc == NULL) {
7798 /* pid not found in this band, just skip it */
7799 continue;
7800 } else {
7801 TAILQ_REMOVE(&current_bucket->list, aProc, p_memstat_list);
7802 TAILQ_INSERT_HEAD(&current_bucket->list, aProc, p_memstat_list);
7803 found_pids++;
7804 }
7805 }
7806 return found_pids;
7807 }
7808
7809 int
7810 memorystatus_get_proccnt_upto_priority(int32_t max_bucket_index)
7811 {
7812 int32_t i = JETSAM_PRIORITY_IDLE;
7813 int count = 0;
7814
7815 if (max_bucket_index >= MEMSTAT_BUCKET_COUNT) {
7816 return -1;
7817 }
7818
7819 while (i <= max_bucket_index) {
7820 count += memstat_bucket[i++].count;
7821 }
7822
7823 return count;
7824 }
7825
7826 int
7827 memorystatus_update_priority_for_appnap(proc_t p, boolean_t is_appnap)
7828 {
7829 #if !CONFIG_JETSAM
7830 if (!p || (!isApp(p)) || (p->p_memstat_state & (P_MEMSTAT_INTERNAL | P_MEMSTAT_MANAGED))) {
7831 /*
7832 * Ineligible processes OR system processes e.g. launchd.
7833 *
7834 * We also skip processes that have the P_MEMSTAT_MANAGED bit set, i.e.
7835 * they're managed by assertiond. These are iOS apps that have been ported
7836 * to macOS. assertiond might be in the process of modifying the app's
7837 * priority / memory limit - so it might have the proc_list lock, and then try
7838 * to take the task lock. Meanwhile we've entered this function with the task lock
7839 * held, and we need the proc_list lock below. So we'll deadlock with assertiond.
7840 *
7841 * It should be fine to read the P_MEMSTAT_MANAGED bit without the proc_list
7842 * lock here, since assertiond only sets this bit on process launch.
7843 */
7844 return -1;
7845 }
7846
7847 /*
7848 * For macOS only:
7849 * We would like to use memorystatus_update() here to move the processes
7850 * within the bands. Unfortunately memorystatus_update() calls
7851 * memorystatus_update_priority_locked() which uses any band transitions
7852 * as an indication to modify ledgers. For that it needs the task lock
7853 * and since we came into this function with the task lock held, we'll deadlock.
7854 *
7855 * Unfortunately we can't completely disable ledger updates because we still
7856 * need the ledger updates for a subset of processes i.e. daemons.
7857 * When all processes on all platforms support memory limits, we can simply call
7858 * memorystatus_update().
7859 *
 * It also has some logic to deal with 'aging' which, currently, is only
 * applicable on CONFIG_JETSAM configs. So, until every platform has
 * CONFIG_JETSAM, we'll need to do this explicit band transition.
7863 */
7864
7865 memstat_bucket_t *current_bucket, *new_bucket;
7866 int32_t priority = 0;
7867
7868 proc_list_lock();
7869
7870 if (((p->p_listflag & P_LIST_EXITED) != 0) ||
7871 (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED))) {
/*
 * If the process is on its way out OR
 * jetsam has already tried and failed to kill this process,
 * let's skip the whole jetsam band transition.
 */
7877 proc_list_unlock();
7878 return 0;
7879 }
7880
7881 if (is_appnap) {
7882 current_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
7883 new_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
7884 priority = JETSAM_PRIORITY_IDLE;
7885 } else {
7886 if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) {
7887 /*
7888 * It is possible that someone pulled this process
7889 * out of the IDLE band without updating its app-nap
7890 * parameters.
7891 */
7892 proc_list_unlock();
7893 return 0;
7894 }
7895
7896 current_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
7897 new_bucket = &memstat_bucket[p->p_memstat_requestedpriority];
7898 priority = p->p_memstat_requestedpriority;
7899 }
7900
7901 TAILQ_REMOVE(&current_bucket->list, p, p_memstat_list);
7902 current_bucket->count--;
7903 if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) {
7904 current_bucket->relaunch_high_count--;
7905 }
7906 TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
7907 new_bucket->count++;
7908 if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) {
7909 new_bucket->relaunch_high_count++;
7910 }
7911 /*
7912 * Record idle start or idle delta.
7913 */
7914 if (p->p_memstat_effectivepriority == priority) {
7915 /*
7916 * This process is not transitioning between
7917 * jetsam priority buckets. Do nothing.
7918 */
7919 } else if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) {
7920 uint64_t now;
7921 /*
7922 * Transitioning out of the idle priority bucket.
7923 * Record idle delta.
7924 */
7925 assert(p->p_memstat_idle_start != 0);
7926 now = mach_absolute_time();
7927 if (now > p->p_memstat_idle_start) {
7928 p->p_memstat_idle_delta = now - p->p_memstat_idle_start;
7929 }
7930 } else if (priority == JETSAM_PRIORITY_IDLE) {
7931 /*
7932 * Transitioning into the idle priority bucket.
7933 * Record idle start.
7934 */
7935 p->p_memstat_idle_start = mach_absolute_time();
7936 }
7937
7938 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CHANGE_PRIORITY), p->p_pid, priority, p->p_memstat_effectivepriority, 0, 0);
7939
7940 p->p_memstat_effectivepriority = priority;
7941
7942 proc_list_unlock();
7943
7944 return 0;
7945
7946 #else /* !CONFIG_JETSAM */
7947 #pragma unused(p)
7948 #pragma unused(is_appnap)
7949 return -1;
7950 #endif /* !CONFIG_JETSAM */
7951 }
7952
7953 uint64_t
7954 memorystatus_available_memory_internal(proc_t p)
7955 {
7956 #ifdef XNU_TARGET_OS_OSX
7957 #pragma unused(p)
7958 return 0;
7959 #else
7960 const uint64_t footprint_in_bytes = get_task_phys_footprint(p->task);
7961 int32_t memlimit_mb;
7962 int64_t memlimit_bytes;
7963 int64_t rc;
7964
7965 if (isApp(p) == FALSE) {
7966 return 0;
7967 }
7968
7969 if (p->p_memstat_memlimit > 0) {
7970 memlimit_mb = p->p_memstat_memlimit;
7971 } else if (task_convert_phys_footprint_limit(-1, &memlimit_mb) != KERN_SUCCESS) {
7972 return 0;
7973 }
7974
7975 if (memlimit_mb <= 0) {
7976 memlimit_bytes = INT_MAX & ~((1 << 20) - 1);
7977 } else {
7978 memlimit_bytes = ((int64_t) memlimit_mb) << 20;
7979 }
7980
7981 rc = memlimit_bytes - footprint_in_bytes;
7982
7983 return (rc >= 0) ? rc : 0;
7984 #endif
7985 }
7986
7987 int
7988 memorystatus_available_memory(struct proc *p, __unused struct memorystatus_available_memory_args *args, uint64_t *ret)
7989 {
7990 *ret = memorystatus_available_memory_internal(p);
7991
7992 return 0;
7993 }
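
/*
 * Worked example of the clamp above (embedded targets only): with a
 * 100 MB effective limit and a 38 MB phys_footprint, the call reports
 * 100 MB - 38 MB = 62 MB of available memory; once the footprint meets
 * or exceeds the limit, it reports 0 rather than going negative.
 */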
7994
7995 #if CONFIG_JETSAM
7996 #if DEVELOPMENT || DEBUG
7997 static int
7998 memorystatus_cmd_increase_jetsam_task_limit(pid_t pid, uint32_t byte_increase)
7999 {
8000 memorystatus_memlimit_properties_t mmp_entry;
8001
8002 /* Validate inputs */
8003 if ((pid == 0) || (byte_increase == 0)) {
8004 return EINVAL;
8005 }
8006
8007 proc_t p = proc_find(pid);
8008
8009 if (!p) {
8010 return ESRCH;
8011 }
8012
8013 const uint32_t current_memlimit_increase = roundToNearestMB(p->p_memlimit_increase);
8014 const uint32_t page_aligned_increase = round_page(p->p_memlimit_increase + byte_increase); /* round to page */
8015
8016 proc_list_lock();
8017
8018 memorystatus_get_memlimit_properties_internal(p, &mmp_entry);
8019
8020 if (mmp_entry.memlimit_active > 0) {
8021 mmp_entry.memlimit_active -= current_memlimit_increase;
8022 mmp_entry.memlimit_active += roundToNearestMB(page_aligned_increase);
8023 }
8024
8025 if (mmp_entry.memlimit_inactive > 0) {
8026 mmp_entry.memlimit_inactive -= current_memlimit_increase;
8027 mmp_entry.memlimit_inactive += roundToNearestMB(page_aligned_increase);
8028 }
8029
8030 /*
8031 * Store the updated delta limit in the proc.
8032 */
8033 p->p_memlimit_increase = page_aligned_increase;
8034
8035 int error = memorystatus_set_memlimit_properties_internal(p, &mmp_entry);
8036
8037 proc_list_unlock();
8038 proc_rele(p);
8039
8040 return error;
8041 }
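
/*
 * Hypothetical sketch (DEVELOPMENT || DEBUG only): a test harness would
 * pass the extra allowance, in bytes, through the flags argument:
 *
 *	(void)memorystatus_control(MEMORYSTATUS_CMD_INCREASE_JETSAM_TASK_LIMIT,
 *	    pid, 16 * 1024 * 1024, NULL, 0);     // 16 MB of extra headroom
 */
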
#endif /* DEVELOPMENT || DEBUG */
8043 #endif /* CONFIG_JETSAM */