/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <libkern/libkern.h>
#include <mach/mach_time.h>
#include <mach/task.h>
#include <mach/host_priv.h>
#include <mach/mach_host.h>
#include <pexpert/pexpert.h>
#include <sys/kern_event.h>
#include <sys/proc_info.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#if CONFIG_FREEZE
#include <vm/vm_map.h>
#endif /* CONFIG_FREEZE */

#include <sys/kern_memorystatus.h>
/* These are very verbose printfs(), enable with
 * MEMORYSTATUS_DEBUG_LOG
 */
#if MEMORYSTATUS_DEBUG_LOG
#define MEMORYSTATUS_DEBUG(cond, format, ...)        \
do {                                                 \
	if (cond) { printf(format, ##__VA_ARGS__); } \
} while(0)
#else
#define MEMORYSTATUS_DEBUG(cond, format, ...)
#endif
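/*
 * Note: the no-op variant above compiles its arguments away entirely, so a
 * call such as
 *
 *	MEMORYSTATUS_DEBUG(1, "memorystatus: pid %d state 0x%x\n", p->p_pid, p->p_memstat_state);
 *
 * costs nothing on release kernels; its arguments must therefore be free of
 * side effects that the surrounding code depends on.
 */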
/* General tunables */

unsigned long delta_percentage = 5;
unsigned long critical_threshold_percentage = 5;
unsigned long idle_offset_percentage = 5;
unsigned long pressure_threshold_percentage = 15;
unsigned long freeze_threshold_percentage = 50;
/* General memorystatus stuff */

struct klist memorystatus_klist;
static lck_mtx_t memorystatus_klist_mutex;

static void memorystatus_klist_lock(void);
static void memorystatus_klist_unlock(void);

static uint64_t memorystatus_idle_delay_time = 0;
/*
 * Memorystatus kevents
 */

static int filt_memorystatusattach(struct knote *kn);
static void filt_memorystatusdetach(struct knote *kn);
static int filt_memorystatus(struct knote *kn, long hint);

struct filterops memorystatus_filtops = {
	.f_attach = filt_memorystatusattach,
	.f_detach = filt_memorystatusdetach,
	.f_event = filt_memorystatus,
};

enum {
	kMemorystatusNoPressure = 1,
	kMemorystatusPressure = 2
};
/* Idle guard handling */

static int32_t memorystatus_scheduled_idle_demotions = 0;

static thread_call_t memorystatus_idle_demotion_call;

static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2);
static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state);
static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state);
static void memorystatus_reschedule_idle_demotion_locked(void);

static void memorystatus_update_priority_locked(proc_t p, int priority);
int memorystatus_wakeup = 0;

unsigned int memorystatus_level = 0;

static int memorystatus_list_count = 0;

#define MEMSTAT_BUCKET_COUNT (JETSAM_PRIORITY_MAX + 1)
typedef struct memstat_bucket {
	TAILQ_HEAD(, proc) list;
	int count;
} memstat_bucket_t;

memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT];

uint64_t memstat_idle_demotion_deadline = 0;

static unsigned int memorystatus_dirty_count = 0;

static boolean_t kill_idle_exit = FALSE;
int
memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret)
{
	user_addr_t level = 0;

	level = args->level;

	if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) {
		return EFAULT;
	}

	return 0;
}
static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search);
static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search);

static void memorystatus_thread(void *param __unused, wait_result_t wr __unused);
#if CONFIG_JETSAM

/* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */
#define LEGACY_HIWATER 1

static int memorystatus_highwater_enabled = 1;

extern unsigned int vm_page_free_count;
extern unsigned int vm_page_active_count;
extern unsigned int vm_page_inactive_count;
extern unsigned int vm_page_throttled_count;
extern unsigned int vm_page_purgeable_count;
extern unsigned int vm_page_wire_count;

unsigned int memorystatus_delta = 0;

static unsigned int memorystatus_available_pages = (unsigned int)-1;
static unsigned int memorystatus_available_pages_pressure = 0;
static unsigned int memorystatus_available_pages_critical = 0;
static unsigned int memorystatus_available_pages_critical_base = 0;
static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
static unsigned int memorystatus_available_pages_critical_idle_offset = 0;
#if DEVELOPMENT || DEBUG
static unsigned int memorystatus_jetsam_panic_debug = 0;

static unsigned int memorystatus_jetsam_policy = kPolicyDefault;
static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0;
#endif /* DEVELOPMENT || DEBUG */

static boolean_t kill_under_pressure = FALSE;

static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot;
#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries

static unsigned int memorystatus_jetsam_snapshot_count = 0;
static unsigned int memorystatus_jetsam_snapshot_max = 0;
static void memorystatus_clear_errors(void);
static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint);
static int memorystatus_send_note(int event_code, void *data, size_t data_length);
static uint32_t memorystatus_build_state(proc_t p);
static void memorystatus_update_levels_locked(boolean_t critical_only);
static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured);

static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors);
#if LEGACY_HIWATER
static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors);
#endif

static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause);

#endif /* CONFIG_JETSAM */
#if VM_PRESSURE_EVENTS

#include "vm_pressure.h"

extern boolean_t memorystatus_warn_process(pid_t pid);

vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal;

#endif /* VM_PRESSURE_EVENTS */
#if CONFIG_FREEZE

boolean_t memorystatus_freeze_enabled = FALSE;
int memorystatus_freeze_wakeup = 0;

static inline boolean_t memorystatus_can_freeze_processes(void);
static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);

static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);

static unsigned int memorystatus_freeze_threshold = 0;

static unsigned int memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
static unsigned int memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;

static unsigned int memorystatus_frozen_count = 0;

static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;

static uint64_t memorystatus_freeze_count = 0;
static uint64_t memorystatus_freeze_pageouts = 0;

static throttle_interval_t throttle_intervals[] = {
	{      60,  8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */
	{ 24 * 60,  1, 0, 0, { 0, 0 }, FALSE }, /* 24 hour long interval, no burst */
};

static uint64_t memorystatus_freeze_throttle_count = 0;

static unsigned int memorystatus_suspended_count = 0;
static unsigned int memorystatus_suspended_footprint_total = 0;

#endif /* CONFIG_FREEZE */
#if DEVELOPMENT || DEBUG

#if CONFIG_JETSAM

/* Debug aid to aid determination of limit */

static int
sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	proc_t p;
	unsigned int b = 0;
	int error, enable = 0;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}

	error = SYSCTL_IN(req, &enable, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}

	if (!(enable == 0 || enable == 1)) {
		return EINVAL;
	}

	proc_list_lock();

	p = memorystatus_get_first_proc_locked(&b, TRUE);
	while (p) {
		int32_t memlimit;

		if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
			/* A background-only limit does not apply at foreground priority */
			memlimit = -1;
		} else {
			memlimit = p->p_memstat_memlimit;
		}

		if (enable) {
			task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);
		} else {
			task_set_phys_footprint_limit_internal(p->task, -1, NULL, TRUE);
		}

		p = memorystatus_get_next_proc_locked(&b, p, TRUE);
	}

	memorystatus_highwater_enabled = enable;

	proc_list_unlock();

	return 0;
}
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, "");
/* Diagnostic code */

enum {
	kJetsamDiagnosticModeNone =              0,
	kJetsamDiagnosticModeAll  =              1,
	kJetsamDiagnosticModeStopAtFirstActive = 2,
	kJetsamDiagnosticModeCount
} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;

static int jetsam_diagnostic_suspended_one_active_proc = 0;
static int
sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	const char *diagnosticStrings[] = {
		"jetsam: diagnostic mode: resetting critical level.",
		"jetsam: diagnostic mode: will examine all processes",
		"jetsam: diagnostic mode: will stop at first active process"
	};

	int error, val = jetsam_diagnostic_mode;
	boolean_t changed = FALSE;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if ((val < 0) || (val >= kJetsamDiagnosticModeCount)) {
		printf("jetsam: diagnostic mode: invalid value - %d\n", val);
		return EINVAL;
	}

	proc_list_lock();

	if ((unsigned int) val != jetsam_diagnostic_mode) {
		jetsam_diagnostic_mode = val;

		memorystatus_jetsam_policy &= ~kPolicyDiagnoseActive;

		switch (jetsam_diagnostic_mode) {
		case kJetsamDiagnosticModeNone:
			/* Already cleared */
			break;
		case kJetsamDiagnosticModeAll:
			memorystatus_jetsam_policy |= kPolicyDiagnoseAll;
			break;
		case kJetsamDiagnosticModeStopAtFirstActive:
			memorystatus_jetsam_policy |= kPolicyDiagnoseFirst;
			break;
		default:
			/* Already validated */
			break;
		}

		memorystatus_update_levels_locked(FALSE);
		changed = TRUE;
	}

	proc_list_unlock();

	if (changed) {
		printf("%s\n", diagnosticStrings[val]);
	}

	return (0);
}
SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY,
    &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, "");
#if VM_PRESSURE_EVENTS

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, "");

static int
sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	int error = 0;

	error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
	if (error) {
		return (error);
	}

	return SYSCTL_OUT(req, &memorystatus_vm_pressure_level, sizeof(memorystatus_vm_pressure_level));
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");

static int
sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error, pid = 0;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	return vm_dispatch_pressure_note_to_pid(pid, FALSE);
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_vm_pressure_send, "I", "");

#endif /* VM_PRESSURE_EVENTS */

#endif /* CONFIG_JETSAM */

#endif /* DEVELOPMENT || DEBUG */
#if CONFIG_FREEZE

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");

SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_count, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_count, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");

boolean_t memorystatus_freeze_throttle_enabled = TRUE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");
/*
 * Enabled via: <rdar://problem/13248767> Enable the sysctl_memorystatus_freeze/thaw sysctls on Release KC
 *
 * TODO: Manual trigger of freeze and thaw for dev / debug kernels only.
 * <rdar://problem/13248795> Disable/restrict the sysctl_memorystatus_freeze/thaw sysctls on Release KC
 */

static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error, pid = 0;
	proc_t p;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	p = proc_find(pid);
	if (p != NULL) {
		uint32_t purgeable, wired, clean, dirty;
		boolean_t shared;
		uint32_t max_pages = 0;

		if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
			max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
		} else {
			max_pages = UINT32_MAX - 1;
		}

		error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
		proc_rele(p);

		if (error)
			error = EIO;
		return error;
	}
	return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");

static int
sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error, pid = 0;
	proc_t p;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	p = proc_find(pid);
	if (p != NULL) {
		error = task_thaw(p->task);
		proc_rele(p);

		if (error)
			error = EIO;
		return error;
	}
	return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");

#endif /* CONFIG_FREEZE */
extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation,
    void *parameter,
    integer_t priority,
    thread_t *new_thread);
static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) {
	memstat_bucket_t *current_bucket;
	proc_t next_p = NULL;

	if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) {
		return NULL;
	}

	current_bucket = &memstat_bucket[*bucket_index];
	next_p = TAILQ_FIRST(&current_bucket->list);
	if (!next_p && search) {
		while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
			current_bucket = &memstat_bucket[*bucket_index];
			next_p = TAILQ_FIRST(&current_bucket->list);
		}
	}

	return next_p;
}
static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) {
	memstat_bucket_t *current_bucket;
	proc_t next_p = NULL;

	if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) {
		return NULL;
	}

	next_p = TAILQ_NEXT(p, p_memstat_list);
	while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
		current_bucket = &memstat_bucket[*bucket_index];
		next_p = TAILQ_FIRST(&current_bucket->list);
	}

	return next_p;
}
__private_extern__ void
memorystatus_init(void)
{
	thread_t thread = THREAD_NULL;
	kern_return_t result;
	int i;

	nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);

	for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
		TAILQ_INIT(&memstat_bucket[i].list);
		memstat_bucket[i].count = 0;
	}

	memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL);

	/* Apply overrides */
	PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage));
	assert(delta_percentage < 100);
	PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage));
	assert(critical_threshold_percentage < 100);
	PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage));
	assert(idle_offset_percentage < 100);
	PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage));
	assert(pressure_threshold_percentage < 100);
	PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage));
	assert(freeze_threshold_percentage < 100);

#if CONFIG_JETSAM
	memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;

	memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;

	memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;
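	/*
	 * Worked example (illustrative only, using the default tunables above):
	 * on a device with 512 MB of DRAM and 4 KB pages, atop_64(max_mem) is
	 * 131072 pages, so memorystatus_delta = 5 * 131072 / 100 = 6553 pages,
	 * and memorystatus_available_pages_critical_base = (5 / 5) * 6553 = 6553.
	 * Note that (critical_threshold_percentage / delta_percentage) is integer
	 * division: a critical threshold percentage smaller than delta_percentage
	 * would truncate to a zero critical base.
	 */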
	memorystatus_jetsam_snapshot_max = maxproc;
	memorystatus_jetsam_snapshot =
	    (memorystatus_jetsam_snapshot_t *)kalloc(sizeof(memorystatus_jetsam_snapshot_t) +
	    sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max);
	if (!memorystatus_jetsam_snapshot) {
		panic("Could not allocate memorystatus_jetsam_snapshot");
	}

	/* No contention at this point */
	memorystatus_update_levels_locked(FALSE);
#endif /* CONFIG_JETSAM */

#if CONFIG_FREEZE
	memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta;
#endif

	result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread);
	if (result == KERN_SUCCESS) {
		thread_deallocate(thread);
	} else {
		panic("Could not create memorystatus_thread");
	}
}
/* Centralised for the purposes of allowing panic-on-jetsam */
extern void
vm_wake_compactor_swapper(void);

static boolean_t
memorystatus_do_kill(proc_t p, uint32_t cause) {

	int retval = 0;

#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
	if (memorystatus_jetsam_panic_debug & (1 << cause)) {
		panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause);
	}
#else
#pragma unused(cause)
#endif

	int jetsam_flags = P_LTERM_JETSAM;
	switch (cause) {
		case kMemorystatusKilledHiwat:			jetsam_flags |= P_JETSAM_HIWAT; break;
		case kMemorystatusKilledVnodes:			jetsam_flags |= P_JETSAM_VNODE; break;
		case kMemorystatusKilledVMPageShortage:		jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break;
		case kMemorystatusKilledVMThrashing:		jetsam_flags |= P_JETSAM_VMTHRASHING; break;
		case kMemorystatusKilledPerProcessLimit:	jetsam_flags |= P_JETSAM_PID; break;
		case kMemorystatusKilledIdleExit:		jetsam_flags |= P_JETSAM_IDLEEXIT; break;
	}
	retval = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags);

	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
		vm_wake_compactor_swapper();
	}

	return (retval == 0);
}
static void
memorystatus_check_levels_locked(void) {
#if CONFIG_JETSAM
	/* Update levels */
	memorystatus_update_levels_locked(TRUE);
#endif
}
static void
memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2)
{
	proc_t p;
	uint64_t current_time;
	memstat_bucket_t *demotion_bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n");

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0);

	current_time = mach_absolute_time();

	proc_list_lock();

	demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
	p = TAILQ_FIRST(&demotion_bucket->list);

	while (p) {
		MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid);

		assert(p->p_memstat_idledeadline);
		assert(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS);
		assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED);

		if (current_time >= p->p_memstat_idledeadline) {
#if DEBUG || DEVELOPMENT
			if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) {
				printf("memorystatus_perform_idle_demotion: moving process %d to idle band, but never dirtied (0x%x)!\n", p->p_pid, p->p_memstat_dirty);
			}
#endif
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE);

			// The prior process has moved out of the demotion bucket, so grab the new head and continue
			p = TAILQ_FIRST(&demotion_bucket->list);
		} else {
			// No further candidates
			break;
		}
	}

	memorystatus_reschedule_idle_demotion_locked();

	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
static void
memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state)
{
	MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n",
	    p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions);

	assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS));

	if (set_state) {
		assert(p->p_memstat_idledeadline == 0);
		p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time;
	}

	assert(p->p_memstat_idledeadline);

	memorystatus_scheduled_idle_demotions++;
}
static void
memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state)
{
	MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n",
	    p->p_pid, clear_state, memorystatus_scheduled_idle_demotions);

	assert(p->p_memstat_idledeadline);

	if (clear_state) {
		p->p_memstat_idledeadline = 0;
		p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
	}

	memorystatus_scheduled_idle_demotions--;
	assert(memorystatus_scheduled_idle_demotions >= 0);
}
static void
memorystatus_reschedule_idle_demotion_locked(void) {
	if (0 == memorystatus_scheduled_idle_demotions) {
		if (memstat_idle_demotion_deadline) {
			/* Transitioned 1->0, so cancel next call */
			thread_call_cancel(memorystatus_idle_demotion_call);
			memstat_idle_demotion_deadline = 0;
		}
	} else {
		memstat_bucket_t *demotion_bucket;
		proc_t p;

		demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
		p = TAILQ_FIRST(&demotion_bucket->list);
		assert(p && p->p_memstat_idledeadline);

		if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){
			thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline);
			memstat_idle_demotion_deadline = p->p_memstat_idledeadline;
		}
	}
}
int
memorystatus_add(proc_t p, boolean_t locked)
{
	memstat_bucket_t *bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);

	if (!locked) {
		proc_list_lock();
	}

	/* Processes marked internal do not have priority tracked */
	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		goto exit;
	}

	bucket = &memstat_bucket[p->p_memstat_effectivepriority];

	TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list);
	bucket->count++;

	memorystatus_list_count++;

	memorystatus_check_levels_locked();

exit:
	if (!locked) {
		proc_list_unlock();
	}

	return 0;
}
static void
memorystatus_update_priority_locked(proc_t p, int priority)
{
	memstat_bucket_t *old_bucket, *new_bucket;

	assert(priority < MEMSTAT_BUCKET_COUNT);

	/* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */
	if ((p->p_listflag & P_LIST_EXITED) != 0) {
		return;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d\n", p->p_pid, priority);

	old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
	TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list);
	old_bucket->count--;

	new_bucket = &memstat_bucket[priority];
	TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
	new_bucket->count++;

#if CONFIG_JETSAM
	if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) {
		if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) ||
		    ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) {
			int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit;
			task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);
		}
	}
#endif

	p->p_memstat_effectivepriority = priority;

	memorystatus_check_levels_locked();
}
int
memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background)
{
	int ret;

#if !CONFIG_JETSAM
#pragma unused(update_memlimit, memlimit, memlimit_background)
#endif

	MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data);

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);

	if (priority == -1) {
		/* Use as shorthand for default priority */
		priority = JETSAM_PRIORITY_DEFAULT;
	} else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		/* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */
		priority = JETSAM_PRIORITY_IDLE;
	} else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) {
		/* Sanity check */
		ret = EINVAL;
		goto out;
	}

	proc_list_lock();

	assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

	if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) {
		ret = EALREADY;
		proc_list_unlock();
		MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid);
		goto out;
	}

	p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED;
	p->p_memstat_userdata = user_data;
	p->p_memstat_requestedpriority = priority;

#if CONFIG_JETSAM
	if (update_memlimit) {
		p->p_memstat_memlimit = memlimit;
		if (memlimit_background) {
			/* Will be set as priority is updated */
			p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND;
		} else {
			/* Otherwise, apply now */
			if (memorystatus_highwater_enabled) {
				task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);
			}
		}
	}
#endif

	memorystatus_update_priority_locked(p, priority);

	proc_list_unlock();
	ret = 0;

out:
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0);

	return ret;
}
int
memorystatus_remove(proc_t p, boolean_t locked)
{
	memstat_bucket_t *bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid);

	if (!locked) {
		proc_list_lock();
	}

	assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

	bucket = &memstat_bucket[p->p_memstat_effectivepriority];
	TAILQ_REMOVE(&bucket->list, p, p_memstat_list);
	bucket->count--;

	memorystatus_list_count--;

	/* If awaiting demotion to the idle band, clean up */
	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		memorystatus_invalidate_idle_demotion_locked(p, TRUE);
		memorystatus_reschedule_idle_demotion_locked();
	}

	memorystatus_check_levels_locked();

#if CONFIG_FREEZE
	if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) {
		memorystatus_frozen_count--;
	}

	if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
		memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint;
		memorystatus_suspended_count--;
	}
#endif

	if (!locked) {
		proc_list_unlock();
	}

	return 0;
}
static boolean_t
memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) {
	/* See that the process isn't marked for termination */
	if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) {
		return FALSE;
	}

	/* Idle exit requires that process be tracked */
	if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) &&
	   !(pcontrol & PROC_DIRTY_TRACK)) {
		return FALSE;
	}

	/* Deferral is only relevant if idle exit is specified */
	if ((pcontrol & PROC_DIRTY_DEFER) &&
	   !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) {
		return FALSE;
	}

	return TRUE;
}
static void
memorystatus_update_idle_priority_locked(proc_t p) {
	int32_t priority;

	MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty);

	if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
		priority = (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) ? JETSAM_PRIORITY_IDLE_DEFERRED : JETSAM_PRIORITY_IDLE;
	} else {
		priority = p->p_memstat_requestedpriority;
	}

	if (priority != p->p_memstat_effectivepriority) {
		memorystatus_update_priority_locked(p, priority);
	}
}
/*
 * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle
 * (clean). They may also indicate that they support termination when idle, with the result that they are promoted
 * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low
 * priority idle band when clean (and killed earlier, protecting higher priority processes).
 *
 * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by
 * memorystatus_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band
 * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to
 * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle
 * band. The deferral can be cleared early by clearing the appropriate flag.
 *
 * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process
 * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be
 * re-enabled or the guard state cleared, depending on whether the guard deadline has passed.
 */
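/*
 * Illustrative userspace flow (a sketch, not part of this file): clients
 * would typically drive this state machine through the private libproc
 * wrappers, assuming those wrappers are available in the SDK in use:
 *
 *	#include <libproc.h>
 *	#include <unistd.h>
 *
 *	// Opt in to dirty tracking, support idle exit, and request the
 *	// initial deferral guard described above.
 *	proc_track_dirty(getpid(), PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
 *
 *	// Mark dirty while servicing work (cancels the demotion timer)...
 *	proc_set_dirty(getpid(), true);
 *	// ...and clean when done (re-arms the deferral or demotes to idle).
 *	proc_set_dirty(getpid(), false);
 */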
int
memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
	unsigned int old_dirty;
	boolean_t reschedule = FALSE;
	int ret;

	proc_list_lock();

	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		ret = EPERM;
		goto exit;
	}

	if (!memorystatus_validate_track_flags(p, pcontrol)) {
		ret = EINVAL;
		goto exit;
	}

	old_dirty = p->p_memstat_dirty;

	/* These bits are cumulative, as per <rdar://problem/11159924> */
	if (pcontrol & PROC_DIRTY_TRACK) {
		p->p_memstat_dirty |= P_DIRTY_TRACK;
	}

	if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) {
		p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT;
	}

	/* This can be set and cleared exactly once. */
	if ((pcontrol & PROC_DIRTY_DEFER) && !(old_dirty & P_DIRTY_DEFER)) {
		p->p_memstat_dirty |= (P_DIRTY_DEFER|P_DIRTY_DEFER_IN_PROGRESS);
	} else {
		p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / deferred %s / dirty %s for process %d\n",
		((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N",
		p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS ? "Y" : "N",
		p->p_memstat_dirty & P_DIRTY ? "Y" : "N",
		p->p_pid);

	/* Kick off or invalidate the idle exit deferment if there's a state transition. */
	if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) {
		if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) &&
		    (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && !(old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) {
			memorystatus_schedule_idle_demotion_locked(p, TRUE);
			reschedule = TRUE;
		} else if (!(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && (old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) {
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			reschedule = TRUE;
		}
	}

	memorystatus_update_idle_priority_locked(p);

	if (reschedule) {
		memorystatus_reschedule_idle_demotion_locked();
	}

	ret = 0;

exit:
	proc_list_unlock();

	return ret;
}
int
memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
	int ret;
	boolean_t kill = false;
	boolean_t reschedule = FALSE;
	boolean_t was_dirty = FALSE;
	boolean_t now_dirty = FALSE;

	MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty);

	proc_list_lock();

	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		ret = EPERM;
		goto exit;
	}

	if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
		was_dirty = TRUE;

	if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
		/* Dirty tracking not enabled */
		ret = EINVAL;
	} else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
		/*
		 * Process is set to be terminated and we're attempting to mark it dirty.
		 * Set for termination and marking as clean is OK - see <rdar://problem/10594349>.
		 */
		ret = EBUSY;
	} else {
		int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN;
		if (pcontrol && !(p->p_memstat_dirty & flag)) {
			/* Mark the process as having been dirtied at some point */
			p->p_memstat_dirty |= (flag | P_DIRTY_MARKED);
			memorystatus_dirty_count++;
			ret = 0;
		} else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) {
			if ((flag == P_DIRTY_SHUTDOWN) && (!(p->p_memstat_dirty & P_DIRTY))) {
				/* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */
				p->p_memstat_dirty |= P_DIRTY_TERMINATED;
				kill = true;
			} else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
				/* Kill previously terminated processes if set clean */
				kill = true;
			}
			p->p_memstat_dirty &= ~flag;
			memorystatus_dirty_count--;
			ret = 0;
		} else {
			ret = EALREADY;
		}
	}

	if (ret != 0) {
		goto exit;
	}

	if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
		now_dirty = TRUE;

	if ((was_dirty == TRUE && now_dirty == FALSE) ||
	    (was_dirty == FALSE && now_dirty == TRUE)) {

		/* Manage idle exit deferral, if applied */
		if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) ==
		    (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) {
			if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
				memorystatus_invalidate_idle_demotion_locked(p, FALSE);
				reschedule = TRUE;
			} else {
				/* We evaluate lazily, so reset the idle-deadline if it's expired by the time the process becomes clean. */
				if (mach_absolute_time() >= p->p_memstat_idledeadline) {
					p->p_memstat_idledeadline = 0;
					p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
				} else {
					memorystatus_schedule_idle_demotion_locked(p, FALSE);
					reschedule = TRUE;
				}
			}
		}

		memorystatus_update_idle_priority_locked(p);

		/* If the deferral state changed, reschedule the demotion timer */
		if (reschedule) {
			memorystatus_reschedule_idle_demotion_locked();
		}
	}

	if (kill) {
		psignal(p, SIGKILL);
	}

exit:
	proc_list_unlock();

	return ret;
}
int
memorystatus_dirty_get(proc_t p) {
	int ret = 0;

	proc_list_lock();

	if (p->p_memstat_dirty & P_DIRTY_TRACK) {
		ret |= PROC_DIRTY_TRACKED;
		if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
			ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT;
		}
		if (p->p_memstat_dirty & P_DIRTY) {
			ret |= PROC_DIRTY_IS_DIRTY;
		}
	}

	proc_list_unlock();

	return ret;
}
int
memorystatus_on_terminate(proc_t p) {
	int sig;

	proc_list_lock();

	p->p_memstat_dirty |= P_DIRTY_TERMINATED;

	if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) {
		/* Clean; mark as terminated and issue SIGKILL */
		sig = SIGKILL;
	} else {
		/* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */
		sig = SIGTERM;
	}

	proc_list_unlock();

	return sig;
}
void
memorystatus_on_suspend(proc_t p)
{
#if CONFIG_FREEZE
	uint32_t pages;
	memorystatus_get_task_page_counts(p->task, &pages, NULL);
#endif
	proc_list_lock();
#if CONFIG_FREEZE
	p->p_memstat_suspendedfootprint = pages;
	memorystatus_suspended_footprint_total += pages;
	memorystatus_suspended_count++;
#endif
	p->p_memstat_state |= P_MEMSTAT_SUSPENDED;
	proc_list_unlock();
}
void
memorystatus_on_resume(proc_t p)
{
#if CONFIG_FREEZE
	boolean_t frozen;
	pid_t pid;
#endif

	proc_list_lock();

#if CONFIG_FREEZE
	frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN);
	if (frozen) {
		memorystatus_frozen_count--;
		p->p_memstat_state |= P_MEMSTAT_PRIOR_THAW;
	}

	memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint;
	memorystatus_suspended_count--;

	pid = p->p_pid;
#endif

	p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN);

	proc_list_unlock();

#if CONFIG_FREEZE
	if (frozen) {
		memorystatus_freeze_entry_t data = { pid, FALSE, 0 };
		memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
	}
#endif
}
void
memorystatus_on_inactivity(proc_t p)
{
#pragma unused(p)
#if CONFIG_FREEZE
	/* Wake the freeze thread */
	thread_wakeup((event_t)&memorystatus_freeze_wakeup);
#endif
}
static uint32_t
memorystatus_build_state(proc_t p) {
	uint32_t snapshot_state = 0;

	/* General */
	if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
		snapshot_state |= kMemorystatusSuspended;
	}
	if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
		snapshot_state |= kMemorystatusFrozen;
	}
	if (p->p_memstat_state & P_MEMSTAT_PRIOR_THAW) {
		snapshot_state |= kMemorystatusWasThawed;
	}

	/* Tracking */
	if (p->p_memstat_dirty & P_DIRTY_TRACK) {
		snapshot_state |= kMemorystatusTracked;
	}
	if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
		snapshot_state |= kMemorystatusSupportsIdleExit;
	}
	if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
		snapshot_state |= kMemorystatusDirty;
	}

	return snapshot_state;
}
static boolean_t
kill_idle_exit_proc(void)
{
	proc_t p, victim_p = PROC_NULL;
	uint64_t current_time;
	boolean_t killed = FALSE;
	unsigned int i = 0;

	/* Pick next idle exit victim. */
	current_time = mach_absolute_time();

	proc_list_lock();

	p = memorystatus_get_first_proc_locked(&i, FALSE);
	while (p) {
		/* No need to look beyond the idle band */
		if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) {
			break;
		}

		if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) {
			if (current_time >= p->p_memstat_idledeadline) {
				p->p_memstat_dirty |= P_DIRTY_TERMINATED;
				victim_p = proc_ref_locked(p);
				break;
			}
		}

		p = memorystatus_get_next_proc_locked(&i, p, FALSE);
	}

	proc_list_unlock();

	if (victim_p) {
		printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_p->p_pid, (victim_p->p_comm ? victim_p->p_comm : "(unknown)"));
		killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit);
		proc_rele(victim_p);
	}

	return killed;
}
static void
memorystatus_thread_wake(void) {
	thread_wakeup((event_t)&memorystatus_wakeup);
}

static int
memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation)
{
	if (interval_ms) {
		assert_wait_timeout(&memorystatus_wakeup, THREAD_UNINT, interval_ms, 1000 * NSEC_PER_USEC);
	} else {
		assert_wait(&memorystatus_wakeup, THREAD_UNINT);
	}

	return thread_block(continuation);
}
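/*
 * Note on the continuation pattern above: when a continuation such as
 * memorystatus_thread itself is supplied, thread_block() does not return
 * here on wakeup; the thread's kernel stack is discarded and execution
 * resumes at the top of the continuation. Passing THREAD_CONTINUE_NULL
 * instead makes the call block synchronously and return a wait_result_t,
 * which is how the snapshot-coalescing wait below inspects its result.
 */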
extern boolean_t vm_compressor_thrashing_detected;
extern uint64_t vm_compressor_total_compressions(void);
static void
memorystatus_thread(void *param __unused, wait_result_t wr __unused)
{
	static boolean_t is_vm_privileged = FALSE;
#if CONFIG_JETSAM
	boolean_t post_snapshot = FALSE;
	uint32_t errors = 0;
#endif

	if (is_vm_privileged == FALSE) {
		/*
		 * It's the first time the thread has run, so just mark the thread as privileged and block.
		 * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>.
		 */
		thread_wire(host_priv_self(), current_thread(), TRUE);
		is_vm_privileged = TRUE;

		memorystatus_thread_block(0, memorystatus_thread);
	}

#if CONFIG_JETSAM

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START,
		memorystatus_available_pages, 0, 0, 0, 0);

	uint32_t cause = vm_compressor_thrashing_detected ? kMemorystatusKilledVMThrashing : kMemorystatusKilledVMPageShortage;

	/* Jetsam aware version.
	 *
	 * If woken under pressure, go down the path of killing:
	 *
	 * - processes exceeding their highwater mark if no clean victims available
	 * - the least recently used process if no highwater mark victims available
	 */
	while (vm_compressor_thrashing_detected || memorystatus_available_pages <= memorystatus_available_pages_critical) {
		boolean_t killed;
		int32_t priority;

		while (kill_under_pressure) {
			const uint32_t SNAPSHOT_WAIT_TIMEOUT_MS = 100;
			wait_result_t wait_result;

#if LEGACY_HIWATER
			/* Highwater */
			killed = memorystatus_kill_hiwat_proc(&errors);
			if (killed) {
				post_snapshot = TRUE;
				goto done;
			}
#endif

			/* LRU */
			killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors);
			if (killed) {
				if (!kill_under_pressure && (priority != JETSAM_PRIORITY_IDLE)) {
					/* Don't generate logs for steady-state idle-exit kills */
					post_snapshot = TRUE;
				}
				goto done;
			}

			/* Under pressure and unable to kill a process - panic */
			panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages);

done:
			kill_under_pressure = FALSE;
			vm_compressor_thrashing_detected = FALSE;

			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_START,
				memorystatus_available_pages, 0, 0, 0, 0);
			thread_wakeup((event_t)&latency_jetsam_wakeup);
			/*
			 * Coalesce snapshot reports in the face of repeated jetsams by blocking here with a timeout.
			 * If the wait expires, issue the note.
			 */
			wait_result = memorystatus_thread_block(SNAPSHOT_WAIT_TIMEOUT_MS, THREAD_CONTINUE_NULL);
			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_END,
				memorystatus_available_pages, 0, 0, 0, 0);
			if (wait_result != THREAD_AWAKENED) {
				/* The wait timed out; fall through and post the pending note */
				break;
			}
		}

		break;
	}

	if (errors) {
		memorystatus_clear_errors();
	}

#if VM_PRESSURE_EVENTS
	memorystatus_update_vm_pressure(TRUE);
#endif

	if (post_snapshot) {
		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
			sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
		memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
		memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END,
		memorystatus_available_pages, 0, 0, 0, 0);

#else /* CONFIG_JETSAM */

	/*
	 * Jetsam not enabled, so just kill the first suitable clean process
	 * and block.
	 */

	if (kill_idle_exit) {
		kill_idle_exit_proc();
		kill_idle_exit = FALSE;
	}

#endif /* CONFIG_JETSAM */

	memorystatus_thread_block(0, memorystatus_thread);
}
boolean_t
memorystatus_idle_exit_from_VM(void) {
	kill_idle_exit = TRUE;
	memorystatus_thread_wake();
	return TRUE;
}
#if CONFIG_JETSAM

/*
 * Callback invoked when allowable physical memory footprint exceeded
 * (dirty pages + IOKit mappings)
 *
 * This is invoked for both advisory, non-fatal per-task high watermarks,
 * as well as the fatal system-wide task memory limit.
 */
void
memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb)
{
	proc_t p = current_proc();

	printf("process %d (%s) %s physical memory footprint limit of %d MB\n",
		p->p_pid, p->p_comm,
		warning ? "approaching" : "exceeded",
		max_footprint_mb);

#if VM_PRESSURE_EVENTS
	if (warning == TRUE) {
		if (memorystatus_warn_process(p->p_pid) != TRUE) {
			/* Print warning, since it's possible that task has not registered for pressure notifications */
			printf("task_exceeded_footprint: failed to warn the current task (exiting?).\n");
		}
		return;
	}
#endif /* VM_PRESSURE_EVENTS */

	if (p->p_memstat_memlimit <= 0) {
		/*
		 * If this process has no high watermark, then we have been invoked because the task
		 * has violated the system-wide per-task memory limit.
		 */
		if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) {
			printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n");
		}
	}
}
static void
memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint)
{
	assert(task);
	assert(footprint);

	*footprint = (uint32_t)(get_task_phys_footprint(task) / PAGE_SIZE_64);
	if (max_footprint) {
		*max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64);
	}
}
static int
memorystatus_send_note(int event_code, void *data, size_t data_length) {
	int ret;
	struct kev_msg ev_msg;

	ev_msg.vendor_code    = KEV_VENDOR_APPLE;
	ev_msg.kev_class      = KEV_SYSTEM_CLASS;
	ev_msg.kev_subclass   = KEV_MEMORYSTATUS_SUBCLASS;

	ev_msg.event_code     = event_code;

	ev_msg.dv[0].data_length = data_length;
	ev_msg.dv[0].data_ptr = data;
	ev_msg.dv[1].data_length = 0;

	ret = kev_post_msg(&ev_msg);
	if (ret) {
		printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
	}

	return ret;
}
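/*
 * Sketch of the receiving side (illustrative, not part of this file):
 * userspace can observe the notes posted above by opening a kernel event
 * socket and filtering on the memorystatus subclass, along these lines:
 *
 *	#include <sys/socket.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/kern_event.h>
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req = {
 *		.vendor_code  = KEV_VENDOR_APPLE,
 *		.kev_class    = KEV_SYSTEM_CLASS,
 *		.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS,
 *	};
 *	ioctl(fd, SIOCSKEVFILT, &req);
 *	// recv() then yields struct kern_event_msg records whose event_code
 *	// matches the kMemorystatus* notes (snapshot, freeze, pressure).
 */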
static void
memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause)
{
	unsigned int i;

	for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) {
		if (memorystatus_jetsam_snapshot_list[i].pid == p->p_pid) {
			/* Update if the priority has changed since the snapshot was taken */
			if (memorystatus_jetsam_snapshot_list[i].priority != p->p_memstat_effectivepriority) {
				memorystatus_jetsam_snapshot_list[i].priority = p->p_memstat_effectivepriority;
				strlcpy(memorystatus_jetsam_snapshot_list[i].name, p->p_comm, MAXCOMLEN+1);
				memorystatus_jetsam_snapshot_list[i].state = memorystatus_build_state(p);
				memorystatus_jetsam_snapshot_list[i].user_data = p->p_memstat_userdata;
				memorystatus_jetsam_snapshot_list[i].fds = p->p_fd->fd_nfiles;
			}
			memorystatus_jetsam_snapshot_list[i].killed = kill_cause;
			return;
		}
	}
}
void memorystatus_pages_update(unsigned int pages_avail)
{
	boolean_t critical, delta;

	if (!memorystatus_delta) {
		return;
	}

	critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE;
	delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta))
		|| (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE;

	if (critical || delta) {
		memorystatus_available_pages = pages_avail;
		memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem);

		/* Bail early to avoid excessive wake-ups */
		if (!critical) {
			return;
		}

		memorystatus_thread_wake();
	}
}
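/*
 * Illustrative behaviour (assuming the 5% default for delta_percentage):
 * with memorystatus_delta at roughly 5% of physical pages, the cached
 * memorystatus_available_pages / memorystatus_level values above are only
 * refreshed when availability moves by at least that many pages, and the
 * jetsam thread is woken only when availability drops below the critical
 * threshold.
 */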
static boolean_t
memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry)
{
	memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t));

	entry->pid = p->p_pid;
	strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1);
	entry->priority = p->p_memstat_effectivepriority;
	memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages);
	entry->state = memorystatus_build_state(p);
	entry->user_data = p->p_memstat_userdata;
	memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid));

	return TRUE;
}
static void
memorystatus_jetsam_snapshot_procs_locked(void)
{
	proc_t p, next_p;
	unsigned int b = 0, i = 0;
	kern_return_t kr = KERN_SUCCESS;

	mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
	vm_statistics64_data_t vm_stat;

	if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count)) != KERN_SUCCESS) {
		printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr);
		memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats));
	} else {
		memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count;
		memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count;
		memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count;
		memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count;
		memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count;
		memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count;

		memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count;
		memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count;
		memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count;
		memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions;
		memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions;
		memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count;
		memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor;
	}

	next_p = memorystatus_get_first_proc_locked(&b, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&b, p, TRUE);

		if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) {
			continue;
		}

		MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
			p->p_pid,
			p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7],
			p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]);

		if (++i == memorystatus_jetsam_snapshot_max) {
			break;
		}
	}

	memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time();
	memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i;
}
#if DEVELOPMENT || DEBUG

static int
memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) {
	int ret;
	memorystatus_jetsam_panic_options_t debug;

	if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) {
		return EINVAL;
	}

	ret = copyin(buffer, &debug, buffer_size);
	if (ret) {
		return ret;
	}

	/* Panic bits match kMemorystatusKilled* enum */
	memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask);

	/* Copyout new value */
	debug.data = memorystatus_jetsam_panic_debug;
	ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t));

	return ret;
}

#endif /* DEVELOPMENT || DEBUG */
/*
 * Jetsam a specific process.
 */
static boolean_t
memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) {
	boolean_t killed;
	proc_t p;

	/* TODO - add a victim queue and push this into the main jetsam thread */

	p = proc_find(victim_pid);
	if (!p) {
		return FALSE;
	}

	printf("memorystatus: specifically killing pid %d [%s] - memorystatus_available_pages: %d\n",
		victim_pid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);

	proc_list_lock();

	if (memorystatus_jetsam_snapshot_count == 0) {
		memorystatus_jetsam_snapshot_procs_locked();
	}

	memorystatus_update_snapshot_locked(p, cause);
	proc_list_unlock();

	killed = memorystatus_do_kill(p, cause);
	proc_rele(p);

	return killed;
}
/*
 * Jetsam the first process in the queue.
 */
static boolean_t
memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors)
{
	pid_t aPid = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	boolean_t new_snapshot = FALSE, killed = FALSE;
	unsigned int i = 0;

#ifndef CONFIG_FREEZE
#pragma unused(any)
#endif

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
		memorystatus_available_pages, 0, 0, 0, 0);

	proc_list_lock();

	next_p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (next_p) {
#if DEVELOPMENT || DEBUG
		int activeProcess;
		int procSuspendedForDiagnosis;
#endif /* DEVELOPMENT || DEBUG */

		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

#if DEVELOPMENT || DEBUG
		activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND;
		procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED;
#endif /* DEVELOPMENT || DEBUG */

		aPid = p->p_pid;

		if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
			continue;
		}

#if DEVELOPMENT || DEBUG
		if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) {
			printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
			continue;
		}
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE
		boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM));
		if (!(any || reclaim_proc)) {
			continue;
		}
#endif
		{
			if (priority) {
				*priority = p->p_memstat_effectivepriority;
			}

			/*
			 * Capture a snapshot if none exists and:
			 * - priority was not requested (this is something other than an ambient kill)
			 * - the priority was requested *and* the targeted process is not at idle priority
			 */
			if ((memorystatus_jetsam_snapshot_count == 0) &&
			    ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE)))) {
				memorystatus_jetsam_snapshot_procs_locked();
				new_snapshot = TRUE;
			}

			/*
			 * Mark as terminated so that if exit1() indicates success, but the process (for example)
			 * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
			 * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
			 * acquisition of the proc lock.
			 */
			p->p_memstat_state |= P_MEMSTAT_TERMINATED;

#if DEVELOPMENT || DEBUG
			if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) {
				MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
					aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_level);
				memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
				p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
				if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) {
					jetsam_diagnostic_suspended_one_active_proc = 1;
					printf("jetsam: returning after suspending first active proc - %d\n", aPid);
				}

				p = proc_ref_locked(p);
				proc_list_unlock();
				if (p) {
					task_suspend(p->task);
					proc_rele(p);
					killed = TRUE;
				}

				goto exit;
			} else
#endif /* DEVELOPMENT || DEBUG */
			{
				/* Shift queue, update stats */
				memorystatus_update_snapshot_locked(p, cause);

				p = proc_ref_locked(p);
				proc_list_unlock();
				if (p) {
					printf("memorystatus: jetsam killing pid %d [%s] - memorystatus_available_pages: %d\n",
						aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
					killed = memorystatus_do_kill(p, cause);
				}

				if (killed) {
					/* Success - exit */
					proc_rele(p);
					goto exit;
				}

				/* Failure - unwind and restart. */
				proc_list_lock();
				proc_rele_locked(p);
				p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
				p->p_memstat_state |= P_MEMSTAT_ERROR;
				*errors += 1;
				next_p = memorystatus_get_first_proc_locked(&i, TRUE);
			}
		}
	}

	proc_list_unlock();

exit:
	/* Clear snapshot if freshly captured and no target was found */
	if (new_snapshot && !killed) {
		memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
		memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);

	return killed;
}
#if LEGACY_HIWATER

static boolean_t
memorystatus_kill_hiwat_proc(uint32_t *errors)
{
	pid_t aPid = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	boolean_t new_snapshot = FALSE, killed = FALSE;
	unsigned int i = 0;

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
		memorystatus_available_pages, 0, 0, 0, 0);

	proc_list_lock();

	next_p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (next_p) {
		uint32_t footprint;
		boolean_t skip;

		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

		aPid = p->p_pid;

		if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
			continue;
		}

		/* skip if no limit set */
		if (p->p_memstat_memlimit <= 0) {
			continue;
		}

		/* skip if a currently inapplicable limit is encountered */
		if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
			continue;
		}

		footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024));
		skip = (((int32_t)footprint) <= p->p_memstat_memlimit);
#if DEVELOPMENT || DEBUG
		if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
			if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) {
				skip = TRUE;
			}
		}
#endif /* DEVELOPMENT || DEBUG */

		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			skip = TRUE;
		}

		if (skip) {
			continue;
		}

		MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d Mb > 1 (%d Mb)\n",
			(memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending" : "killing", aPid, p->p_comm,
			footprint, p->p_memstat_memlimit);

		if (memorystatus_jetsam_snapshot_count == 0) {
			memorystatus_jetsam_snapshot_procs_locked();
			new_snapshot = TRUE;
		}

		p->p_memstat_state |= P_MEMSTAT_TERMINATED;

#if DEVELOPMENT || DEBUG
		if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
			MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
			memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
			p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;

			p = proc_ref_locked(p);
			proc_list_unlock();
			if (p) {
				task_suspend(p->task);
				proc_rele(p);
				killed = TRUE;
			}

			goto exit;
		} else
#endif /* DEVELOPMENT || DEBUG */
		{
			memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat);

			p = proc_ref_locked(p);
			proc_list_unlock();
			if (p) {
				printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n",
					aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
				killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
			}

			if (killed) {
				/* Success - exit the scan */
				goto exit;
			} else {
				/* Failure - unwind and restart. */
				proc_list_lock();
				proc_rele_locked(p);
				p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
				p->p_memstat_state |= P_MEMSTAT_ERROR;
				*errors += 1;
				next_p = memorystatus_get_first_proc_locked(&i, TRUE);
			}
		}
	}

	proc_list_unlock();

exit:
	/* Clear snapshot if freshly captured and no target was found */
	if (new_snapshot && !killed) {
		memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END,
		memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);

	return killed;
}

#endif /* LEGACY_HIWATER */
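/*
 * Illustrative sketch (not compiled; guarded out): the high-water scan above
 * converts the task's physical footprint from bytes to megabytes and compares
 * it against the per-process limit in p_memstat_memlimit. A minimal standalone
 * model of that comparison, with hypothetical values:
 */
#if 0
#include <stdint.h>
#include <stdbool.h>

static bool
over_highwater(uint64_t footprint_bytes, int32_t memlimit_mb)
{
	/* Same conversion as the scan: bytes -> MB, truncating */
	uint32_t footprint_mb = (uint32_t)(footprint_bytes / (1024 * 1024));

	/* memlimit_mb <= 0 means "no limit set"; such processes are skipped */
	if (memlimit_mb <= 0) {
		return false;
	}
	return ((int32_t)footprint_mb > memlimit_mb);
}
/* e.g. over_highwater(75ULL << 20, 50) == true: a 75 MB footprint exceeds a 50 MB limit */
#endif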
static boolean_t
memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) {
	/* TODO: allow a general async path */
	if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage && cause != kMemorystatusKilledVMThrashing)) {
		return FALSE;
	}

	kill_under_pressure = TRUE;
	memorystatus_thread_wake();
	return TRUE;
}

static boolean_t
memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) {
	boolean_t res;
	uint32_t errors = 0;

	if (victim_pid == -1) {
		/* No pid, so kill first process */
		res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors);
	} else {
		res = memorystatus_kill_specific_process(victim_pid, cause);
	}

	if (errors) {
		memorystatus_clear_errors();
	}

	if (res == TRUE) {
		/* Fire off snapshot notification */
		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
			sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
		memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
		memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
	}

	return res;
}

boolean_t
memorystatus_kill_on_VM_page_shortage(boolean_t async) {
	if (async) {
		return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage);
	} else {
		return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage);
	}
}

boolean_t
memorystatus_kill_on_VM_thrashing(boolean_t async) {
	if (async) {
		return memorystatus_kill_process_async(-1, kMemorystatusKilledVMThrashing);
	} else {
		return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMThrashing);
	}
}

boolean_t
memorystatus_kill_on_vnode_limit(void) {
	return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes);
}

#endif /* CONFIG_JETSAM */
#if CONFIG_FREEZE

__private_extern__ void
memorystatus_freeze_init(void)
{
	kern_return_t result;
	thread_t thread;

	result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
	if (result == KERN_SUCCESS) {
		thread_deallocate(thread);
	} else {
		panic("Could not create memorystatus_freeze_thread");
	}
}

static int
memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
{
	pid_t aPid = 0;
	int ret = -1;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	unsigned int i = 0;

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
		memorystatus_available_pages, 0, 0, 0, 0);

	proc_list_lock();

	next_p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (next_p) {
		kern_return_t kr;
		uint32_t purgeable, wired, clean, dirty;
		boolean_t shared;
		uint32_t pages;
		uint32_t max_pages = 0;
		uint32_t state;

		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

		aPid = p->p_pid;
		state = p->p_memstat_state;

		/* Ensure the process is eligible for freezing */
		if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) {
			continue; // with lock held
		}

		/* Only freeze processes meeting our minimum resident page criteria */
		memorystatus_get_task_page_counts(p->task, &pages, NULL);
		if (pages < memorystatus_freeze_pages_min) {
			continue; // with lock held
		}

		if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
			/* Ensure there's enough free space to freeze this process. */
			max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
			if (max_pages < memorystatus_freeze_pages_min) {
				*memorystatus_freeze_swap_low = TRUE;
				proc_list_unlock();
				goto exit;
			}
		} else {
			max_pages = UINT32_MAX - 1;
		}

		/* Mark as locked temporarily to avoid kill */
		p->p_memstat_state |= P_MEMSTAT_LOCKED;

		p = proc_ref_locked(p);
		proc_list_unlock();
		if (!p) {
			goto exit;
		}

		kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);

		MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
			"memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n",
			(kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"),
			memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());

		proc_list_lock();
		p->p_memstat_state &= ~P_MEMSTAT_LOCKED;

		/* Success? */
		if (KERN_SUCCESS == kr) {
			memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };

			memorystatus_frozen_count++;

			p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0 : P_MEMSTAT_NORECLAIM));

			/* Update stats */
			for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
				throttle_intervals[i].pageouts += dirty;
			}

			memorystatus_freeze_pageouts += dirty;
			memorystatus_freeze_count++;

			proc_list_unlock();

			memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));

			/* Return the number of reclaimed pages */
			ret = dirty;
		} else {
			proc_list_unlock();
		}

		proc_rele(p);
		goto exit;
	}

	proc_list_unlock();

exit:
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
		memorystatus_available_pages, aPid, 0, 0, 0);

	return ret;
}
static inline boolean_t
memorystatus_can_freeze_processes(void)
{
	boolean_t ret;

	proc_list_lock();

	if (memorystatus_suspended_count) {
		uint32_t average_resident_pages, estimated_processes;

		/* Estimate the number of suspended processes we can fit */
		average_resident_pages = memorystatus_suspended_footprint_total / memorystatus_suspended_count;
		estimated_processes = memorystatus_suspended_count +
			((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages);

		/* If it's predicted that no freeze will occur, lower the threshold temporarily */
		if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) {
			memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_LOW;
		} else {
			memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
		}

		MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n",
			memorystatus_suspended_count, average_resident_pages, estimated_processes);

		if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
			ret = TRUE;
		} else {
			ret = FALSE;
		}
	} else {
		ret = FALSE;
	}

	proc_list_unlock();

	return ret;
}
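/*
 * Worked example of the estimate above (hypothetical numbers): with 10
 * suspended processes holding 50,000 resident pages in total,
 * average_resident_pages is 5,000. If memorystatus_available_pages is 30,000
 * and the critical floor is 5,000, then
 * estimated_processes = 10 + ((30000 - 5000) / 5000) = 15, i.e. roughly five
 * more suspended processes are expected to fit before the critical threshold
 * is reached; that estimate drives the threshold selection above.
 */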
static boolean_t
memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
{
	/* Only freeze if we're sufficiently low on memory; this holds off freeze right
	   after boot, and is generally a no-op once we've reached steady state. */
	if (memorystatus_available_pages > memorystatus_freeze_threshold) {
		return FALSE;
	}

	/* Check minimum suspended process threshold. */
	if (!memorystatus_can_freeze_processes()) {
		return FALSE;
	}

	/* Is swap running low? */
	if (*memorystatus_freeze_swap_low) {
		/* If there's been no movement in free swap pages since we last attempted freeze, return. */
		if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
			return FALSE;
		}

		/* Pages have been freed - we can retry. */
		*memorystatus_freeze_swap_low = FALSE;
	}

	return TRUE;
}
static void
memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
{
	if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
		if (!interval->max_pageouts) {
			interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)));
		} else {
			printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins);
		}
		interval->ts.tv_sec = interval->mins * 60;
		interval->ts.tv_nsec = 0;
		ADD_MACH_TIMESPEC(&interval->ts, ts);
		/* Since we update the throttle stats pre-freeze, adjust for overshoot here */
		if (interval->pageouts > interval->max_pageouts) {
			interval->pageouts -= interval->max_pageouts;
		} else {
			interval->pageouts = 0;
		}
		interval->throttle = FALSE;
	} else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
		printf("memorystatus_freeze_update_throttle_interval: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
		interval->throttle = TRUE;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
		interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
		interval->throttle ? "on" : "off");
}
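/*
 * Worked example of the budget computation above (hypothetical numbers): for
 * a 60-minute interval with a burst_multiple of 2 and a
 * FREEZE_DAILY_PAGEOUTS_MAX of 24,000 pages, the budget is
 * 2 * ((60 * 24000) / (24 * 60)) = 2 * 1000 = 2000 pageouts per interval.
 * Shorter intervals thus get proportionally smaller base budgets, scaled up
 * by their burst multiple to absorb short bursts of freeze activity.
 */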
static boolean_t
memorystatus_freeze_update_throttle(void)
{
	clock_sec_t sec;
	clock_nsec_t nsec;
	mach_timespec_t ts;
	uint32_t i;
	boolean_t throttled = FALSE;

#if DEVELOPMENT || DEBUG
	if (!memorystatus_freeze_throttle_enabled)
		return FALSE;
#endif

	clock_get_system_nanotime(&sec, &nsec);
	ts.tv_sec = sec;
	ts.tv_nsec = nsec;

	/* Check freeze pageouts over multiple intervals and throttle if we've exceeded our budget.
	 *
	 * This ensures that periods of inactivity can't be used as 'credit' towards freeze if the device has
	 * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in
	 * order to allow for bursts of activity.
	 */
	for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
		memorystatus_freeze_update_throttle_interval(&ts, &throttle_intervals[i]);
		if (throttle_intervals[i].throttle == TRUE)
			throttled = TRUE;
	}

	return throttled;
}
static void
memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
{
	static boolean_t memorystatus_freeze_swap_low = FALSE;

	if (memorystatus_freeze_enabled) {
		if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
			/* Only freeze if we've not exceeded our pageout budgets */
			if (!memorystatus_freeze_update_throttle()) {
				memorystatus_freeze_top_process(&memorystatus_freeze_swap_low);
			} else {
				printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n");
				memorystatus_freeze_throttle_count++; /* Throttled, update stats */
			}
		}
	}

	assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
	thread_block((thread_continue_t) memorystatus_freeze_thread);
}

#endif /* CONFIG_FREEZE */
#if CONFIG_JETSAM && VM_PRESSURE_EVENTS

boolean_t
memorystatus_warn_process(pid_t pid) {
	return (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0);
}

static inline boolean_t
memorystatus_update_pressure_locked(boolean_t *pressured) {
	vm_pressure_level_t old_level, new_level;

	old_level = memorystatus_vm_pressure_level;

	if (memorystatus_available_pages > memorystatus_available_pages_pressure) {
		/* Too many free pages */
		new_level = kVMPressureNormal;
	}
	else if (memorystatus_frozen_count > 0) {
		/* Frozen processes exist */
		new_level = kVMPressureNormal;
	}
	else if (memorystatus_suspended_count > MEMORYSTATUS_SUSPENDED_THRESHOLD) {
		/* Too many suspended processes */
		new_level = kVMPressureNormal;
	}
	else if (memorystatus_suspended_count > 0) {
		/* Some suspended processes - warn */
		new_level = kVMPressureWarning;
	}
	else {
		/* Otherwise, pressure level is urgent */
		new_level = kVMPressureUrgent;
	}

	*pressured = (new_level != kVMPressureNormal);

	/* Did the pressure level change? */
	if (old_level != new_level) {
		MEMORYSTATUS_DEBUG(1, "memorystatus_update_pressure_locked(): memory pressure changed %d -> %d; memorystatus_available_pages: %d\n",
			old_level, new_level, memorystatus_available_pages);
		memorystatus_vm_pressure_level = new_level;
	}

	return (old_level != new_level);
}
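/*
 * Summary of the level derivation above (conditions tested in order):
 *
 *   available pages > pressure threshold   -> kVMPressureNormal
 *   frozen processes exist                 -> kVMPressureNormal
 *   suspended count > suspended threshold  -> kVMPressureNormal
 *   some suspended processes               -> kVMPressureWarning
 *   otherwise                              -> kVMPressureUrgent
 */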
kern_return_t
memorystatus_update_vm_pressure(boolean_t target_foreground) {
	boolean_t pressure_changed, pressured;
	boolean_t warn = FALSE;

	/*
	 * Centralised pressure handling routine. Called from:
	 * - The main jetsam thread. In this case, we update the pressure level and dispatch warnings to the foreground
	 *   process *only*, each time the available page % drops.
	 * - The pageout scan path. In this scenario, every other registered process is targeted in footprint order.
	 *
	 * This scheme guarantees delivery to the foreground app, while providing for warnings to the remaining processes
	 * driven by the pageout scan.
	 */

	MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): foreground %d; available %d, critical %d, pressure %d\n",
		target_foreground, memorystatus_available_pages, memorystatus_available_pages_critical, memorystatus_available_pages_pressure);

	proc_list_lock();

	pressure_changed = memorystatus_update_pressure_locked(&pressured);

	if (pressured) {
		if (target_foreground) {
			if (memorystatus_available_pages != memorystatus_last_foreground_pressure_pages) {
				if (memorystatus_available_pages < memorystatus_last_foreground_pressure_pages) {
					warn = TRUE;
				}
				memorystatus_last_foreground_pressure_pages = memorystatus_available_pages;
			}
		} else {
			warn = TRUE;
		}
	} else if (pressure_changed) {
		memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
	}

	proc_list_unlock();

	/* Target foreground processes if specified */
	if (warn) {
		if (target_foreground) {
			MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_foreground_candidates()\n");
			vm_find_pressure_foreground_candidates();
		} else {
			MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_candidate()\n");
			/* Defer to VM code. This can race with the foreground priority, but
			 * it's preferable to holding onto locks for an extended period. */
			vm_find_pressure_candidate();
		}
	}

	/* Dispatch the global kevent to privileged listeners */
	if (pressure_changed) {
		memorystatus_issue_pressure_kevent(pressured);
	}

	return KERN_SUCCESS;
}
boolean_t
memorystatus_send_pressure_note(pid_t pid) {
	MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid);
	return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid));
}

boolean_t
memorystatus_bg_pressure_eligible(proc_t p) {
	boolean_t eligible = FALSE;

	proc_list_lock();

	MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state);

	/* Foreground processes have already been dealt with at this point, so just test for eligibility */
	if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) {
		eligible = TRUE;
	}

	proc_list_unlock();

	return eligible;
}

boolean_t
memorystatus_is_foreground_locked(proc_t p) {
	return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) ||
		(p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT));
}
#else /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */

/*
 * Trigger levels to test the mechanism.
 * Can be used via a sysctl.
 */
#define TEST_LOW_MEMORY_TRIGGER_ONE		1
#define TEST_LOW_MEMORY_TRIGGER_ALL		2
#define TEST_PURGEABLE_TRIGGER_ONE		3
#define TEST_PURGEABLE_TRIGGER_ALL		4
#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE	5
#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL	6

boolean_t memorystatus_manual_testing_on = FALSE;
vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal;

extern struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *, int);

extern kern_return_t vm_pressure_notification_without_levels(void);

extern void vm_pressure_klist_lock(void);
extern void vm_pressure_klist_unlock(void);

extern void vm_reset_active_list(void);

extern void delay(int);

#define INTER_NOTIFICATION_DELAY	(250000)	/* .25 second */
void memorystatus_on_pageout_scan_end(void) {
	/* No-op */
}

/*
 * knote_pressure_level - to check if the knote is registered for this notification level.
 *
 * task - task whose bits we'll be modifying
 *
 * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again.
 *
 * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately.
 */
static boolean_t
is_knote_registered_modify_task_pressure_bits(struct knote *, int, task_t, vm_pressure_level_t, vm_pressure_level_t);

static boolean_t
is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set)
{
	if (kn_max->kn_sfflags & knote_pressure_level) {
		if (task_has_been_notified(task, pressure_level_to_clear) == TRUE) {
			task_clear_has_been_notified(task, pressure_level_to_clear);
		}

		task_mark_has_been_notified(task, pressure_level_to_set);
		return TRUE;
	}

	return FALSE;
}
extern kern_return_t vm_pressure_notify_dispatch_vm_clients(void);

kern_return_t
memorystatus_update_vm_pressure(boolean_t target_best_process)
{
	struct knote *kn_max = NULL;
	pid_t target_pid = -1;
	struct klist dispatch_klist = { NULL };
	proc_t target_proc = PROC_NULL;
	static vm_pressure_level_t level_snapshot = kVMPressureNormal;
	struct task *task = NULL;
	boolean_t found_candidate = FALSE;

	while (1) {
		/*
		 * There is a race window here. But it's not clear
		 * how much we benefit from having extra synchronization.
		 */
		level_snapshot = memorystatus_vm_pressure_level;

		memorystatus_klist_lock();
		kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot);

		if (kn_max == NULL) {
			memorystatus_klist_unlock();

			/*
			 * No more level-based clients to notify.
			 * Try the non-level based notification clients.
			 *
			 * However, these non-level clients don't understand
			 * the "return-to-normal" notification.
			 *
			 * So don't consider them for those notifications. Just
			 * return instead.
			 */
			if (level_snapshot != kVMPressureNormal) {
				goto try_dispatch_vm_clients;
			}

			return KERN_FAILURE;
		}

		target_proc = kn_max->kn_kq->kq_p;

		proc_list_lock();
		if (target_proc != proc_ref_locked(target_proc)) {
			target_proc = PROC_NULL;
			proc_list_unlock();
			memorystatus_klist_unlock();
			continue;
		}
		proc_list_unlock();
		memorystatus_klist_unlock();

		target_pid = target_proc->p_pid;

		task = (struct task *)(target_proc->task);

		if (level_snapshot != kVMPressureNormal) {
			if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) {
				if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, kVMPressureCritical, kVMPressureWarning) == TRUE) {
					found_candidate = TRUE;
				}
			} else if (level_snapshot == kVMPressureCritical) {
				if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, kVMPressureWarning, kVMPressureCritical) == TRUE) {
					found_candidate = TRUE;
				}
			}
		} else {
			if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
				task_clear_has_been_notified(task, kVMPressureWarning);
				task_clear_has_been_notified(task, kVMPressureCritical);

				found_candidate = TRUE;
			}
		}

		if (found_candidate == FALSE) {
			continue;
		}

		memorystatus_klist_lock();
		KNOTE_DETACH(&memorystatus_klist, kn_max);
		KNOTE_ATTACH(&dispatch_klist, kn_max);
		memorystatus_klist_unlock();

		KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure);

		memorystatus_klist_lock();
		KNOTE_DETACH(&dispatch_klist, kn_max);
		KNOTE_ATTACH(&memorystatus_klist, kn_max);
		memorystatus_klist_unlock();

		microuptime(&target_proc->vm_pressure_last_notify_tstamp);
		proc_rele(target_proc);

		if (target_best_process == TRUE) {
			break;
		}

try_dispatch_vm_clients:
		if (level_snapshot != kVMPressureNormal) {
			/*
			 * Wake up idle-exit thread.
			 * Targets one process per invocation.
			 *
			 * TODO: memorystatus_idle_exit_from_VM should return FALSE once it's
			 * done with all idle-exitable processes. Currently, we will exit this
			 * loop when we are done with notification clients (level and non-level based)
			 * but we may still have some idle-exitable processes around.
			 */
			memorystatus_idle_exit_from_VM();

			if ((vm_pressure_notify_dispatch_vm_clients() == KERN_FAILURE) && (kn_max == NULL)) {
				/*
				 * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications,
				 * and we have failed to find any eligible clients for the non-level based notifications too.
				 * So, we are done.
				 */
				return KERN_FAILURE;
			}
		}

		if (memorystatus_manual_testing_on == FALSE) {
			delay(INTER_NOTIFICATION_DELAY);
		}
	}

	return KERN_SUCCESS;
}
static vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);

static vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level)
{
	vm_pressure_level_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;

	switch (internal_pressure_level) {

		case kVMPressureNormal:
		{
			dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
			break;
		}

		case kVMPressureWarning:
		case kVMPressureUrgent:
		{
			dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN;
			break;
		}

		case kVMPressureCritical:
		{
			dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
			break;
		}

		default:
			break;
	}

	return dispatch_level;
}

static int
sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level);

	return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level));
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
    0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");
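/*
 * Userland sketch (not compiled here): reading the pressure level exported by
 * the sysctl above. Note the value returned is the NOTE_MEMORYSTATUS_PRESSURE_*
 * encoding from sys/event.h, not the kernel-internal kVMPressure* values.
 */
#if 0
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>

int
main(void)
{
	int level = 0;
	size_t len = sizeof(level);

	if (sysctlbyname("kern.memorystatus_vm_pressure_level", &level, &len, NULL, 0) == -1) {
		perror("sysctlbyname");
		return 1;
	}
	printf("dispatch pressure level: %d\n", level);
	return 0;
}
#endif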
extern int memorystatus_purge_on_warning;
extern int memorystatus_purge_on_critical;

static int
sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int level = 0;
	int error = 0;
	int force_purge = 0;
	int pressure_level = 0;
	int trigger_request = 0;

	error = sysctl_handle_int(oidp, &level, 0, req);
	if (error || !req->newptr) {
		return (error);
	}

	memorystatus_manual_testing_on = TRUE;

	trigger_request = (level >> 16) & 0xFFFF;
	pressure_level = (level & 0xFFFF);

	if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE ||
	    trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) {
		return EINVAL;
	}

	switch (pressure_level) {
	case NOTE_MEMORYSTATUS_PRESSURE_NORMAL:
	case NOTE_MEMORYSTATUS_PRESSURE_WARN:
	case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL:
		break;
	default:
		return EINVAL;
	}

	/*
	 * The pressure level is being set from user-space.
	 * And user-space uses the constants in sys/event.h
	 * So we translate those events to our internal levels here.
	 */
	if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
		memorystatus_manual_testing_level = kVMPressureNormal;
		force_purge = 0;
	} else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) {
		memorystatus_manual_testing_level = kVMPressureWarning;
		force_purge = memorystatus_purge_on_warning;
	} else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
		memorystatus_manual_testing_level = kVMPressureCritical;
		force_purge = memorystatus_purge_on_critical;
	}

	memorystatus_vm_pressure_level = memorystatus_manual_testing_level;

	/* purge according to the new pressure level */
	switch (trigger_request) {
	case TEST_PURGEABLE_TRIGGER_ONE:
	case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE:
		if (force_purge == 0) {
			/* no purging requested */
			break;
		}
		vm_purgeable_object_purge_one_unlocked(force_purge);
		break;
	case TEST_PURGEABLE_TRIGGER_ALL:
	case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL:
		if (force_purge == 0) {
			/* no purging requested */
			break;
		}
		while (vm_purgeable_object_purge_one_unlocked(force_purge));
		break;
	}

	if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) ||
	    (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) {
		memorystatus_update_vm_pressure(TRUE);
	}

	if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) ||
	    (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) {
		while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) {
			continue;
		}
	}

	if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
		memorystatus_manual_testing_on = FALSE;

		vm_pressure_klist_lock();
		vm_reset_active_list();
		vm_pressure_klist_unlock();
	} else {
		vm_pressure_klist_lock();
		vm_pressure_notification_without_levels();
		vm_pressure_klist_unlock();
	}

	return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorypressure_manual_trigger, "I", "");
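/*
 * Userland sketch (not compiled here): driving the manual trigger above. The
 * written int packs the trigger request in the high 16 bits and the
 * NOTE_MEMORYSTATUS_PRESSURE_* level in the low 16 bits, matching the decoding
 * in the handler. Requires a suitably privileged caller and a kernel built
 * with this sysctl; the TEST_* value is mirrored here for illustration.
 */
#if 0
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/event.h>		/* NOTE_MEMORYSTATUS_PRESSURE_* */

#define TEST_LOW_MEMORY_TRIGGER_ONE	1	/* mirrors the kernel-side define */

int
main(void)
{
	int level = (TEST_LOW_MEMORY_TRIGGER_ONE << 16) | NOTE_MEMORYSTATUS_PRESSURE_WARN;

	if (sysctlbyname("kern.memorypressure_manual_trigger", NULL, NULL, &level, sizeof(level)) == -1) {
		perror("sysctlbyname");
		return 1;
	}
	return 0;
}
#endif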
extern int memorystatus_purge_on_warning;
extern int memorystatus_purge_on_urgent;
extern int memorystatus_purge_on_critical;

SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_warning, 0, "");
SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_urgent, 0, "");
SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, "");

#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
/* Return both allocated and actual size, since there's a race between allocation and list compilation */
static int
memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only)
{
	uint32_t list_count, i = 0;
	memorystatus_priority_entry_t *list_entry;
	proc_t p;

	list_count = memorystatus_list_count;
	*list_size = sizeof(memorystatus_priority_entry_t) * list_count;

	/* Just a size check? */
	if (size_only) {
		return 0;
	}

	/* Otherwise, validate the size of the buffer */
	if (*buffer_size < *list_size) {
		return EINVAL;
	}

	*list_ptr = (memorystatus_priority_entry_t *)kalloc(*list_size);
	if (!*list_ptr) {
		return ENOMEM;
	}

	memset(*list_ptr, 0, *list_size);

	*buffer_size = *list_size;
	*list_size = 0;

	list_entry = *list_ptr;

	proc_list_lock();

	p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (p && (*list_size < *buffer_size)) {
		list_entry->pid = p->p_pid;
		list_entry->priority = p->p_memstat_effectivepriority;
		list_entry->user_data = p->p_memstat_userdata;
#if LEGACY_HIWATER
		if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) ||
			(p->p_memstat_memlimit <= 0)) {
			task_get_phys_footprint_limit(p->task, &list_entry->limit);
		} else {
			list_entry->limit = p->p_memstat_memlimit;
		}
#else
		task_get_phys_footprint_limit(p->task, &list_entry->limit);
#endif
		list_entry->state = memorystatus_build_state(p);
		list_entry++;

		*list_size += sizeof(memorystatus_priority_entry_t);

		p = memorystatus_get_next_proc_locked(&i, p, TRUE);
	}

	proc_list_unlock();

	MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size);

	return 0;
}

static int
memorystatus_cmd_get_priority_list(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
	int error = EINVAL;
	boolean_t size_only;
	memorystatus_priority_entry_t *list = NULL;
	size_t list_size;

	size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);

	error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only);
	if (error) {
		goto out;
	}

	if (!size_only) {
		error = copyout(list, buffer, list_size);
	}

	if (error == 0) {
		*retval = list_size;
	}

out:
	if (list) {
		kfree(list, buffer_size);
	}

	return error;
}
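/*
 * Usage sketch (hypothetical, not compiled here): the handlers above expect a
 * two-phase call pattern - a size-only query with a NULL buffer, then a second
 * call with a buffer of the reported size. The userland memorystatus_control()
 * wrapper and its argument order are assumptions for illustration; only the
 * two-phase pattern comes from the code above.
 */
#if 0
#include <stdint.h>
#include <stdlib.h>
#include <sys/kern_memorystatus.h>	/* MEMORYSTATUS_CMD_GET_PRIORITY_LIST */

/* hypothetical prototype for the syscall wrapper */
extern int memorystatus_control(uint32_t command, int32_t pid, uint32_t flags,
    void *buffer, size_t buffersize);

static void *
fetch_priority_list(size_t *size_out)
{
	/* Phase 1: size-only query (buffer == NULL) */
	int size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, NULL, 0);
	if (size <= 0) {
		return NULL;
	}

	/* Phase 2: fetch into a buffer of the reported size */
	void *buf = malloc((size_t)size);
	if (buf == NULL) {
		return NULL;
	}
	if (memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, buf, (size_t)size) < 0) {
		free(buf);
		return NULL;
	}
	*size_out = (size_t)size;
	return buf;
}
#endif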
static void
memorystatus_clear_errors(void)
{
	proc_t p;
	unsigned int i = 0;

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0);

	proc_list_lock();

	p = memorystatus_get_first_proc_locked(&i, TRUE);
	while (p) {
		if (p->p_memstat_state & P_MEMSTAT_ERROR) {
			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
		}
		p = memorystatus_get_next_proc_locked(&i, p, TRUE);
	}

	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
static void
memorystatus_update_levels_locked(boolean_t critical_only) {
	memorystatus_available_pages_critical = memorystatus_available_pages_critical_base;

	// If there's an entry in the first bucket, we have idle processes
	memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
	if (first_bucket->count) {
		memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset;
	}

#if DEBUG || DEVELOPMENT
	if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
		memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic;
	}
#endif

	if (critical_only) {
		return;
	}

#if VM_PRESSURE_EVENTS
	memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
#if DEBUG || DEVELOPMENT
	if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
		memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic;
	}
#endif
#endif /* VM_PRESSURE_EVENTS */
}
static int
memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
	size_t input_size = *snapshot_size;

	if (memorystatus_jetsam_snapshot_count > 0) {
		*snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count));
	} else {
		*snapshot_size = 0;
	}

	if (size_only) {
		return 0;
	}

	if (input_size < *snapshot_size) {
		return EINVAL;
	}

	*snapshot = memorystatus_jetsam_snapshot;

	MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size);

	return 0;
}

static int
memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
	int error = EINVAL;
	boolean_t size_only;
	memorystatus_jetsam_snapshot_t *snapshot;

	size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);

	error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only);
	if (error) {
		goto out;
	}

	/* Copy out and reset */
	if (!size_only) {
		if ((error = copyout(snapshot, buffer, buffer_size)) == 0) {
			snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
		}
	}

	if (error == 0) {
		*retval = buffer_size;
	}

out:
	return error;
}
static int
memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
	const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */

	int error;
	uint32_t i;
	uint32_t entry_count;
	memorystatus_priority_properties_t *entries;

	/* Validate inputs */
	if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
		return EINVAL;
	}

	/* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */
	entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t));
	if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) {
		return EINVAL;
	}

	entries = (memorystatus_priority_properties_t *)kalloc(buffer_size);

	error = copyin(buffer, entries, buffer_size);

	for (i = 0; (error == 0) && (i < entry_count); i++) {
		proc_t p;

		p = proc_find(pid);
		if (!p) {
			error = ESRCH;
			break;
		}

		if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
			error = EPERM;
			proc_rele(p);
			break;
		}

		error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0);
		proc_rele(p);
	}

	kfree(entries, buffer_size);

	return error;
}
static int
memorystatus_cmd_get_pressure_status(int32_t *retval) {
	int error;

	/* Need privilege for check */
	error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
	if (error) {
		return (error);
	}

	/* Inherently racy, so it's not worth taking a lock here */
	*retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;

	return error;
}
static int
memorystatus_cmd_set_jetsam_high_water_mark(pid_t pid, int32_t high_water_mark, __unused int32_t *retval) {
	int error = 0;

	proc_t p = proc_find(pid);
	if (!p) {
		return ESRCH;
	}

	if (high_water_mark <= 0) {
		high_water_mark = -1; /* Disable */
	}

	proc_list_lock();

	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		error = EPERM;
		goto exit;
	}

	p->p_memstat_memlimit = high_water_mark;
	if (memorystatus_highwater_enabled) {
		if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) {
			memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority);
		} else {
			error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL;
		}
	}

exit:
	proc_list_unlock();
	proc_rele(p);

	return error;
}

#endif /* CONFIG_JETSAM */
int
memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) {
	int error = EINVAL;

	/* Root only for now */
	if (!kauth_cred_issuser(kauth_cred_get())) {
		error = EPERM;
		goto out;
	}

	/* Sanity check */
	if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

	switch (args->command) {
#if CONFIG_JETSAM
	case MEMORYSTATUS_CMD_GET_PRIORITY_LIST:
		error = memorystatus_cmd_get_priority_list(args->buffer, args->buffersize, ret);
		break;
	case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES:
		error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret);
		break;
	case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT:
		error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret);
		break;
	case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS:
		error = memorystatus_cmd_get_pressure_status(ret);
		break;
	case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK:
		/* TODO: deprecate. Keeping it in as there's no pid based way to set the ledger limit right now. */
		error = memorystatus_cmd_set_jetsam_high_water_mark(args->pid, (int32_t)args->flags, ret);
		break;
#if DEVELOPMENT || DEBUG
	case MEMORYSTATUS_CMD_TEST_JETSAM:
		error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 0 : EINVAL;
		break;
	case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS:
		error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize);
		break;
#endif /* DEVELOPMENT || DEBUG */
#endif /* CONFIG_JETSAM */
	default:
		break;
	}

out:
	return error;
}
static int
filt_memorystatusattach(struct knote *kn)
{
	kn->kn_flags |= EV_CLEAR;
	return memorystatus_knote_register(kn);
}

static void
filt_memorystatusdetach(struct knote *kn)
{
	memorystatus_knote_unregister(kn);
}

static int
filt_memorystatus(struct knote *kn __unused, long hint)
{
	if (hint) {
		switch (hint) {
		case kMemorystatusNoPressure:
			if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
				kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
			}
			break;
		case kMemorystatusPressure:
			if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) {
				if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
					kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
				}
			} else if (memorystatus_vm_pressure_level == kVMPressureCritical) {
				if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
					kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
				}
			}
			break;
		default:
			break;
		}
	}

	return (kn->kn_fflags != 0);
}
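/*
 * Userland sketch (not compiled here): registering for the notifications this
 * filter services, via EVFILT_MEMORYSTATUS. Selecting the pressure fflags
 * requires PRIV_VM_PRESSURE, per memorystatus_knote_register() below; ident
 * and udata are unused by this filter.
 */
#if 0
#include <stdio.h>
#include <sys/event.h>

int
main(void)
{
	struct kevent ev, out;
	int kq = kqueue();

	if (kq == -1) {
		perror("kqueue");
		return 1;
	}

	/* fflags selects the pressure levels of interest */
	EV_SET(&ev, 0, EVFILT_MEMORYSTATUS, EV_ADD | EV_CLEAR,
	    NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN, 0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) == -1) {
		perror("kevent(register)");
		return 1;
	}

	/* Block until a pressure transition fires the knote */
	if (kevent(kq, NULL, 0, &out, 1, NULL) == 1) {
		printf("memorystatus event: fflags 0x%x\n", (unsigned)out.fflags);
	}
	return 0;
}
#endif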
static void
memorystatus_klist_lock(void) {
	lck_mtx_lock(&memorystatus_klist_mutex);
}

static void
memorystatus_klist_unlock(void) {
	lck_mtx_unlock(&memorystatus_klist_mutex);
}

void
memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) {
	lck_mtx_init(&memorystatus_klist_mutex, grp, attr);
	klist_init(&memorystatus_klist);
}

int
memorystatus_knote_register(struct knote *kn) {
	int error = 0;

	memorystatus_klist_lock();

	if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL)) {
#if CONFIG_JETSAM && VM_PRESSURE_EVENTS
		/* Need a privilege to register */
		error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */

		if (!error) {
			KNOTE_ATTACH(&memorystatus_klist, kn);
		}
	} else {
		error = ENOTSUP;
	}

	memorystatus_klist_unlock();

	return error;
}

void
memorystatus_knote_unregister(struct knote *kn __unused) {
	memorystatus_klist_lock();
	KNOTE_DETACH(&memorystatus_klist, kn);
	memorystatus_klist_unlock();
}

#if CONFIG_JETSAM && VM_PRESSURE_EVENTS
static boolean_t
memorystatus_issue_pressure_kevent(boolean_t pressured) {
	memorystatus_klist_lock();
	KNOTE(&memorystatus_klist, pressured ? kMemorystatusPressure : kMemorystatusNoPressure);
	memorystatus_klist_unlock();
	return TRUE;
}
#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */