/*
 * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <libkern/libkern.h>
#include <mach/mach_types.h>
#include <mach/task.h>
#include <sys/proc_internal.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <kern/locks.h>
#include <sys/queue.h>
#include <kern/vm_pressure.h>
#include <sys/malloc.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <vm/vm_pageout.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif
/*
 * This value is the threshold that a process must meet to be considered for scavenging.
 */
#define VM_PRESSURE_MINIMUM_RSIZE           10      /* MB */

#define VM_PRESSURE_NOTIFY_WAIT_PERIOD      10000   /* milliseconds */
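/*
 * Taken together, these two constants shape the selection loops below: a
 * process with less than VM_PRESSURE_MINIMUM_RSIZE MB resident is never
 * chosen as a candidate, and a process that has already been notified is
 * not re-notified until VM_PRESSURE_NOTIFY_WAIT_PERIOD milliseconds have
 * elapsed since its vm_pressure_last_notify_tstamp.
 */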
void vm_pressure_klist_lock(void);
void vm_pressure_klist_unlock(void);

static void vm_dispatch_memory_pressure(void);
void vm_reset_active_list(void);

#if CONFIG_MEMORYSTATUS
static kern_return_t vm_try_pressure_candidates(boolean_t target_foreground_process);
#endif

static lck_mtx_t vm_pressure_klist_mutex;

struct klist vm_pressure_klist;
struct klist vm_pressure_klist_dormant;
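/*
 * Two lists drive the notification scheme: knotes for registered processes
 * live on vm_pressure_klist until they are sent an event, at which point
 * they are parked on vm_pressure_klist_dormant so the same process is not
 * notified repeatedly. vm_reset_active_list() (at the bottom of this file)
 * re-charges the active list from the dormant one once the active list has
 * been drained.
 */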
#if DEBUG
#define VM_PRESSURE_DEBUG(cond, format, ...)                \
do {                                                        \
    if (cond) { printf(format, ##__VA_ARGS__); }            \
} while (0)
#else
#define VM_PRESSURE_DEBUG(cond, format, ...)
#endif
void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr) {
    lck_mtx_init(&vm_pressure_klist_mutex, grp, attr);
}
void vm_pressure_klist_lock(void) {
    lck_mtx_lock(&vm_pressure_klist_mutex);
}

void vm_pressure_klist_unlock(void) {
    lck_mtx_unlock(&vm_pressure_klist_mutex);
}
int vm_knote_register(struct knote *kn) {
    int rv = 0;

    vm_pressure_klist_lock();

    if ((kn->kn_sfflags) & (NOTE_VM_PRESSURE)) {
        KNOTE_ATTACH(&vm_pressure_klist, kn);
    } else {
        rv = ENOTSUP;
    }

    vm_pressure_klist_unlock();

    return rv;
}
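/*
 * For context: the knote arrives here via the kqueue machinery. A minimal
 * userspace sketch of a client registering for these events (illustrative
 * only; EVFILT_VM is a private interface and may not be exposed to
 * third-party code on a given release):
 *
 *     int kq = kqueue();
 *     struct kevent ev;
 *     EV_SET(&ev, 0, EVFILT_VM, EV_ADD | EV_ENABLE, NOTE_VM_PRESSURE, 0, NULL);
 *     kevent(kq, &ev, 1, NULL, 0, NULL);   // attach; events read back later
 *
 * The resulting knote is what vm_knote_register() attaches to
 * vm_pressure_klist above.
 */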
void vm_knote_unregister(struct knote *kn) {
    struct knote *kn_temp;

    vm_pressure_klist_lock();

    VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);

    SLIST_FOREACH(kn_temp, &vm_pressure_klist, kn_selnext) {
        if (kn_temp == kn) {
            KNOTE_DETACH(&vm_pressure_klist, kn);
            vm_pressure_klist_unlock();
            return;
        }
    }

    SLIST_FOREACH(kn_temp, &vm_pressure_klist_dormant, kn_selnext) {
        if (kn_temp == kn) {
            KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
            vm_pressure_klist_unlock();
            return;
        }
    }

    vm_pressure_klist_unlock();
}
void vm_pressure_proc_cleanup(proc_t p)
{
    struct knote *kn = NULL;

    vm_pressure_klist_lock();

    VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d exiting pressure notification\n", p->p_pid);

    SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
        if (kn->kn_kq->kq_p == p) {
            KNOTE_DETACH(&vm_pressure_klist, kn);
            vm_pressure_klist_unlock();
            return;
        }
    }

    SLIST_FOREACH(kn, &vm_pressure_klist_dormant, kn_selnext) {
        if (kn->kn_kq->kq_p == p) {
            KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
            vm_pressure_klist_unlock();
            return;
        }
    }

    vm_pressure_klist_unlock();
}
/*
 * Used by the vm_pressure_thread which is
 * signalled from within vm_pageout_scan().
 */
void consider_vm_pressure_events(void)
{
    vm_dispatch_memory_pressure();
}
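/*
 * Call chain, for reference: vm_pageout_scan() signals the pressure thread,
 * which calls consider_vm_pressure_events(); that lands in
 * vm_dispatch_memory_pressure() below, which hands control to the
 * memorystatus subsystem via memorystatus_update_vm_pressure().
 */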
#if CONFIG_MEMORYSTATUS

/* Jetsam aware version. Called with lock held */

struct knote *vm_find_knote_from_pid(pid_t, struct klist *);

struct knote *vm_find_knote_from_pid(pid_t pid, struct klist *list) {
    struct knote *kn = NULL;

    SLIST_FOREACH(kn, list, kn_selnext) {
        struct proc *p = kn->kn_kq->kq_p;
        pid_t current_pid = p->p_pid;

        if (current_pid == pid) {
            break;
        }
    }

    return kn;
}
int vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked) {
    int ret = EINVAL;
    struct knote *kn;

    VM_PRESSURE_DEBUG(1, "vm_dispatch_pressure_note_to_pid(): pid %d\n", pid);

    if (!locked) {
        vm_pressure_klist_lock();
    }

    /*
     * Because we're specifically targeting a process here, we don't care
     * if a warning has already been sent and it's moved to the dormant
     * list; check that too.
     */
    kn = vm_find_knote_from_pid(pid, &vm_pressure_klist);
    if (kn) {
        KNOTE(&vm_pressure_klist, pid);
        ret = 0;
    } else {
        kn = vm_find_knote_from_pid(pid, &vm_pressure_klist_dormant);
        if (kn) {
            KNOTE(&vm_pressure_klist_dormant, pid);
            ret = 0;
        }
    }

    if (!locked) {
        vm_pressure_klist_unlock();
    }

    return ret;
}
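/*
 * Walk both lists and notify every foreground process in one shot. Knotes
 * for foreground processes are detached onto a local dispatch_klist first,
 * so that KNOTE() fires only for the selected set; after dispatch they are
 * parked on the dormant list like any other notified knote.
 */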
void vm_find_pressure_foreground_candidates(void)
{
    struct knote *kn, *kn_tmp;
    struct klist dispatch_klist = { NULL };

    vm_pressure_klist_lock();
    proc_list_lock();

    /* Find the foreground processes. */
    SLIST_FOREACH_SAFE(kn, &vm_pressure_klist, kn_selnext, kn_tmp) {
        proc_t p = kn->kn_kq->kq_p;

        if (memorystatus_is_foreground_locked(p)) {
            KNOTE_DETACH(&vm_pressure_klist, kn);
            KNOTE_ATTACH(&dispatch_klist, kn);
        }
    }

    SLIST_FOREACH_SAFE(kn, &vm_pressure_klist_dormant, kn_selnext, kn_tmp) {
        proc_t p = kn->kn_kq->kq_p;

        if (memorystatus_is_foreground_locked(p)) {
            KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
            KNOTE_ATTACH(&dispatch_klist, kn);
        }
    }

    proc_list_unlock();

    /* Dispatch pressure notifications accordingly */
    SLIST_FOREACH_SAFE(kn, &dispatch_klist, kn_selnext, kn_tmp) {
        proc_t p = kn->kn_kq->kq_p;

        proc_list_lock();
        if (p != proc_ref_locked(p)) {
            proc_list_unlock();
            KNOTE_DETACH(&dispatch_klist, kn);
            KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
            continue;
        }
        proc_list_unlock();

        VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d\n", kn->kn_kq->kq_p->p_pid);
        KNOTE(&dispatch_klist, p->p_pid);
        KNOTE_DETACH(&dispatch_klist, kn);
        KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
        microuptime(&p->vm_pressure_last_notify_tstamp);
        memorystatus_send_pressure_note(p->p_pid);
        proc_rele(p);
    }

    vm_pressure_klist_unlock();
}
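/*
 * Level-free candidate selection: of the processes that are eligible for
 * background pressure notifications, have not been notified within the last
 * VM_PRESSURE_NOTIFY_WAIT_PERIOD, and have at least
 * VM_PRESSURE_MINIMUM_RSIZE MB resident, pick the one with the largest
 * resident size and send it a single note.
 */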
void vm_find_pressure_candidate(void)
{
    struct knote *kn = NULL, *kn_max = NULL;
    unsigned int resident_max = 0;
    pid_t target_pid = -1;
    struct klist dispatch_klist = { NULL };
    struct timeval curr_tstamp = {0, 0};
    int elapsed_msecs = 0;
    proc_t target_proc = PROC_NULL;
    kern_return_t kr = KERN_SUCCESS;

    microuptime(&curr_tstamp);

    vm_pressure_klist_lock();

    SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
        struct mach_task_basic_info basic_info;
        mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT;
        unsigned int resident_size = 0;
        proc_t p = PROC_NULL;
        struct task *t = TASK_NULL;

        p = kn->kn_kq->kq_p;
        proc_list_lock();
        if (p != proc_ref_locked(p)) {
            p = PROC_NULL;
            proc_list_unlock();
            continue;
        }
        proc_list_unlock();

        t = (struct task *)(p->task);

        timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp);
        elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;

        if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) {
            proc_rele(p);
            continue;
        }

        if (!memorystatus_bg_pressure_eligible(p)) {
            VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
            proc_rele(p);
            continue;
        }

        if ((kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS) {
            VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed\n", p->p_pid);
            proc_rele(p);
            continue;
        }

        /*
         * We don't want a small process to block large processes from
         * being notified again. <rdar://problem/7955532>
         */
        resident_size = (basic_info.resident_size) / (1024 * 1024);
        if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
            if (resident_size > resident_max) {
                resident_max = resident_size;
                kn_max = kn;
                target_pid = p->p_pid;
            }
        } else {
            /* There was no candidate with enough resident memory to scavenge */
            VM_PRESSURE_DEBUG(1, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
        }
        proc_rele(p);
    }

    if (kn_max == NULL || target_pid == -1) {
        VM_PRESSURE_DEBUG(1, "[vm_pressure] - no target found!\n");
        goto exit;
    }

    VM_DEBUG_CONSTANT_EVENT(vm_pressure_event, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
    VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);

    KNOTE_DETACH(&vm_pressure_klist, kn_max);

    target_proc = proc_find(target_pid);
    if (target_proc != PROC_NULL) {
        KNOTE_ATTACH(&dispatch_klist, kn_max);
        KNOTE(&dispatch_klist, target_pid);
        KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
        memorystatus_send_pressure_note(target_pid);
        microuptime(&target_proc->vm_pressure_last_notify_tstamp);
        proc_rele(target_proc);
    }

exit:
    vm_pressure_klist_unlock();
}
#endif /* CONFIG_MEMORYSTATUS */
struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process);

kern_return_t vm_pressure_notification_without_levels(boolean_t target_foreground_process);
kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process);
kern_return_t
vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process)
{
    vm_pressure_klist_lock();

    if (SLIST_EMPTY(&vm_pressure_klist)) {
        vm_reset_active_list();
    }

    if (!SLIST_EMPTY(&vm_pressure_klist)) {

        VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n");

        if (KERN_SUCCESS == vm_try_pressure_candidates(target_foreground_process)) {
            vm_pressure_klist_unlock();
            return KERN_SUCCESS;
        }
    }

    VM_PRESSURE_DEBUG(1, "[vm_pressure] could not find suitable event candidate\n");

    vm_pressure_klist_unlock();

    return KERN_FAILURE;
}
static void vm_dispatch_memory_pressure(void)
{
    memorystatus_update_vm_pressure(FALSE);
}
extern vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);
struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process)
{
    struct knote *kn = NULL, *kn_max = NULL;
    unsigned int resident_max = 0;
    struct timeval curr_tstamp = {0, 0};
    int elapsed_msecs = 0;
    int selected_task_importance = 0;
    static int pressure_snapshot = -1;
    boolean_t pressure_increase = FALSE;

    if (level != -1) {
        if (pressure_snapshot == -1) {
            /*
             * Initial snapshot.
             */
            pressure_snapshot = level;
            pressure_increase = TRUE;
        } else {
            if (level >= pressure_snapshot) {
                pressure_increase = TRUE;
            } else {
                pressure_increase = FALSE;
            }

            pressure_snapshot = level;
        }
    }

    if ((level > 0) && (pressure_increase == TRUE)) {
        /*
         * We'll start by considering the largest
         * unimportant task in our list.
         */
        selected_task_importance = INT_MAX;
    } else {
        /*
         * We'll start by considering the largest
         * important task in our list.
         */
        selected_task_importance = 0;
    }

    microuptime(&curr_tstamp);

    SLIST_FOREACH(kn, candidate_list, kn_selnext) {
        unsigned int resident_size = 0;
        proc_t p = PROC_NULL;
        struct task *t = TASK_NULL;
        int curr_task_importance = 0;
        boolean_t consider_knote = FALSE;
        boolean_t privileged_listener = FALSE;

        p = kn->kn_kq->kq_p;
        proc_list_lock();
        if (p != proc_ref_locked(p)) {
            p = PROC_NULL;
            proc_list_unlock();
            continue;
        }
        proc_list_unlock();

#if CONFIG_MEMORYSTATUS
        if (target_foreground_process == TRUE && !memorystatus_is_foreground_locked(p)) {
            /*
             * Skip process not marked foreground.
             */
            proc_rele(p);
            continue;
        }
#endif /* CONFIG_MEMORYSTATUS */

        t = (struct task *)(p->task);

        timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp);
        elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;

        if ((level == -1) && (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD)) {
            proc_rele(p);
            continue;
        }

        if (level != -1) {
            /*
             * For the level based notifications, check and see if this knote is
             * registered for the current level.
             */
            vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level);

            if ((kn->kn_sfflags & dispatch_level) == 0) {
                proc_rele(p);
                continue;
            }
        }

#if CONFIG_MEMORYSTATUS
        if (target_foreground_process == FALSE && !memorystatus_bg_pressure_eligible(p)) {
            VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
            proc_rele(p);
            continue;
        }
#endif /* CONFIG_MEMORYSTATUS */

        curr_task_importance = task_importance_estimate(t);

        /*
         * Privileged listeners are only considered in the multi-level pressure scheme
         * AND only if the pressure is increasing.
         */
        if (level > 0) {
            if (task_has_been_notified(t, level) == FALSE) {
                /*
                 * Is this a privileged listener?
                 */
                if (task_low_mem_privileged_listener(t, FALSE, &privileged_listener) == 0) {
                    if (privileged_listener) {
                        kn_max = kn;
                        proc_rele(p);
                        goto done_scanning;
                    }
                }
            } else {
                proc_rele(p);
                continue;
            }
        } else if (level == 0) {
            /*
             * Task wasn't notified when the pressure was increasing and so
             * no need to notify it that the pressure is decreasing.
             */
            if ((task_has_been_notified(t, kVMPressureWarning) == FALSE) && (task_has_been_notified(t, kVMPressureCritical) == FALSE)) {
                proc_rele(p);
                continue;
            }
        }

        /*
         * We don't want a small process to block large processes from
         * being notified again. <rdar://problem/7955532>
         */
        resident_size = (get_task_phys_footprint(t)) / (1024 * 1024ULL);    /* MB */

        if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
            if (level > 0) {
                /*
                 * Warning or Critical Pressure.
                 */
                if (pressure_increase) {
                    if ((curr_task_importance < selected_task_importance) ||
                        ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
                        /*
                         * We have found a candidate process which is:
                         * a) at a lower importance than the current selected process
                         * OR
                         * b) has importance equal to that of the current selected process but is larger
                         */
                        consider_knote = TRUE;
                    }
                } else {
                    if ((curr_task_importance > selected_task_importance) ||
                        ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
                        /*
                         * We have found a candidate process which is:
                         * a) at a higher importance than the current selected process
                         * OR
                         * b) has importance equal to that of the current selected process but is larger
                         */
                        consider_knote = TRUE;
                    }
                }
            } else if (level == 0) {
                /*
                 * Pressure back to normal.
                 */
                if ((curr_task_importance > selected_task_importance) ||
                    ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
                    consider_knote = TRUE;
                }
            } else if (level == -1) {
                /*
                 * Simple (importance and level)-free behavior based solely on RSIZE.
                 */
                if (resident_size > resident_max) {
                    consider_knote = TRUE;
                }
            }

            if (consider_knote) {
                resident_max = resident_size;
                kn_max = kn;
                selected_task_importance = curr_task_importance;
                consider_knote = FALSE; /* reset for the next candidate */
            }
        } else {
            /* There was no candidate with enough resident memory to scavenge */
            VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
        }
        proc_rele(p);
    }

done_scanning:
    if (kn_max) {
        VM_DEBUG_CONSTANT_EVENT(vm_pressure_event, VM_PRESSURE_EVENT, DBG_FUNC_NONE, kn_max->kn_kq->kq_p->p_pid, resident_max, 0, 0);
        VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
    }

    return kn_max;
}
/*
 * vm_pressure_klist_lock is held for this routine.
 */
kern_return_t
vm_pressure_notification_without_levels(boolean_t target_foreground_process)
{
    struct knote *kn_max = NULL;
    pid_t target_pid = -1;
    struct klist dispatch_klist = { NULL };
    proc_t target_proc = PROC_NULL;
    struct klist *candidate_list = NULL;

    candidate_list = &vm_pressure_klist;

    kn_max = vm_pressure_select_optimal_candidate_to_notify(candidate_list, -1, target_foreground_process);

    if (kn_max == NULL) {
        if (target_foreground_process) {
            /*
             * Doesn't matter if the process had been notified earlier on.
             * This is a very specific request. Deliver it.
             */
            candidate_list = &vm_pressure_klist_dormant;
            kn_max = vm_pressure_select_optimal_candidate_to_notify(candidate_list, -1, target_foreground_process);
        }

        if (kn_max == NULL) {
            return KERN_FAILURE;
        }
    }

    target_proc = kn_max->kn_kq->kq_p;

    KNOTE_DETACH(candidate_list, kn_max);

    if (target_proc != PROC_NULL) {
        target_pid = target_proc->p_pid;

        memoryshot(VM_PRESSURE_EVENT, DBG_FUNC_NONE);

        KNOTE_ATTACH(&dispatch_klist, kn_max);
        KNOTE(&dispatch_klist, target_pid);
        KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);

#if CONFIG_MEMORYSTATUS
        memorystatus_send_pressure_note(target_pid);
#endif /* CONFIG_MEMORYSTATUS */

        microuptime(&target_proc->vm_pressure_last_notify_tstamp);
    }

    return KERN_SUCCESS;
}
static kern_return_t vm_try_pressure_candidates(boolean_t target_foreground_process)
{
    /*
     * This takes care of candidates that use NOTE_VM_PRESSURE.
     * It's a notification without indication of the level
     * of memory pressure.
     */
    return (vm_pressure_notification_without_levels(target_foreground_process));
}
/*
 * Remove all elements from the dormant list and place them on the active list.
 * Called with klist lock held.
 */
void vm_reset_active_list(void) {
    /* Re-charge the main list from the dormant list if possible */
    if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
        struct knote *kn;

        VM_PRESSURE_DEBUG(1, "[vm_pressure] recharging main list from dormant list\n");

        while (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
            kn = SLIST_FIRST(&vm_pressure_klist_dormant);
            SLIST_REMOVE_HEAD(&vm_pressure_klist_dormant, kn_selnext);
            SLIST_INSERT_HEAD(&vm_pressure_klist, kn, kn_selnext);
        }
    }
}