X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/6d2010ae8f7a6078e10b361c6962983bab233e0f..a1c7dba18ef36983396c282fe85292db066e39db:/bsd/kern/vm_pressure.c

diff --git a/bsd/kern/vm_pressure.c b/bsd/kern/vm_pressure.c
index b5fc2f072..27c1aed10 100644
--- a/bsd/kern/vm_pressure.c
+++ b/bsd/kern/vm_pressure.c
@@ -40,18 +40,51 @@
 #include
 #include
 #include
+#include
+#include
+#include
+
+#include
+
+#if CONFIG_MEMORYSTATUS
+#include
+#endif
+
+/*
+ * This value is the threshold that a process must meet to be considered for scavenging.
+ */
+#define VM_PRESSURE_MINIMUM_RSIZE		10	/* MB */
+
+#define VM_PRESSURE_NOTIFY_WAIT_PERIOD		10000	/* milliseconds */
 
 void vm_pressure_klist_lock(void);
 void vm_pressure_klist_unlock(void);
 
-void vm_dispatch_memory_pressure(void);
-int vm_try_terminate_candidates(void);
-int vm_try_pressure_candidates(void);
-void vm_recharge_active_list(void);
+static void vm_dispatch_memory_pressure(void);
+void vm_reset_active_list(void);
+
+#if CONFIG_MEMORYSTATUS
+static kern_return_t vm_try_pressure_candidates(boolean_t target_foreground_process);
+#endif
+
+static lck_mtx_t vm_pressure_klist_mutex;
 
 struct klist vm_pressure_klist;
 struct klist vm_pressure_klist_dormant;
 
+#if DEBUG
+#define VM_PRESSURE_DEBUG(cond, format, ...)      \
+do {                                              \
+	if (cond) { printf(format, ##__VA_ARGS__); } \
+} while(0)
+#else
+#define VM_PRESSURE_DEBUG(cond, format, ...)
+#endif
+
+void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr) {
+	lck_mtx_init(&vm_pressure_klist_mutex, grp, attr);
+}
+
 void vm_pressure_klist_lock(void) {
 	lck_mtx_lock(&vm_pressure_klist_mutex);
 }
@@ -65,13 +98,11 @@ int vm_knote_register(struct knote *kn) {
 
 	vm_pressure_klist_lock();
 
-	if ((kn->kn_sfflags & (NOTE_VM_PRESSURE))) {
-#if DEBUG
-		printf("[vm_pressure] process %d registering pressure notification\n", kn->kn_kq->kq_p->p_pid);
-#endif
+	if ((kn->kn_sfflags) & (NOTE_VM_PRESSURE)) {
 		KNOTE_ATTACH(&vm_pressure_klist, kn);
-	} else
+	} else {
 		rv = ENOTSUP;
+	}
 
 	vm_pressure_klist_unlock();
 
@@ -83,9 +114,7 @@ void vm_knote_unregister(struct knote *kn) {
 
 	vm_pressure_klist_lock();
 
-#if DEBUG
-	printf("[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);
-#endif
+	VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);
 
 	SLIST_FOREACH(kn_temp, &vm_pressure_klist, kn_selnext) {
 		if (kn_temp == kn) {
@@ -94,139 +123,571 @@ void vm_knote_unregister(struct knote *kn) {
 			return;
 		}
 	}
-	KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
+
+	SLIST_FOREACH(kn_temp, &vm_pressure_klist_dormant, kn_selnext) {
+		if (kn_temp == kn) {
+			KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
+			vm_pressure_klist_unlock();
+			return;
+		}
+	}
 
 	vm_pressure_klist_unlock();
 }
 
-/* Interface for event dispatch from vm_pageout_garbage_collect thread */
-void consider_pressure_events(void) {
-	vm_dispatch_memory_pressure();
-}
+void vm_pressure_proc_cleanup(proc_t p)
+{
+	struct knote *kn = NULL;
 
-void vm_dispatch_memory_pressure(void) {
 	vm_pressure_klist_lock();
-	if (!SLIST_EMPTY(&vm_pressure_klist)) {
-
-#if DEBUG
-		printf("[vm_pressure] vm_dispatch_memory_pressure\n");
-#endif
-
-		if (vm_try_pressure_candidates()) {
+	VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d exiting pressure notification\n", p->p_pid);
+
+	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
+		if (kn->kn_kq->kq_p == p) {
+			KNOTE_DETACH(&vm_pressure_klist, kn);
 			vm_pressure_klist_unlock();
 			return;
 		}
-	}
-	/* Else... */
-
-#if DEBUG
-	printf("[vm_pressure] could not find suitable event candidate\n");
-#endif
-
-	vm_recharge_active_list();
+	}
 
+	SLIST_FOREACH(kn, &vm_pressure_klist_dormant, kn_selnext) {
+		if (kn->kn_kq->kq_p == p) {
+			KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
+			vm_pressure_klist_unlock();
+			return;
+		}
+	}
 
 	vm_pressure_klist_unlock();
 }
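/*
 * [Editorial note] A minimal userspace sketch of the registration path that
 * vm_knote_register() above serves. It assumes the private EVFILT_VM filter
 * and the NOTE_VM_PRESSURE fflag are visible through <sys/event.h>; the
 * EV_ADD below is what ends up attaching a knote onto vm_pressure_klist.
 * A sketch, not supported API.
 */
#include <sys/event.h>
#include <stdio.h>
#include <unistd.h>

static int wait_for_vm_pressure(void)
{
	struct kevent reg, ev;
	int kq = kqueue();

	if (kq == -1)
		return -1;

	/* Register interest; the kernel side runs vm_knote_register(). */
	EV_SET(&reg, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, NULL);
	if (kevent(kq, &reg, 1, NULL, 0, NULL) == -1) {
		close(kq);
		return -1;
	}

	/* Block until the kernel KNOTE()s the pressure klist. */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
		printf("VM pressure notification received\n");

	close(kq);
	return 0;
}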
 
 /*
- * Try standard pressure event candidates. Called with klist lock held.
+ * Used by the vm_pressure_thread which is
+ * signalled from within vm_pageout_scan().
  */
-int vm_try_pressure_candidates(void) {
+void consider_vm_pressure_events(void)
+{
+	vm_dispatch_memory_pressure();
+}
+
+#if CONFIG_MEMORYSTATUS
+
+/* Jetsam aware version. Called with lock held */
+
+struct knote *vm_find_knote_from_pid(pid_t, struct klist *);
+
+struct knote *vm_find_knote_from_pid(pid_t pid, struct klist *list) {
+	struct knote *kn = NULL;
+
+	SLIST_FOREACH(kn, list, kn_selnext) {
+		struct proc *p;
+		pid_t current_pid;
+
+		p = kn->kn_kq->kq_p;
+		current_pid = p->p_pid;
+
+		if (current_pid == pid) {
+			break;
+		}
+	}
+
+	return kn;
+}
+
+int vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked) {
+	int ret = EINVAL;
+	struct knote *kn;
+
+	VM_PRESSURE_DEBUG(1, "vm_dispatch_pressure_note_to_pid(): pid %d\n", pid);
+
+	if (!locked) {
+		vm_pressure_klist_lock();
+	}
+
 	/*
-	 * This value is the threshold that a process must meet to be considered for scavenging.
-	 * If a process has sufficiently little resident memory, there is probably no use scavenging it.
-	 * At best, we'll scavenge very little memory. At worst, we'll page in code pages or malloc metadata.
+	 * Because we're specifically targeting a process here, we don't care
+	 * if a warning has already been sent and it's moved to the dormant
+	 * list; check that too.
 	 */
+	kn = vm_find_knote_from_pid(pid, &vm_pressure_klist);
+	if (kn) {
+		KNOTE(&vm_pressure_klist, pid);
+		ret = 0;
+	} else {
+		kn = vm_find_knote_from_pid(pid, &vm_pressure_klist_dormant);
+		if (kn) {
+			KNOTE(&vm_pressure_klist_dormant, pid);
+			ret = 0;
+		}
+	}
+
+	if (!locked) {
+		vm_pressure_klist_unlock();
+	}
+
+	return ret;
+}
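/*
 * [Editorial note] A hypothetical call site for vm_dispatch_pressure_note_to_pid()
 * above, illustrating the `locked` convention: pass TRUE only if the caller
 * already holds vm_pressure_klist_mutex; with FALSE the routine takes and drops
 * the lock itself. EINVAL comes back when no knote exists for the pid on either
 * the active or the dormant list.
 */
static void example_notify_pid(pid_t pid)
{
	/* Lock not held here, so let the routine manage it. */
	if (vm_dispatch_pressure_note_to_pid(pid, FALSE) != 0) {
		VM_PRESSURE_DEBUG(1, "no pressure knote registered for pid %d\n", pid);
	}
}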
+
+void vm_find_pressure_foreground_candidates(void)
+{
+	struct knote *kn, *kn_tmp;
+	struct klist dispatch_klist = { NULL };
+
+	vm_pressure_klist_lock();
+	proc_list_lock();
 
-#define VM_PRESSURE_MINIMUM_RSIZE		(10 * 1024 * 1024)
-	
-	struct proc *p_max = NULL;
+	/* Find the foreground processes. */
+	SLIST_FOREACH_SAFE(kn, &vm_pressure_klist, kn_selnext, kn_tmp) {
+		proc_t p = kn->kn_kq->kq_p;
+
+		if (memorystatus_is_foreground_locked(p)) {
+			KNOTE_DETACH(&vm_pressure_klist, kn);
+			KNOTE_ATTACH(&dispatch_klist, kn);
+		}
+	}
+
+	SLIST_FOREACH_SAFE(kn, &vm_pressure_klist_dormant, kn_selnext, kn_tmp) {
+		proc_t p = kn->kn_kq->kq_p;
+
+		if (memorystatus_is_foreground_locked(p)) {
+			KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
+			KNOTE_ATTACH(&dispatch_klist, kn);
+		}
+	}
+
+	proc_list_unlock();
+
+	/* Dispatch pressure notifications accordingly */
+	SLIST_FOREACH_SAFE(kn, &dispatch_klist, kn_selnext, kn_tmp) {
+		proc_t p = kn->kn_kq->kq_p;
+
+		proc_list_lock();
+		if (p != proc_ref_locked(p)) {
+			proc_list_unlock();
+			KNOTE_DETACH(&dispatch_klist, kn);
+			KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
+			continue;
+		}
+		proc_list_unlock();
+
+		VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d\n", kn->kn_kq->kq_p->p_pid);
+		KNOTE(&dispatch_klist, p->p_pid);
+		KNOTE_DETACH(&dispatch_klist, kn);
+		KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
+		microuptime(&p->vm_pressure_last_notify_tstamp);
+		memorystatus_send_pressure_note(p->p_pid);
+		proc_rele(p);
+	}
+
+	vm_pressure_klist_unlock();
+}
+
+void vm_find_pressure_candidate(void)
+{
+	struct knote *kn = NULL, *kn_max = NULL;
 	unsigned int resident_max = 0;
-	struct knote *kn_max = NULL;
-	struct knote *kn;
+	pid_t target_pid = -1;
+	struct klist dispatch_klist = { NULL };
+	struct timeval curr_tstamp = {0, 0};
+	int elapsed_msecs = 0;
+	proc_t target_proc = PROC_NULL;
+	kern_return_t kr = KERN_SUCCESS;
+
+	microuptime(&curr_tstamp);
 
-	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
-		if ( (kn != NULL ) && ( kn->kn_kq != NULL ) && ( kn->kn_kq->kq_p != NULL ) ) {
-			if (kn->kn_sfflags & NOTE_VM_PRESSURE) {
-				struct proc *p = kn->kn_kq->kq_p;
-				if (!(kn->kn_status & KN_DISABLED)) {
-					kern_return_t kr = KERN_SUCCESS;
-					struct task *t = (struct task *)(p->task);
-					struct task_basic_info basic_info;
-					mach_msg_type_number_t size = TASK_BASIC_INFO_COUNT;
-					if( ( kr = task_info(t, TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) == KERN_SUCCESS ) {
-						unsigned int resident_size = basic_info.resident_size;
-						/*
-						 * We don't want a small process to block large processes from
-						 * being notified again.
-						 */
-						if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
-							if (resident_size > resident_max) {
-								p_max = p;
-								resident_max = resident_size;
-								kn_max = kn;
-							}
-						} else {
-#if DEBUG
-							/* There was no candidate with enough resident memory to scavenge */
-							/* This debug print makes too much noise now */
-							//printf("[vm_pressure] threshold failed for pid %d with %u resident, skipping...\n", p->p_pid, resident_size);
-#endif
+	vm_pressure_klist_lock();
+
+	SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
+		struct mach_task_basic_info basic_info;
+		mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT;
+		unsigned int resident_size = 0;
+		proc_t p = PROC_NULL;
+		struct task* t = TASK_NULL;
+
+		p = kn->kn_kq->kq_p;
+		proc_list_lock();
+		if (p != proc_ref_locked(p)) {
+			p = PROC_NULL;
+			proc_list_unlock();
+			continue;
+		}
+		proc_list_unlock();
+
+		t = (struct task *)(p->task);
+
+		timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp);
+		elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;
+
+		if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) {
+			proc_rele(p);
+			continue;
+		}
+
+		if (!memorystatus_bg_pressure_eligible(p)) {
+			VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
+			proc_rele(p);
+			continue;
+		}
+
+		if( ( kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS ) {
+			VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed\n", p->p_pid);
+			proc_rele(p);
+			continue;
+		}
+
+		/*
+		 * We don't want a small process to block large processes from
+		 * being notified again.
+		 */
+		resident_size = (basic_info.resident_size)/(1024 * 1024);
+		if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
+			if (resident_size > resident_max) {
+				resident_max = resident_size;
+				kn_max = kn;
+				target_pid = p->p_pid;
+				target_proc = p;
+			}
+		} else {
+			/* There was no candidate with enough resident memory to scavenge */
+			VM_PRESSURE_DEBUG(1, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
+		}
+		proc_rele(p);
+	}
+
+	if (kn_max == NULL || target_pid == -1) {
+		VM_PRESSURE_DEBUG(1, "[vm_pressure] - no target found!\n");
+		goto exit;
+	}
+
+	VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
+	VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
+
+	KNOTE_DETACH(&vm_pressure_klist, kn_max);
+
+	target_proc = proc_find(target_pid);
+	if (target_proc != PROC_NULL) {
+		KNOTE_ATTACH(&dispatch_klist, kn_max);
+		KNOTE(&dispatch_klist, target_pid);
+		KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
+		memorystatus_send_pressure_note(target_pid);
+		microuptime(&target_proc->vm_pressure_last_notify_tstamp);
+		proc_rele(target_proc);
+	}
+
+exit:
+	vm_pressure_klist_unlock();
+}
+#endif /* CONFIG_MEMORYSTATUS */
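/*
 * [Editorial note] The wait-period test used by the scan loop above, in
 * isolation. A sketch under this file's own definitions: timevalsub() has
 * BSD semantics (first argument is decremented in place), and
 * VM_PRESSURE_NOTIFY_WAIT_PERIOD is the 10000 ms floor defined at the top
 * of the file. Arguments are passed by value so the caller's copies survive.
 */
static boolean_t pressure_note_is_due(struct timeval now, struct timeval last_notify)
{
	int elapsed_msecs;

	timevalsub(&now, &last_notify);		/* now -= last_notify */
	elapsed_msecs = now.tv_sec * 1000 + now.tv_usec / 1000;

	/* Candidates renotified inside the window are skipped by the scan. */
	return (elapsed_msecs >= VM_PRESSURE_NOTIFY_WAIT_PERIOD);
}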
- */ - if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) { - if (resident_size > resident_max) { - p_max = p; - resident_max = resident_size; - kn_max = kn; - } - } else { -#if DEBUG - /* There was no candidate with enough resident memory to scavenge */ - /* This debug print makes too much noise now */ - //printf("[vm_pressure] threshold failed for pid %d with %u resident, skipping...\n", p->p_pid, resident_size); -#endif + vm_pressure_klist_lock(); + + SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {\ + struct mach_task_basic_info basic_info; + mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT; + unsigned int resident_size = 0; + proc_t p = PROC_NULL; + struct task* t = TASK_NULL; + + p = kn->kn_kq->kq_p; + proc_list_lock(); + if (p != proc_ref_locked(p)) { + p = PROC_NULL; + proc_list_unlock(); + continue; + } + proc_list_unlock(); + + t = (struct task *)(p->task); + + timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp); + elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000; + + if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) { + proc_rele(p); + continue; + } + + if (!memorystatus_bg_pressure_eligible(p)) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid); + proc_rele(p); + continue; + } + + if( ( kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS ) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed\n", p->p_pid); + proc_rele(p); + continue; + } + + /* + * We don't want a small process to block large processes from + * being notified again. + */ + resident_size = (basic_info.resident_size)/(1024 * 1024); + if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) { + if (resident_size > resident_max) { + resident_max = resident_size; + kn_max = kn; + target_pid = p->p_pid; + target_proc = p; + } + } else { + /* There was no candidate with enough resident memory to scavenge */ + VM_PRESSURE_DEBUG(1, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size); + } + proc_rele(p); + } + + if (kn_max == NULL || target_pid == -1) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] - no target found!\n"); + goto exit; + } + + VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0); + VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); + + KNOTE_DETACH(&vm_pressure_klist, kn_max); + + target_proc = proc_find(target_pid); + if (target_proc != PROC_NULL) { + KNOTE_ATTACH(&dispatch_klist, kn_max); + KNOTE(&dispatch_klist, target_pid); + KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max); + memorystatus_send_pressure_note(target_pid); + microuptime(&target_proc->vm_pressure_last_notify_tstamp); + proc_rele(target_proc); + } + +exit: + vm_pressure_klist_unlock(); +} +#endif /* CONFIG_MEMORYSTATUS */ + + +struct knote * +vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process); + +kern_return_t vm_pressure_notification_without_levels(boolean_t target_foreground_process); +kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process); + +kern_return_t +vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process) +{ + vm_pressure_klist_lock(); + + if (SLIST_EMPTY(&vm_pressure_klist)) { + vm_reset_active_list(); + } + + if (!SLIST_EMPTY(&vm_pressure_klist)) { + + VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n"); + + if (KERN_SUCCESS == 
 
+extern vm_pressure_level_t
+convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);
+
+struct knote *
+vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process)
+{
+	struct knote *kn = NULL, *kn_max = NULL;
+	unsigned int resident_max = 0;
+	struct timeval curr_tstamp = {0, 0};
+	int elapsed_msecs = 0;
+	int selected_task_importance = 0;
+	static int pressure_snapshot = -1;
+	boolean_t pressure_increase = FALSE;
+
+	if (level != -1) {
+
+		if (pressure_snapshot == -1) {
+			/*
+			 * Initial snapshot.
+			 */
+			pressure_snapshot = level;
+			pressure_increase = TRUE;
+		} else {
+
+			if (level >= pressure_snapshot) {
+				pressure_increase = TRUE;
+			} else {
+				pressure_increase = FALSE;
+			}
+
+			pressure_snapshot = level;
+		}
+	}
+
+	if ((level > 0) && (pressure_increase) == TRUE) {
+		/*
+		 * We'll start by considering the largest
+		 * unimportant task in our list.
+		 */
+		selected_task_importance = INT_MAX;
+	} else {
+		/*
+		 * We'll start by considering the largest
+		 * important task in our list.
+		 */
+		selected_task_importance = 0;
+	}
+
+	microuptime(&curr_tstamp);
+
+	SLIST_FOREACH(kn, candidate_list, kn_selnext) {
+
+		unsigned int resident_size = 0;
+		proc_t p = PROC_NULL;
+		struct task* t = TASK_NULL;
+		int curr_task_importance = 0;
+		boolean_t consider_knote = FALSE;
+
+		p = kn->kn_kq->kq_p;
+		proc_list_lock();
+		if (p != proc_ref_locked(p)) {
+			p = PROC_NULL;
+			proc_list_unlock();
+			continue;
+		}
+		proc_list_unlock();
+
+#if CONFIG_MEMORYSTATUS
+		if (target_foreground_process == TRUE && !memorystatus_is_foreground_locked(p)) {
+			/*
+			 * Skip process not marked foreground.
+			 */
+			proc_rele(p);
+			continue;
+		}
+#endif /* CONFIG_MEMORYSTATUS */
+
+		t = (struct task *)(p->task);
+
+		timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp);
+		elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;
+
+		if ((level == -1) && (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD)) {
+			proc_rele(p);
+			continue;
+		}
+
+		if (level != -1) {
+			/*
+			 * For the level based notifications, check and see if this knote is
+			 * registered for the current level.
+			 */
+			vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level);
+
+			if ((kn->kn_sfflags & dispatch_level) == 0) {
+				proc_rele(p);
+				continue;
+			}
+		}
+
+#if CONFIG_MEMORYSTATUS
+		if (target_foreground_process == FALSE && !memorystatus_bg_pressure_eligible(p)) {
+			VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
+			proc_rele(p);
+			continue;
+		}
+#endif /* CONFIG_MEMORYSTATUS */
+
+		curr_task_importance = task_importance_estimate(t);
+
+		/*
+		 * We don't want a small process to block large processes from
+		 * being notified again.
+		 */
+		resident_size = (get_task_phys_footprint(t))/(1024*1024ULL);	//(MB);
+
+		if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
+
+			if (level > 0) {
+				/*
+				 * Warning or Critical Pressure.
+				 */
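/*
 * [Editorial note] The candidate comparison used by
 * vm_pressure_select_optimal_candidate_to_notify() above, distilled into a
 * hypothetical helper. On rising pressure the scan prefers the *least*
 * important task; otherwise the *most* important one. Resident size only
 * breaks ties between equally important tasks.
 */
static boolean_t is_better_candidate(int curr_importance, int selected_importance,
    unsigned int resident_mb, unsigned int resident_max_mb,
    boolean_t prefer_low_importance)
{
	if (prefer_low_importance) {
		return ((curr_importance < selected_importance) ||
		    ((curr_importance == selected_importance) && (resident_mb > resident_max_mb)));
	}
	return ((curr_importance > selected_importance) ||
	    ((curr_importance == selected_importance) && (resident_mb > resident_max_mb)));
}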
+ */ + if (pressure_increase) { + if ((curr_task_importance < selected_task_importance) || + ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { + + /* + * We have found a candidate process which is: + * a) at a lower importance than the current selected process + * OR + * b) has importance equal to that of the current selected process but is larger + */ + + if (task_has_been_notified(t, level) == FALSE) { + consider_knote = TRUE; } - } else { -#if DEBUG - printf("[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr); -#endif } } else { -#if DEBUG - printf("[vm_pressure] pid %d currently disabled, skipping...\n", p->p_pid); -#endif + if ((curr_task_importance > selected_task_importance) || + ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { + + /* + * We have found a candidate process which is: + * a) at a higher importance than the current selected process + * OR + * b) has importance equal to that of the current selected process but is larger + */ + + if (task_has_been_notified(t, level) == FALSE) { + consider_knote = TRUE; + } + } + } + } else if (level == 0) { + /* + * Pressure back to normal. + */ + if ((curr_task_importance > selected_task_importance) || + ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { + + if ((task_has_been_notified(t, kVMPressureWarning) == TRUE) || (task_has_been_notified(t, kVMPressureCritical) == TRUE)) { + consider_knote = TRUE; + } + } + } else if (level == -1) { + + /* + * Simple (importance and level)-free behavior based solely on RSIZE. + */ + if (resident_size > resident_max) { + consider_knote = TRUE; } } - } else { -#if DEBUG - if (kn == NULL) { - printf("[vm_pressure] kn is NULL\n"); - } else if (kn->kn_kq == NULL) { - printf("[vm_pressure] kn->kn_kq is NULL\n"); - } else if (kn->kn_kq->kq_p == NULL) { - printf("[vm_pressure] kn->kn_kq->kq_p is NULL\n"); + + + if (consider_knote) { + resident_max = resident_size; + kn_max = kn; + selected_task_importance = curr_task_importance; + consider_knote = FALSE; /* reset for the next candidate */ } -#endif - } + } else { + /* There was no candidate with enough resident memory to scavenge */ + VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size); + } + proc_rele(p); + } + + if (kn_max) { + VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); } + + return kn_max; +} + +/* + * vm_pressure_klist_lock is held for this routine. + */ +kern_return_t vm_pressure_notification_without_levels(boolean_t target_foreground_process) +{ + struct knote *kn_max = NULL; + pid_t target_pid = -1; + struct klist dispatch_klist = { NULL }; + proc_t target_proc = PROC_NULL; + struct klist *candidate_list = NULL; + + candidate_list = &vm_pressure_klist; - if (kn_max == NULL) return 0; + kn_max = vm_pressure_select_optimal_candidate_to_notify(candidate_list, -1, target_foreground_process); -#if DEBUG - printf("[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max); -#endif + if (kn_max == NULL) { + if (target_foreground_process) { + /* + * Doesn't matter if the process had been notified earlier on. + * This is a very specific request. Deliver it. 
+ */ + candidate_list = &vm_pressure_klist_dormant; + kn_max = vm_pressure_select_optimal_candidate_to_notify(candidate_list, -1, target_foreground_process); + } - KNOTE_DETACH(&vm_pressure_klist, kn_max); - struct klist dispatch_klist = { NULL }; - KNOTE_ATTACH(&dispatch_klist, kn_max); - KNOTE(&dispatch_klist, NOTE_VM_PRESSURE); - KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max); + if (kn_max == NULL) { + return KERN_FAILURE; + } + } + + target_proc = kn_max->kn_kq->kq_p; + + KNOTE_DETACH(candidate_list, kn_max); + + if (target_proc != PROC_NULL) { - return 1; + target_pid = target_proc->p_pid; + + memoryshot(VM_PRESSURE_EVENT, DBG_FUNC_NONE); + + KNOTE_ATTACH(&dispatch_klist, kn_max); + KNOTE(&dispatch_klist, target_pid); + KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max); + +#if CONFIG_MEMORYSTATUS + memorystatus_send_pressure_note(target_pid); +#endif /* CONFIG_MEMORYSTATUS */ + + microuptime(&target_proc->vm_pressure_last_notify_tstamp); + } + + return KERN_SUCCESS; } +static kern_return_t vm_try_pressure_candidates(boolean_t target_foreground_process) +{ + /* + * This takes care of candidates that use NOTE_VM_PRESSURE. + * It's a notification without indication of the level + * of memory pressure. + */ + return (vm_pressure_notification_without_levels(target_foreground_process)); +} /* * Remove all elements from the dormant list and place them on the active list. * Called with klist lock held. */ -void vm_recharge_active_list(void) { +void vm_reset_active_list(void) { /* Re-charge the main list from the dormant list if possible */ if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) { -#if DEBUG - printf("[vm_pressure] recharging main list from dormant list\n"); -#endif struct knote *kn; + + VM_PRESSURE_DEBUG(1, "[vm_pressure] recharging main list from dormant list\n"); + while (!SLIST_EMPTY(&vm_pressure_klist_dormant)) { kn = SLIST_FIRST(&vm_pressure_klist_dormant); SLIST_REMOVE_HEAD(&vm_pressure_klist_dormant, kn_selnext);