- TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list);
- bucket->count++;
-
- memorystatus_list_count++;
-
- memorystatus_check_levels_locked();
-
-exit:
- if (!locked) {
- proc_list_unlock();
- }
-
- return 0;
-}
-
-/*
- * Description:
- * Moves a process from one jetsam bucket to another,
- * which changes the LRU position of the process.
- *
- * Monitors transition between buckets and if necessary
- * will update cached memory limits accordingly.
- *
- * skip_demotion_check:
- * - if the 'jetsam aging policy' is NOT 'legacy':
- * When this flag is TRUE, it means we are going
- * to age the ripe processes out of the aging bands and into the
- * IDLE band and apply their inactive memory limits.
- *
- * - if the 'jetsam aging policy' is 'legacy':
- * When this flag is TRUE, it might mean the above aging mechanism
- * OR
- * It might be that we have a process that has used up its 'idle deferral'
- * stay that is given to it once per lifetime. And in this case, the process
- * won't be going through any aging codepaths. But we still need to apply
- * the right inactive limits and so we explicitly set this to TRUE if the
- * new priority for the process is the IDLE band.
- */
- void
- memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert, boolean_t skip_demotion_check)
- {
- memstat_bucket_t *old_bucket, *new_bucket;
-
- assert(priority < MEMSTAT_BUCKET_COUNT);
-
- /* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */
- if ((p->p_listflag & P_LIST_EXITED) != 0) {
- return;
- }
-
- MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting %s(%d) to priority %d, inserting at %s\n",
- (*p->p_name ? p->p_name : "unknown"), p->p_pid, priority, head_insert ? "head" : "tail");
-
- DTRACE_MEMORYSTATUS3(memorystatus_update_priority, proc_t, p, int32_t, p->p_memstat_effectivepriority, int, priority);
-
-#if DEVELOPMENT || DEBUG
- if (priority == JETSAM_PRIORITY_IDLE && /* if the process is on its way into the IDLE band */
- skip_demotion_check == FALSE && /* and it isn't via the path that will set the INACTIVE memlimits */
- (p->p_memstat_dirty & P_DIRTY_TRACK) && /* and it has 'DIRTY' tracking enabled */
- ((p->p_memstat_memlimit != p->p_memstat_memlimit_inactive) || /* and we notice that the current limit isn't the right value (inactive) */
- ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) ? ( ! (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT)) : (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT)))) /* OR type (fatal vs non-fatal) */
- panic("memorystatus_update_priority_locked: on %s with 0x%x, prio: %d and %d\n", p->p_name, p->p_memstat_state, priority, p->p_memstat_memlimit); /* then we must catch this */
-#endif /* DEVELOPMENT || DEBUG */
-
- old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
-
- if (skip_demotion_check == FALSE) {
-
- if (isSysProc(p)) {
- /*
- * For system processes, the memorystatus_dirty_* routines take care of adding/removing
- * the processes from the aging bands and balancing the demotion counts.
- * We can, however, override that if the process has an 'elevated inactive jetsam band' attribute.
- */
-
- if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) {
- /*
- * 2 types of processes can use the non-standard elevated inactive band:
- * - Frozen processes that always land in memorystatus_freeze_jetsam_band
- * OR
- * - processes that specifically opt-in to the elevated inactive support e.g. docked processes.
- */
-#if CONFIG_FREEZE
- if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
- if (priority <= memorystatus_freeze_jetsam_band) {
- priority = memorystatus_freeze_jetsam_band;
- }
- } else
-#endif /* CONFIG_FREEZE */
- {
- if (priority <= JETSAM_PRIORITY_ELEVATED_INACTIVE) {
- priority = JETSAM_PRIORITY_ELEVATED_INACTIVE;
- }
- }
- assert(! (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS));
- }
- } else if (isApp(p)) {
-
- /*
- * Check to see if the application is being lowered in jetsam priority. If so, and:
- * - it has an 'elevated inactive jetsam band' attribute, then put it in the appropriate band.
- * - it is a normal application, then let it age in the aging band if that policy is in effect.
- */
-
- if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) {
-#if CONFIG_FREEZE
- if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
- if (priority <= memorystatus_freeze_jetsam_band) {
- priority = memorystatus_freeze_jetsam_band;
- }
- } else
-#endif /* CONFIG_FREEZE */
- {
- if (priority <= JETSAM_PRIORITY_ELEVATED_INACTIVE) {
- priority = JETSAM_PRIORITY_ELEVATED_INACTIVE;
- }
- }
- } else {
-
- if (applications_aging_band) {
- if (p->p_memstat_effectivepriority == applications_aging_band) {
- assert(old_bucket->count == (memorystatus_scheduled_idle_demotions_apps + 1));
- }
-
- if ((jetsam_aging_policy != kJetsamAgingPolicyLegacy) && (priority <= applications_aging_band)) {
- assert(! (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS));
- priority = applications_aging_band;
- memorystatus_schedule_idle_demotion_locked(p, TRUE);
- }
- }
- }
- }
- }
-
- /* Landing in an aging band implies an idle demotion is in flight for this proc. */
- if ((system_procs_aging_band && (priority == system_procs_aging_band)) || (applications_aging_band && (priority == applications_aging_band))) {
- assert(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS);
- }
-
- /* Move p from its old bucket to the new one (head or tail per the caller's request). */
- TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list);
- old_bucket->count--;
-
- new_bucket = &memstat_bucket[priority];
- if (head_insert)
- TAILQ_INSERT_HEAD(&new_bucket->list, p, p_memstat_list);
- else
- TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
- new_bucket->count++;
-
- if (memorystatus_highwater_enabled) {
- boolean_t is_fatal;
- boolean_t use_active;
-
- /*
- * If cached limit data is updated, then the limits
- * will be enforced by writing to the ledgers.
- */
- boolean_t ledger_update_needed = TRUE;
-
- /*
- * Here, we must update the cached memory limit if the task
- * is transitioning between:
- * active <--> inactive
- * FG <--> BG
- * but:
- * dirty <--> clean is ignored
- *
- * We bypass non-idle processes that have opted into dirty tracking because
- * a move between buckets does not imply a transition between the
- * dirty <--> clean state.
- */
-
- if (p->p_memstat_dirty & P_DIRTY_TRACK) {
-
- if (skip_demotion_check == TRUE && priority == JETSAM_PRIORITY_IDLE) {
- CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
- use_active = FALSE;
- } else {
- ledger_update_needed = FALSE;
- }
-
- } else if ((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) {
- /*
- * inactive --> active
- * BG --> FG
- * assign active state
- */
- CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
- use_active = TRUE;
-
- } else if ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
- /*
- * active --> inactive
- * FG --> BG
- * assign inactive state
- */
- CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
- use_active = FALSE;
- } else {
- /*
- * The transition between jetsam priority buckets apparently did
- * not affect active/inactive state.
- * This is not unusual... especially during startup when
- * processes are getting established in their respective bands.
- */
- ledger_update_needed = FALSE;
- }
-
- /*
- * Enforce the new limits by writing to the ledger
- */
- if (ledger_update_needed) {
- task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, use_active, is_fatal);
-
- MEMORYSTATUS_DEBUG(3, "memorystatus_update_priority_locked: new limit on pid %d (%dMB %s) priority old --> new (%d --> %d) dirty?=0x%x %s\n",
- p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
- (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, priority, p->p_memstat_dirty,
- (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
- }
- }
-
- /*
- * Record idle start or idle delta.
- */
- if (p->p_memstat_effectivepriority == priority) {
- /*
- * This process is not transitioning between
- * jetsam priority buckets. Do nothing.
- */
- } else if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) {
- uint64_t now;
- /*
- * Transitioning out of the idle priority bucket.
- * Record idle delta.
- */
- assert(p->p_memstat_idle_start != 0);
- now = mach_absolute_time();
- if (now > p->p_memstat_idle_start) {
- p->p_memstat_idle_delta = now - p->p_memstat_idle_start;
- }
-
- /*
- * About to become active and so memory footprint could change.
- * So mark it eligible for freeze-considerations next time around.
- */
- if (p->p_memstat_state & P_MEMSTAT_FREEZE_IGNORE) {
- p->p_memstat_state &= ~P_MEMSTAT_FREEZE_IGNORE;
- }
-
- } else if (priority == JETSAM_PRIORITY_IDLE) {
- /*
- * Transitioning into the idle priority bucket.
- * Record idle start.
- */
- p->p_memstat_idle_start = mach_absolute_time();
- }
-
- /* Trace the transition, then commit the new effective priority. */
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CHANGE_PRIORITY), p->p_pid, priority, p->p_memstat_effectivepriority, 0, 0);
-
- p->p_memstat_effectivepriority = priority;
-
-#if CONFIG_SECLUDED_MEMORY
- if (secluded_for_apps &&
- task_could_use_secluded_mem(p->task)) {
- task_set_can_use_secluded_mem(
- p->task,
- (priority >= JETSAM_PRIORITY_FOREGROUND));
- }
-#endif /* CONFIG_SECLUDED_MEMORY */
-
- memorystatus_check_levels_locked();
- }
-
-/*
- *
- * Description: Update the jetsam priority and memory limit attributes for a given process.
- *
- * Parameters:
- * p init this process's jetsam information.
- * priority The jetsam priority band
- * user_data user specific data, unused by the kernel
- * effective guards against race if process's update already occurred
- * update_memlimit When true we know this is the init step via the posix_spawn path.
- *
- * memlimit_active Value in megabytes; The monitored footprint level while the
- * process is active. Exceeding it may result in termination
- * based on its associated fatal flag.
- *
- * memlimit_active_is_fatal When a process is active and exceeds its memory footprint,
- * this describes whether or not it should be immediately fatal.
- *
- * memlimit_inactive Value in megabytes; The monitored footprint level while the
- * process is inactive. Exceeding it may result in termination
- * based on its associated fatal flag.
- *
- * memlimit_inactive_is_fatal When a process is inactive and exceeds its memory footprint,
- * this describes whether or not it should be immediately fatal.
- *
- * Returns: 0 Success
- * non-0 Failure
- */
-
- int
- memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit,
- int32_t memlimit_active, boolean_t memlimit_active_is_fatal,
- int32_t memlimit_inactive, boolean_t memlimit_inactive_is_fatal)
- {
- int ret;
- boolean_t head_insert = false;
-
- MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing (%s) pid %d: priority %d, user_data 0x%llx\n", (*p->p_name ? p->p_name : "unknown"), p->p_pid, priority, user_data);
-
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);
-
- /* Normalize special priority values before taking the proc_list lock. */
- if (priority == -1) {
- /* Use as shorthand for default priority */
- priority = JETSAM_PRIORITY_DEFAULT;
- } else if ((priority == system_procs_aging_band) || (priority == applications_aging_band)) {
- /* Both the aging bands are reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */
- priority = JETSAM_PRIORITY_IDLE;
- } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) {
- /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */
- priority = JETSAM_PRIORITY_IDLE;
- head_insert = TRUE;
- } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) {
- /* Sanity check */
- ret = EINVAL;
- goto out;
- }
-
- proc_list_lock();
-
- assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));
-
- if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) {
- ret = EALREADY;
- proc_list_unlock();
- MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid);
- goto out;
- }
-
- if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || ((p->p_listflag & P_LIST_EXITED) != 0)) {
- /*
- * This could happen when a process calling posix_spawn() is exiting on the jetsam thread.
- */
- ret = EBUSY;
- proc_list_unlock();
- goto out;
- }
-
- p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED;
- p->p_memstat_userdata = user_data;
- p->p_memstat_requestedpriority = priority;
-
- if (update_memlimit) {
- boolean_t is_fatal;
- boolean_t use_active;
-
- /*
- * Posix_spawn'd processes come through this path to instantiate ledger limits.
- * Forked processes do not come through this path, so no ledger limits exist.
- * (That's why forked processes can consume unlimited memory.)
- */
-
- MEMORYSTATUS_DEBUG(3, "memorystatus_update(enter): pid %d, priority %d, dirty=0x%x, Active(%dMB %s), Inactive(%dMB, %s)\n",
- p->p_pid, priority, p->p_memstat_dirty,
- memlimit_active, (memlimit_active_is_fatal ? "F " : "NF"),
- memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF"));
-
- if (memlimit_active <= 0) {
- /*
- * This process will have a system_wide task limit when active.
- * System_wide task limit is always fatal.
- * It's quite common to see non-fatal flag passed in here.
- * It's not an error, we just ignore it.
- */
-
- /*
- * For backward compatibility with some unexplained launchd behavior,
- * we allow a zero sized limit. But we still enforce system_wide limit
- * when written to the ledgers.
- */
-
- if (memlimit_active < 0) {
- memlimit_active = -1; /* enforces system_wide task limit */
- }
- memlimit_active_is_fatal = TRUE;
- }
-
- if (memlimit_inactive <= 0) {
- /*
- * This process will have a system_wide task limit when inactive.
- * System_wide task limit is always fatal.
- */
-
- memlimit_inactive = -1;
- memlimit_inactive_is_fatal = TRUE;
- }
-
- /*
- * Initialize the active limit variants for this process.
- */
- SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal);
-
- /*
- * Initialize the inactive limit variants for this process.
- */
- SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal);
-
- /*
- * Initialize the cached limits for target process.
- * When the target process is dirty tracked, it's typically
- * in a clean state. Non dirty tracked processes are
- * typically active (Foreground or above).
- * But just in case, we don't make assumptions...
- */
-
- if (proc_jetsam_state_is_active_locked(p) == TRUE) {
- CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
- use_active = TRUE;
- } else {
- CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
- use_active = FALSE;
- }
-
- /*
- * Enforce the cached limit by writing to the ledger.
- */
- if (memorystatus_highwater_enabled) {
- /* apply now */
- task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, use_active, is_fatal);
-
- MEMORYSTATUS_DEBUG(3, "memorystatus_update: init: limit on pid %d (%dMB %s) targeting priority(%d) dirty?=0x%x %s\n",
- p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
- (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), priority, p->p_memstat_dirty,
- (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
- }
- }
-
- /*
- * We can't add to the aging bands buckets here.
- * But, we could be removing it from those buckets.
- * Check and take appropriate steps if so.
- */
-
- if (isProcessInAgingBands(p)) {
-
- memorystatus_invalidate_idle_demotion_locked(p, TRUE);
- memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE);
- } else {
- if (jetsam_aging_policy == kJetsamAgingPolicyLegacy && priority == JETSAM_PRIORITY_IDLE) {
- /*
- * Daemons with 'inactive' limits will go through the dirty tracking codepath.
- * This path deals with apps that may have 'inactive' limits e.g. WebContent processes.
- * If this is the legacy aging policy we explicitly need to apply those limits. If it
- * is any other aging policy, then we don't need to worry because all processes
- * will go through the aging bands and then the demotion thread will take care to
- * move them into the IDLE band and apply the required limits.
- */
- memorystatus_update_priority_locked(p, priority, head_insert, TRUE);
- }
- }
-
- /* Finally move the process to its requested priority band. */
- memorystatus_update_priority_locked(p, priority, head_insert, FALSE);
-
- proc_list_unlock();
- ret = 0;
-
-out:
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0);
-
- return ret;
- }
-
- /*
- * Remove a process from memorystatus tracking: unlink it from its jetsam
- * bucket, drop the global list count, and clean up any pending idle demotion.
- * Takes the proc_list lock unless the caller already holds it ('locked').
- */
- int
- memorystatus_remove(proc_t p, boolean_t locked)
- {
- int ret;
- memstat_bucket_t *bucket;
- boolean_t reschedule = FALSE;
-
- MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing pid %d\n", p->p_pid);
-
- if (!locked) {
- proc_list_lock();
- }
-
- assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));
-
- bucket = &memstat_bucket[p->p_memstat_effectivepriority];
-
- /* A proc sitting in an aging band has a scheduled demotion that must be torn down below. */
- if (isSysProc(p) && system_procs_aging_band && (p->p_memstat_effectivepriority == system_procs_aging_band)) {
-
- assert(bucket->count == memorystatus_scheduled_idle_demotions_sysprocs);
- reschedule = TRUE;
-
- } else if (isApp(p) && applications_aging_band && (p->p_memstat_effectivepriority == applications_aging_band)) {
-
- assert(bucket->count == memorystatus_scheduled_idle_demotions_apps);
- reschedule = TRUE;
- }
-
- /*
- * Record idle delta
- */
-
- if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) {
- uint64_t now = mach_absolute_time();
- if (now > p->p_memstat_idle_start) {
- p->p_memstat_idle_delta = now - p->p_memstat_idle_start;
- }
- }
-
- TAILQ_REMOVE(&bucket->list, p, p_memstat_list);
- bucket->count--;
-
- memorystatus_list_count--;
-
- /* If awaiting demotion to the idle band, clean up */
- if (reschedule) {
- memorystatus_invalidate_idle_demotion_locked(p, TRUE);
- memorystatus_reschedule_idle_demotion_locked();
- }
-
- memorystatus_check_levels_locked();
-
-#if CONFIG_FREEZE
- if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) {
-
- if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
- p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
- memorystatus_refreeze_eligible_count--;
- }
-
- /* Unwind freezer accounting for this proc. */
- memorystatus_frozen_count--;
- memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages;
- p->p_memstat_freeze_sharedanon_pages = 0;
- }
-
- if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
- memorystatus_suspended_count--;
- }
-#endif
-
- if (!locked) {
- proc_list_unlock();
- }
-
- /* NOTE(review): p was already dereferenced above, so it cannot be NULL here; the ESRCH branch looks vestigial. */
- if (p) {
- ret = 0;
- } else {
- ret = ESRCH;
- }
-
- return ret;
- }
-
-/*
- * Validate dirty tracking flags with process state.
- *
- * Return:
- * 0 on success
- * non-0 on failure
- *
- * The proc_list_lock is held by the caller.
- */
-
- static int
- memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) {
- /* See that the process isn't marked for termination */
- if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) {
- return EBUSY;
- }
-
- /* Idle exit requires that process be tracked */
- if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) &&
- !(pcontrol & PROC_DIRTY_TRACK)) {
- return EINVAL;
- }
-
- /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. */
- if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) &&
- !(pcontrol & PROC_DIRTY_TRACK)) {
- return EINVAL;
- }
-
- /* Only one type of DEFER behavior is allowed.*/
- if ((pcontrol & PROC_DIRTY_DEFER) &&
- (pcontrol & PROC_DIRTY_DEFER_ALWAYS)) {
- return EINVAL;
- }
-
- /* Deferral is only relevant if idle exit is specified */
- /* NOTE(review): this test uses PROC_DIRTY_ALLOWS_IDLE_EXIT while the check above uses PROC_DIRTY_ALLOW_IDLE_EXIT — confirm both constants exist in the header and carry the intended values. */
- if (((pcontrol & PROC_DIRTY_DEFER) ||
- (pcontrol & PROC_DIRTY_DEFER_ALWAYS)) &&
- !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) {
- return EINVAL;
- }
-
- return(0);
- }
-
- /*
- * Recompute a system proc's jetsam priority after a dirty-tracking state change:
- * a clean, idle-exit-enabled process heads for the aging band (or straight to
- * IDLE once aged out); otherwise it returns to its requested priority.
- * Caller holds the proc_list lock.
- */
- static void
- memorystatus_update_idle_priority_locked(proc_t p) {
- int32_t priority;
-
- MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty);
-
- assert(isSysProc(p));
-
- if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
-
- /* Clean and idle-exit enabled: aging band while a demotion is in progress, else IDLE. */
- priority = (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) ? system_procs_aging_band : JETSAM_PRIORITY_IDLE;
- } else {
- priority = p->p_memstat_requestedpriority;
- }
-
- if (priority != p->p_memstat_effectivepriority) {
-
- if ((jetsam_aging_policy == kJetsamAgingPolicyLegacy) &&
- (priority == JETSAM_PRIORITY_IDLE)) {
-
- /*
- * This process is on its way into the IDLE band. The system is
- * using 'legacy' jetsam aging policy. That means, this process
- * has already used up its idle-deferral aging time that is given
- * once per its lifetime. So we need to set the INACTIVE limits
- * explicitly because it won't be going through the demotion paths
- * that take care to apply the limits appropriately.
- */
-
- if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) {
-
- /*
- * This process has the 'elevated inactive jetsam band' attribute.
- * So, there will be no trip to IDLE after all.
- * Instead, we pin the process in the elevated band,
- * where its ACTIVE limits will apply.
- */
-
- priority = JETSAM_PRIORITY_ELEVATED_INACTIVE;
- }
-
- memorystatus_update_priority_locked(p, priority, false, true);
-
- } else {
- memorystatus_update_priority_locked(p, priority, false, false);
- }
- }
- }
-
-/*
- * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle
- * (clean). They may also indicate that they support termination when idle, with the result that they are promoted
- * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low
- * priority idle band when clean (and killed earlier, protecting higher priority processes).
- *
- * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by
- * memorystatus_sysprocs_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band
- * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to
- * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle
- * band. The deferral can be cleared early by clearing the appropriate flag.
- *
- * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process
- * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be
- * re-enabled or the guard state cleared, depending on whether the guard deadline has passed.
- */
-
- /*
- * Enable dirty-state tracking options for p (track / allow-idle-exit /
- * launch-in-progress / one of the DEFER variants) and start or cancel the
- * idle-demotion deferral as the new flag combination dictates.
- * Returns 0 on success or an errno (EBUSY / EPERM / EINVAL).
- */
- int
- memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
- unsigned int old_dirty;
- boolean_t reschedule = FALSE;
- boolean_t already_deferred = FALSE;
- boolean_t defer_now = FALSE;
- int ret = 0;
-
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK),
- p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0);
-
- proc_list_lock();
-
- if ((p->p_listflag & P_LIST_EXITED) != 0) {
- /*
- * Process is on its way out.
- */
- ret = EBUSY;
- goto exit;
- }
-
- if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
- ret = EPERM;
- goto exit;
- }
-
- if ((ret = memorystatus_validate_track_flags(p, pcontrol)) != 0) {
- /* error */
- goto exit;
- }
-
- /* Snapshot prior flags so we can detect whether aging was already in progress. */
- old_dirty = p->p_memstat_dirty;
-
- /* These bits are cumulative, as per <rdar://problem/11159924> */
- if (pcontrol & PROC_DIRTY_TRACK) {
- p->p_memstat_dirty |= P_DIRTY_TRACK;
- }
-
- if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) {
- p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT;
- }
-
- if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) {
- p->p_memstat_dirty |= P_DIRTY_LAUNCH_IN_PROGRESS;
- }
-
- if (old_dirty & P_DIRTY_AGING_IN_PROGRESS) {
- already_deferred = TRUE;
- }
-
-
- /* This can be set and cleared exactly once. */
- if (pcontrol & (PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) {
-
- if ((pcontrol & (PROC_DIRTY_DEFER)) &&
- !(old_dirty & P_DIRTY_DEFER)) {
- p->p_memstat_dirty |= P_DIRTY_DEFER;
- }
-
- if ((pcontrol & (PROC_DIRTY_DEFER_ALWAYS)) &&
- !(old_dirty & P_DIRTY_DEFER_ALWAYS)) {
- p->p_memstat_dirty |= P_DIRTY_DEFER_ALWAYS;
- }
-
- defer_now = TRUE;
- }
-
- MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for pid %d\n",
- ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N",
- defer_now ? "Y" : "N",
- p->p_memstat_dirty & P_DIRTY ? "Y" : "N",
- p->p_pid);
-
- /* Kick off or invalidate the idle exit deferment if there's a state transition. */
- if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) {
- if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
-
- if (defer_now && !already_deferred) {
-
- /*
- * Request to defer a clean process that's idle-exit enabled
- * and not already in the jetsam deferred band. Most likely a
- * new launch.
- */
- memorystatus_schedule_idle_demotion_locked(p, TRUE);
- reschedule = TRUE;
-
- } else if (!defer_now) {
-
- /*
- * The process isn't asking for the 'aging' facility.
- * Could be that it is:
- */
-
- if (already_deferred) {
- /*
- * already in the aging bands. Traditionally,
- * some processes have tried to use this to
- * opt out of the 'aging' facility.
- */
-
- memorystatus_invalidate_idle_demotion_locked(p, TRUE);
- } else {
- /*
- * agnostic to the 'aging' facility. In that case,
- * we'll go ahead and opt it in because this is likely
- * a new launch (clean process, dirty tracking enabled)
- */
-
- memorystatus_schedule_idle_demotion_locked(p, TRUE);
- }
-
- reschedule = TRUE;
- }
- }
- } else {
-
- /*
- * We are trying to operate on a dirty process. Dirty processes have to
- * be removed from the deferred band. The question is do we reset the
- * deferred state or not?
- *
- * This could be a legal request like:
- * - this process had opted into the 'aging' band
- * - but it's now dirty and requests to opt out.
- * In this case, we remove the process from the band and reset its
- * state too. It'll opt back in properly when needed.
- *
- * OR, this request could be a user-space bug. E.g.:
- * - this process had opted into the 'aging' band when clean
- * - and, then issues another request to again put it into the band except
- * this time the process is dirty.
- * The process going dirty, as a transition in memorystatus_dirty_set(), will pull the process out of
- * the deferred band with its state intact. So our request below is no-op.
- * But we do it here anyways for coverage.
- *
- * memorystatus_update_idle_priority_locked()
- * single-mindedly treats a dirty process as "cannot be in the aging band".
- */
-
- if (!defer_now && already_deferred) {
- memorystatus_invalidate_idle_demotion_locked(p, TRUE);
- reschedule = TRUE;
- } else {
-
- boolean_t reset_state = (jetsam_aging_policy != kJetsamAgingPolicyLegacy) ? TRUE : FALSE;
-
- memorystatus_invalidate_idle_demotion_locked(p, reset_state);
- reschedule = TRUE;
- }
- }
-
- /* Re-evaluate the proc's priority now that the tracking flags have changed. */
- memorystatus_update_idle_priority_locked(p);
-
- if (reschedule) {
- memorystatus_reschedule_idle_demotion_locked();
- }
-
- ret = 0;
-
-exit:
- proc_list_unlock();
-
- return ret;
- }
-
-int
-memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
- int ret;
- boolean_t kill = false;
- boolean_t reschedule = FALSE;
- boolean_t was_dirty = FALSE;
- boolean_t now_dirty = FALSE;
-
- MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty);
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_SET), p->p_pid, self, pcontrol, 0, 0);
-
- proc_list_lock();
-
- if ((p->p_listflag & P_LIST_EXITED) != 0) {
- /*
- * Process is on its way out.
- */
- ret = EBUSY;
- goto exit;
- }
-
- if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
- ret = EPERM;
- goto exit;
- }
-
- if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
- was_dirty = TRUE;
-
- if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
- /* Dirty tracking not enabled */
- ret = EINVAL;
- } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
- /*
- * Process is set to be terminated and we're attempting to mark it dirty.
- * Set for termination and marking as clean is OK - see <rdar://problem/10594349>.
- */
- ret = EBUSY;
- } else {
- int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN;
- if (pcontrol && !(p->p_memstat_dirty & flag)) {
- /* Mark the process as having been dirtied at some point */
- p->p_memstat_dirty |= (flag | P_DIRTY_MARKED);
- memorystatus_dirty_count++;
- ret = 0;
- } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) {
- if ((flag == P_DIRTY_SHUTDOWN) && (!(p->p_memstat_dirty & P_DIRTY))) {
- /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */
- p->p_memstat_dirty |= P_DIRTY_TERMINATED;
- kill = true;
- } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
- /* Kill previously terminated processes if set clean */
- kill = true;
- }
- p->p_memstat_dirty &= ~flag;
- memorystatus_dirty_count--;
- ret = 0;
- } else {
- /* Already set */
- ret = EALREADY;
- }
- }
-
- if (ret != 0) {
- goto exit;
- }
-
- if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
- now_dirty = TRUE;
-
- if ((was_dirty == TRUE && now_dirty == FALSE) ||
- (was_dirty == FALSE && now_dirty == TRUE)) {
-
- /* Manage idle exit deferral, if applied */
- if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
-
- /*
- * Legacy mode: P_DIRTY_AGING_IN_PROGRESS means the process is in the aging band OR it might be heading back
- * there once it's clean again. For the legacy case, this only applies if it has some protection window left.
- * P_DIRTY_DEFER: one-time protection window given at launch
- * P_DIRTY_DEFER_ALWAYS: protection window given for every dirty->clean transition. Like non-legacy mode.
- *
- * Non-Legacy mode: P_DIRTY_AGING_IN_PROGRESS means the process is in the aging band. It will always stop over
- * in that band on its way to IDLE.
- */
-
- if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
- /*
- * New dirty process i.e. "was_dirty == FALSE && now_dirty == TRUE"
- *
- * The process will move from its aging band to its higher requested
- * jetsam band.
- */
- boolean_t reset_state = (jetsam_aging_policy != kJetsamAgingPolicyLegacy) ? TRUE : FALSE;
-
- memorystatus_invalidate_idle_demotion_locked(p, reset_state);
- reschedule = TRUE;
- } else {
-
- /*
- * Process is back from "dirty" to "clean".
- */
-
- if (jetsam_aging_policy == kJetsamAgingPolicyLegacy) {
- if (((p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) == FALSE) &&
- (mach_absolute_time() >= p->p_memstat_idledeadline)) {
- /*
- * The process hasn't enrolled in the "always defer after dirty"
- * mode and its deadline has expired. It currently
- * does not reside in any of the aging buckets.
- *
- * It's on its way to the JETSAM_PRIORITY_IDLE
- * bucket via memorystatus_update_idle_priority_locked()
- * below.
-
- * So all we need to do is reset all the state on the
- * process that's related to the aging bucket i.e.
- * the AGING_IN_PROGRESS flag and the timer deadline.
- */
-
- memorystatus_invalidate_idle_demotion_locked(p, TRUE);
- reschedule = TRUE;
- } else {
- /*
- * Process enrolled in "always stop in deferral band after dirty" OR
- * it still has some protection window left and so
- * we just re-arm the timer without modifying any
- * state on the process iff it still wants into that band.
- */
-
- if (p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) {
- memorystatus_schedule_idle_demotion_locked(p, TRUE);
- reschedule = TRUE;
- } else if (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) {
- memorystatus_schedule_idle_demotion_locked(p, FALSE);
- reschedule = TRUE;
- }
- }
- } else {
-
- memorystatus_schedule_idle_demotion_locked(p, TRUE);
- reschedule = TRUE;
- }
- }
- }
-
- memorystatus_update_idle_priority_locked(p);
-
- if (memorystatus_highwater_enabled) {
- boolean_t ledger_update_needed = TRUE;
- boolean_t use_active;
- boolean_t is_fatal;
- /*
- * We are in this path because this process transitioned between
- * dirty <--> clean state. Update the cached memory limits.
- */
-
- if (proc_jetsam_state_is_active_locked(p) == TRUE) {
- /*
- * process is pinned in elevated band
- * or
- * process is dirty
- */
- CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
- use_active = TRUE;
- ledger_update_needed = TRUE;
- } else {
- /*
- * process is clean...but if it has opted into pressured-exit
- * we don't apply the INACTIVE limit till the process has aged
- * out and is entering the IDLE band.
- * See memorystatus_update_priority_locked() for that.
- */
-
- if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
- ledger_update_needed = FALSE;
- } else {
- CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
- use_active = FALSE;
- ledger_update_needed = TRUE;
- }
- }
-
- /*
- * Enforce the new limits by writing to the ledger.
- *
- * This is a hot path and holding the proc_list_lock while writing to the ledgers,
- * (where the task lock is taken) is bad. So, we temporarily drop the proc_list_lock.
- * We aren't traversing the jetsam bucket list here, so we should be safe.
- * See rdar://21394491.
- */
-
- if (ledger_update_needed && proc_ref_locked(p) == p) {
- int ledger_limit;
- if (p->p_memstat_memlimit > 0) {
- ledger_limit = p->p_memstat_memlimit;
- } else {
- ledger_limit = -1;
- }
- proc_list_unlock();
- task_set_phys_footprint_limit_internal(p->task, ledger_limit, NULL, use_active, is_fatal);
- proc_list_lock();
- proc_rele_locked(p);
-
- MEMORYSTATUS_DEBUG(3, "memorystatus_dirty_set: new limit on pid %d (%dMB %s) priority(%d) dirty?=0x%x %s\n",
- p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
- (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty,
- (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
- }
-
- }
-
- /* If the deferral state changed, reschedule the demotion timer */
- if (reschedule) {
- memorystatus_reschedule_idle_demotion_locked();
- }
- }
-
- if (kill) {
- if (proc_ref_locked(p) == p) {
- proc_list_unlock();
- psignal(p, SIGKILL);
- proc_list_lock();
- proc_rele_locked(p);
- }
- }
-
-exit:
- proc_list_unlock();
-
- return ret;
-}
-
-int
-memorystatus_dirty_clear(proc_t p, uint32_t pcontrol) {
-
- int ret = 0;
-
- MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_clear(): %d 0x%x 0x%x\n", p->p_pid, pcontrol, p->p_memstat_dirty);
-
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_CLEAR), p->p_pid, pcontrol, 0, 0, 0);
-
- proc_list_lock();
-
- if ((p->p_listflag & P_LIST_EXITED) != 0) {
- /*
- * Process is on its way out.
- */
- ret = EBUSY;
- goto exit;
- }
-
- if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
- ret = EPERM;
- goto exit;
- }
-
- if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
- /* Dirty tracking not enabled */
- ret = EINVAL;
- goto exit;
- }
-
- if (!pcontrol || (pcontrol & (PROC_DIRTY_LAUNCH_IN_PROGRESS | PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) == 0) {
- ret = EINVAL;
- goto exit;
- }
-
- if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) {
- p->p_memstat_dirty &= ~P_DIRTY_LAUNCH_IN_PROGRESS;
- }
-
- /* This can be set and cleared exactly once. */
- if (pcontrol & (PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) {
-
- if (p->p_memstat_dirty & P_DIRTY_DEFER) {
- p->p_memstat_dirty &= ~(P_DIRTY_DEFER);
- }
-
- if (p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) {
- p->p_memstat_dirty &= ~(P_DIRTY_DEFER_ALWAYS);
- }
-
- memorystatus_invalidate_idle_demotion_locked(p, TRUE);
- memorystatus_update_idle_priority_locked(p);
- memorystatus_reschedule_idle_demotion_locked();
- }
-
- ret = 0;
-exit:
- proc_list_unlock();
-
- return ret;
-}
-
-int
-memorystatus_dirty_get(proc_t p) {
- int ret = 0;
-
- proc_list_lock();
-
- if (p->p_memstat_dirty & P_DIRTY_TRACK) {
- ret |= PROC_DIRTY_TRACKED;
- if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
- ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT;
- }
- if (p->p_memstat_dirty & P_DIRTY) {
- ret |= PROC_DIRTY_IS_DIRTY;
- }
- if (p->p_memstat_dirty & P_DIRTY_LAUNCH_IN_PROGRESS) {
- ret |= PROC_DIRTY_LAUNCH_IS_IN_PROGRESS;
- }
- }
-
- proc_list_unlock();
-
- return ret;
-}
-
-int
-memorystatus_on_terminate(proc_t p) {
- int sig;
-
- proc_list_lock();
-
- p->p_memstat_dirty |= P_DIRTY_TERMINATED;
-
- if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) {
- /* Clean; mark as terminated and issue SIGKILL */
- sig = SIGKILL;
- } else {
- /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */
- sig = SIGTERM;
- }
-
- proc_list_unlock();
-
- return sig;
-}
-
-void
-memorystatus_on_suspend(proc_t p)
-{
-#if CONFIG_FREEZE
- uint32_t pages;
- memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
-#endif
- proc_list_lock();
-#if CONFIG_FREEZE
- memorystatus_suspended_count++;
-#endif
- p->p_memstat_state |= P_MEMSTAT_SUSPENDED;
- proc_list_unlock();
-}
-
-void
-memorystatus_on_resume(proc_t p)
-{
-#if CONFIG_FREEZE
- boolean_t frozen;
- pid_t pid;
-#endif
-
- proc_list_lock();
-
-#if CONFIG_FREEZE
- frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN);
- if (frozen) {
- /*
- * Now that we don't _thaw_ a process completely,
- * resuming it (and having some on-demand swapins)
- * shouldn't preclude it from being counted as frozen.
- *
- * memorystatus_frozen_count--;
- *
- * We preserve the P_MEMSTAT_FROZEN state since the process
- * could have state on disk AND so will deserve some protection
- * in the jetsam bands.
- */
- if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) {
- p->p_memstat_state |= P_MEMSTAT_REFREEZE_ELIGIBLE;
- memorystatus_refreeze_eligible_count++;
- }
- p->p_memstat_thaw_count++;
-
- memorystatus_thaw_count++;
- }
-
- memorystatus_suspended_count--;
-
- pid = p->p_pid;
-#endif
-
- /*
- * P_MEMSTAT_FROZEN will remain unchanged. This used to be:
- * p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN);
- */
- p->p_memstat_state &= ~P_MEMSTAT_SUSPENDED;
-
- proc_list_unlock();
-
-#if CONFIG_FREEZE
- if (frozen) {
- memorystatus_freeze_entry_t data = { pid, FALSE, 0 };
- memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
- }
-#endif
-}
-
-void
-memorystatus_on_inactivity(proc_t p)
-{
-#pragma unused(p)
-#if CONFIG_FREEZE
- /* Wake the freeze thread */
- thread_wakeup((event_t)&memorystatus_freeze_wakeup);
-#endif
-}
-
-/*
- * The proc_list_lock is held by the caller.
-*/
-static uint32_t
-memorystatus_build_state(proc_t p) {
- uint32_t snapshot_state = 0;
-
- /* General */
- if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
- snapshot_state |= kMemorystatusSuspended;
- }
- if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
- snapshot_state |= kMemorystatusFrozen;
- }
- if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
- snapshot_state |= kMemorystatusWasThawed;
- }
-
- /* Tracking */
- if (p->p_memstat_dirty & P_DIRTY_TRACK) {
- snapshot_state |= kMemorystatusTracked;
- }
- if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
- snapshot_state |= kMemorystatusSupportsIdleExit;
- }
- if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
- snapshot_state |= kMemorystatusDirty;
- }
-
- return snapshot_state;
-}
-
-static boolean_t
-kill_idle_exit_proc(void)
-{
- proc_t p, victim_p = PROC_NULL;
- uint64_t current_time;
- boolean_t killed = FALSE;
- unsigned int i = 0;
- os_reason_t jetsam_reason = OS_REASON_NULL;
-
- /* Pick next idle exit victim. */
- current_time = mach_absolute_time();
-
- jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_IDLE_EXIT);
- if (jetsam_reason == OS_REASON_NULL) {
- printf("kill_idle_exit_proc: failed to allocate jetsam reason\n");
- }
-
- proc_list_lock();
-
- p = memorystatus_get_first_proc_locked(&i, FALSE);
- while (p) {
- /* No need to look beyond the idle band */
- if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) {
- break;
- }
-
- if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) {
- if (current_time >= p->p_memstat_idledeadline) {
- p->p_memstat_dirty |= P_DIRTY_TERMINATED;
- victim_p = proc_ref_locked(p);
- break;
- }
- }
-
- p = memorystatus_get_next_proc_locked(&i, p, FALSE);
- }
-
- proc_list_unlock();
-
- if (victim_p) {
- printf("memorystatus: killing_idle_process pid %d [%s]\n", victim_p->p_pid, (*victim_p->p_name ? victim_p->p_name : "unknown"));
- killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit, jetsam_reason);
- proc_rele(victim_p);
- } else {
- os_reason_free(jetsam_reason);
- }
-
- return killed;
-}
-
-static void
-memorystatus_thread_wake(void)
-{
- int thr_id = 0;
- int active_thr = atomic_load(&active_jetsam_threads);
-
- /* Wakeup all the jetsam threads */
- for (thr_id = 0; thr_id < active_thr; thr_id++) {
- thread_wakeup((event_t)&jetsam_threads[thr_id].memorystatus_wakeup);
- }
-}
-
-#if CONFIG_JETSAM
-
-static void
-memorystatus_thread_pool_max()
-{
- /* Increase the jetsam thread pool to max_jetsam_threads */
- int max_threads = max_jetsam_threads;
- printf("Expanding memorystatus pool to %d!\n", max_threads);
- atomic_store(&active_jetsam_threads, max_threads);
-}
-
-static void
-memorystatus_thread_pool_default()
-{
- /* Restore the jetsam thread pool to a single thread */
- printf("Reverting memorystatus pool back to 1\n");
- atomic_store(&active_jetsam_threads, 1);
-}
-
-#endif /* CONFIG_JETSAM */
-
-extern void vm_pressure_response(void);
-
-static int
-memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation)
-{
- struct jetsam_thread_state *jetsam_thread = jetsam_current_thread();
-
- if (interval_ms) {
- assert_wait_timeout(&jetsam_thread->memorystatus_wakeup, THREAD_UNINT, interval_ms, NSEC_PER_MSEC);
- } else {
- assert_wait(&jetsam_thread->memorystatus_wakeup, THREAD_UNINT);
- }
-
- return thread_block(continuation);
-}
-
-static boolean_t
-memorystatus_avail_pages_below_pressure(void)
-{
-#if CONFIG_EMBEDDED
-/*
- * Instead of CONFIG_EMBEDDED for these *avail_pages* routines, we should
- * key off of the system having dynamic swap support. With full swap support,
- * the system shouldn't really need to worry about various page thresholds.
- */
- return (memorystatus_available_pages <= memorystatus_available_pages_pressure);
-#else /* CONFIG_EMBEDDED */
- return FALSE;
-#endif /* CONFIG_EMBEDDED */
-}
-
-static boolean_t
-memorystatus_avail_pages_below_critical(void)
-{
-#if CONFIG_EMBEDDED
- return (memorystatus_available_pages <= memorystatus_available_pages_critical);
-#else /* CONFIG_EMBEDDED */
- return FALSE;
-#endif /* CONFIG_EMBEDDED */
-}
-
-static boolean_t
-memorystatus_post_snapshot(int32_t priority, uint32_t cause)
-{
-#if CONFIG_EMBEDDED
-#pragma unused(cause)
- /*
- * Don't generate logs for steady-state idle-exit kills,
- * unless it is overridden for debug or by the device
- * tree.
- */
-
- return ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot);
-
-#else /* CONFIG_EMBEDDED */
- /*
- * Don't generate logs for steady-state idle-exit kills,
- * unless
- * - it is overridden for debug or by the device
- * tree.
- * OR
- * - the kill causes are important i.e. not kMemorystatusKilledIdleExit
- */
-
- boolean_t snapshot_eligible_kill_cause = (is_reason_thrashing(cause) || is_reason_zone_map_exhaustion(cause));
- return ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot || snapshot_eligible_kill_cause);
-#endif /* CONFIG_EMBEDDED */
-}
-
-static boolean_t
-memorystatus_action_needed(void)
-{
-#if CONFIG_EMBEDDED
- return (is_reason_thrashing(kill_under_pressure_cause) ||
- is_reason_zone_map_exhaustion(kill_under_pressure_cause) ||
- memorystatus_available_pages <= memorystatus_available_pages_pressure);
-#else /* CONFIG_EMBEDDED */
- return (is_reason_thrashing(kill_under_pressure_cause) ||
- is_reason_zone_map_exhaustion(kill_under_pressure_cause));
-#endif /* CONFIG_EMBEDDED */
-}
-
-#if CONFIG_FREEZE
-extern void vm_swap_consider_defragmenting(int);
-
-/*
- * This routine will _jetsam_ all frozen processes
- * and reclaim the swap space immediately.
- *
- * So freeze has to be DISABLED when we call this routine.
- */
-
-void
-memorystatus_disable_freeze(void)
-{
- memstat_bucket_t *bucket;
- int bucket_count = 0, retries = 0;
- boolean_t retval = FALSE, killed = FALSE;
- uint32_t errors = 0, errors_over_prev_iteration = 0;
- os_reason_t jetsam_reason = 0;
- unsigned int band = 0;
- proc_t p = PROC_NULL, next_p = PROC_NULL;
-
- assert(memorystatus_freeze_enabled == FALSE);
-
- jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
- if (jetsam_reason == OS_REASON_NULL) {
- printf("memorystatus_disable_freeze: failed to allocate jetsam reason\n");
- }
-
- /*
- * Let's relocate all frozen processes into band 8. Demoted frozen processes
- * are sitting in band 0 currently and it's possible to have a frozen process
- * in the FG band being actively used. We don't reset its frozen state when
- * it is resumed because it has state on disk.
- *
- * We choose to do this relocation rather than implement a new 'kill frozen'
- * process function for these reasons:
- * - duplication of code: too many kill functions exist and we need to rework them better.
- * - disk-space-shortage kills are rare
- * - not having the 'real' jetsam band at time of the this frozen kill won't preclude us
- * from answering any imp. questions re. jetsam policy/effectiveness.
- *
- * This is essentially what memorystatus_update_inactive_jetsam_priority_band() does while
- * avoiding the application of memory limits.
- */
-
-again:
- proc_list_lock();
-
- band = JETSAM_PRIORITY_IDLE;
- p = PROC_NULL;
- next_p = PROC_NULL;
-
- next_p = memorystatus_get_first_proc_locked(&band, TRUE);
- while (next_p) {
-
- p = next_p;
- next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);
-
- if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
- break;
- }
-
- if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
- continue;
- }
-
- if (p->p_memstat_state & P_MEMSTAT_ERROR) {
- p->p_memstat_state &= ~P_MEMSTAT_ERROR;
- }
-
- if (p->p_memstat_effectivepriority == memorystatus_freeze_jetsam_band) {
- continue;
- }
-
- /*
- * We explicitly add this flag here so the process looks like a normal
- * frozen process i.e. P_MEMSTAT_FROZEN and P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND.
- * We don't bother with assigning the 'active' memory
- * limits at this point because we are going to be killing it soon below.
- */
- p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
- memorystatus_invalidate_idle_demotion_locked(p, TRUE);
-
- memorystatus_update_priority_locked(p, memorystatus_freeze_jetsam_band, FALSE, TRUE);
- }
-
- bucket = &memstat_bucket[memorystatus_freeze_jetsam_band];
- bucket_count = bucket->count;
- proc_list_unlock();
-
- /*
- * Bucket count is already stale at this point. But, we don't expect
- * freezing to continue since we have already disabled the freeze functionality.
- * However, an existing freeze might be in progress. So we might miss that process
- * in the first go-around. We hope to catch it in the next.
- */
-
- errors_over_prev_iteration = 0;
- while (bucket_count) {
-
- bucket_count--;
-
- /*
- * memorystatus_kill_elevated_process() drops a reference,
- * so take another one so we can continue to use this exit reason
- * even after it returns.
- */
-
- os_reason_ref(jetsam_reason);
- retval = memorystatus_kill_elevated_process(
- kMemorystatusKilledDiskSpaceShortage,
- jetsam_reason,
- memorystatus_freeze_jetsam_band,
- 0, /* the iteration of aggressive jetsam..ignored here */
- &errors);
-
- if (errors > 0) {
- printf("memorystatus_disable_freeze: memorystatus_kill_elevated_process returned %d error(s)\n", errors);
- errors_over_prev_iteration += errors;
- errors = 0;
- }
-
- if (retval == 0) {
- /*
- * No frozen processes left to kill.
- */
- break;
- }
-
- killed = TRUE;
- }
-
- proc_list_lock();
-
- if (memorystatus_frozen_count) {
- /*
- * A frozen process snuck in and so
- * go back around to kill it. That
- * process may have been resumed and
- * put into the FG band too. So we
- * have to do the relocation again.
- */
- assert(memorystatus_freeze_enabled == FALSE);
-
- retries++;
- if (retries < 3) {
- proc_list_unlock();
- goto again;
- }
-#if DEVELOPMENT || DEBUG
- panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d, errors = %d",
- memorystatus_frozen_count, errors_over_prev_iteration);
-#endif /* DEVELOPMENT || DEBUG */
- }
- proc_list_unlock();
-
- os_reason_free(jetsam_reason);
-
- if (killed) {
-
- vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);
-
- proc_list_lock();
- size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
- sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
- uint64_t timestamp_now = mach_absolute_time();
- memorystatus_jetsam_snapshot->notification_time = timestamp_now;
- memorystatus_jetsam_snapshot->js_gencount++;
- if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
- timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
- proc_list_unlock();
- int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
- if (!ret) {
- proc_list_lock();
- memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
- proc_list_unlock();
- }
- } else {
- proc_list_unlock();
- }
- }
-
- return;
-}
-#endif /* CONFIG_FREEZE */
-
-static boolean_t
-memorystatus_act_on_hiwat_processes(uint32_t *errors, uint32_t *hwm_kill, boolean_t *post_snapshot, __unused boolean_t *is_critical)
-{
- boolean_t purged = FALSE;
- boolean_t killed = memorystatus_kill_hiwat_proc(errors, &purged);
-
- if (killed) {
- *hwm_kill = *hwm_kill + 1;
- *post_snapshot = TRUE;
- return TRUE;
- } else {
- if (purged == FALSE) {
- /* couldn't purge and couldn't kill */
- memorystatus_hwm_candidates = FALSE;
- }
- }
-
-#if CONFIG_JETSAM
- /* No highwater processes to kill. Continue or stop for now? */
- if (!is_reason_thrashing(kill_under_pressure_cause) &&
- !is_reason_zone_map_exhaustion(kill_under_pressure_cause) &&
- (memorystatus_available_pages > memorystatus_available_pages_critical)) {
- /*
- * We are _not_ out of pressure but we are above the critical threshold and there's:
- * - no compressor thrashing
- * - enough zone memory
- * - no more HWM processes left.
- * For now, don't kill any other processes.
- */
-
- if (*hwm_kill == 0) {
- memorystatus_thread_wasted_wakeup++;
- }
-
- *is_critical = FALSE;
-
- return TRUE;
- }
-#endif /* CONFIG_JETSAM */
-
- return FALSE;
-}
-
-static boolean_t
-memorystatus_act_aggressive(uint32_t cause, os_reason_t jetsam_reason, int *jld_idle_kills, boolean_t *corpse_list_purged, boolean_t *post_snapshot)
-{
- if (memorystatus_jld_enabled == TRUE) {
-
- boolean_t killed;
- uint32_t errors = 0;
-
- /* Jetsam Loop Detection - locals */
- memstat_bucket_t *bucket;
- int jld_bucket_count = 0;
- struct timeval jld_now_tstamp = {0,0};
- uint64_t jld_now_msecs = 0;
- int elevated_bucket_count = 0;
-
- /* Jetsam Loop Detection - statics */
- static uint64_t jld_timestamp_msecs = 0;
- static int jld_idle_kill_candidates = 0; /* Number of available processes in band 0,1 at start */
- static int jld_eval_aggressive_count = 0; /* Bumps the max priority in aggressive loop */
- static int32_t jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT;
- /*
- * Jetsam Loop Detection: attempt to detect
- * rapid daemon relaunches in the lower bands.
- */
-
- microuptime(&jld_now_tstamp);
-
- /*
- * Ignore usecs in this calculation.
- * msecs granularity is close enough.
- */
- jld_now_msecs = (jld_now_tstamp.tv_sec * 1000);
-
- proc_list_lock();
- switch (jetsam_aging_policy) {
- case kJetsamAgingPolicyLegacy:
- bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
- jld_bucket_count = bucket->count;
- bucket = &memstat_bucket[JETSAM_PRIORITY_AGING_BAND1];
- jld_bucket_count += bucket->count;
- break;
- case kJetsamAgingPolicySysProcsReclaimedFirst:
- case kJetsamAgingPolicyAppsReclaimedFirst:
- bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
- jld_bucket_count = bucket->count;
- bucket = &memstat_bucket[system_procs_aging_band];
- jld_bucket_count += bucket->count;
- bucket = &memstat_bucket[applications_aging_band];
- jld_bucket_count += bucket->count;
- break;
- case kJetsamAgingPolicyNone:
- default:
- bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
- jld_bucket_count = bucket->count;
- break;
- }
-
- bucket = &memstat_bucket[JETSAM_PRIORITY_ELEVATED_INACTIVE];
- elevated_bucket_count = bucket->count;
-
- proc_list_unlock();
-
- /*
- * memorystatus_jld_eval_period_msecs is a tunable
- * memorystatus_jld_eval_aggressive_count is a tunable
- * memorystatus_jld_eval_aggressive_priority_band_max is a tunable
- */
- if ( (jld_bucket_count == 0) ||
- (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) {
-
- /*
- * Refresh evaluation parameters
- */
- jld_timestamp_msecs = jld_now_msecs;
- jld_idle_kill_candidates = jld_bucket_count;
- *jld_idle_kills = 0;
- jld_eval_aggressive_count = 0;
- jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT;
- }
-
- if (*jld_idle_kills > jld_idle_kill_candidates) {
- jld_eval_aggressive_count++;
-
-#if DEVELOPMENT || DEBUG
- printf("memorystatus: aggressive%d: beginning of window: %lld ms, : timestamp now: %lld ms\n",
- jld_eval_aggressive_count,
- jld_timestamp_msecs,
- jld_now_msecs);
- printf("memorystatus: aggressive%d: idle candidates: %d, idle kills: %d\n",
- jld_eval_aggressive_count,
- jld_idle_kill_candidates,
- *jld_idle_kills);
-#endif /* DEVELOPMENT || DEBUG */
-
- if ((jld_eval_aggressive_count == memorystatus_jld_eval_aggressive_count) &&
- (total_corpses_count() > 0) && (*corpse_list_purged == FALSE)) {
- /*
- * If we reach this aggressive cycle, corpses might be causing memory pressure.
- * So, in an effort to avoid jetsams in the FG band, we will attempt to purge
- * corpse memory prior to this final march through JETSAM_PRIORITY_UI_SUPPORT.
- */
- task_purge_all_corpses();
- *corpse_list_purged = TRUE;
- }
- else if (jld_eval_aggressive_count > memorystatus_jld_eval_aggressive_count) {
- /*
- * Bump up the jetsam priority limit (eg: the bucket index)
- * Enforce bucket index sanity.
- */
- if ((memorystatus_jld_eval_aggressive_priority_band_max < 0) ||
- (memorystatus_jld_eval_aggressive_priority_band_max >= MEMSTAT_BUCKET_COUNT)) {
- /*
- * Do nothing. Stick with the default level.
- */
- } else {
- jld_priority_band_max = memorystatus_jld_eval_aggressive_priority_band_max;
- }
- }
-
- /* Visit elevated processes first */
- while (elevated_bucket_count) {
-
- elevated_bucket_count--;
-
- /*
- * memorystatus_kill_elevated_process() drops a reference,
- * so take another one so we can continue to use this exit reason
- * even after it returns.
- */
-
- os_reason_ref(jetsam_reason);
- killed = memorystatus_kill_elevated_process(
- cause,
- jetsam_reason,
- JETSAM_PRIORITY_ELEVATED_INACTIVE,
- jld_eval_aggressive_count,
- &errors);
-
- if (killed) {
- *post_snapshot = TRUE;
- if (memorystatus_avail_pages_below_pressure()) {
- /*
- * Still under pressure.
- * Find another pinned processes.
- */
- continue;
- } else {
- return TRUE;
- }
- } else {
- /*
- * No pinned processes left to kill.
- * Abandon elevated band.
- */
- break;
- }
- }
-
- /*
- * memorystatus_kill_top_process_aggressive() allocates its own
- * jetsam_reason so the kMemorystatusKilledProcThrashing cause
- * is consistent throughout the aggressive march.
- */
- killed = memorystatus_kill_top_process_aggressive(
- kMemorystatusKilledProcThrashing,
- jld_eval_aggressive_count,
- jld_priority_band_max,
- &errors);
-
- if (killed) {
- /* Always generate logs after aggressive kill */
- *post_snapshot = TRUE;
- *jld_idle_kills = 0;
- return TRUE;
- }
- }
-
- return FALSE;
- }
-
- return FALSE;
-}
-
-
-static void
-memorystatus_thread(void *param __unused, wait_result_t wr __unused)
-{
- boolean_t post_snapshot = FALSE;
- uint32_t errors = 0;
- uint32_t hwm_kill = 0;
- boolean_t sort_flag = TRUE;
- boolean_t corpse_list_purged = FALSE;
- int jld_idle_kills = 0;
- struct jetsam_thread_state *jetsam_thread = jetsam_current_thread();
-
- if (jetsam_thread->inited == FALSE) {
- /*
- * It's the first time the thread has run, so just mark the thread as privileged and block.
- * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>.
- */
-
- char name[32];
- thread_wire(host_priv_self(), current_thread(), TRUE);
- snprintf(name, 32, "VM_memorystatus_%d", jetsam_thread->index + 1);
-
- if (jetsam_thread->index == 0) {
- if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) {
- thread_vm_bind_group_add();
- }
- }
- thread_set_thread_name(current_thread(), name);
- jetsam_thread->inited = TRUE;
- memorystatus_thread_block(0, memorystatus_thread);
- }
-
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START,
- memorystatus_available_pages, memorystatus_jld_enabled, memorystatus_jld_eval_period_msecs, memorystatus_jld_eval_aggressive_count,0);
-
- /*
- * Jetsam aware version.
- *
- * The VM pressure notification thread is working it's way through clients in parallel.
- *
- * So, while the pressure notification thread is targeting processes in order of
- * increasing jetsam priority, we can hopefully reduce / stop it's work by killing
- * any processes that have exceeded their highwater mark.
- *
- * If we run out of HWM processes and our available pages drops below the critical threshold, then,
- * we target the least recently used process in order of increasing jetsam priority (exception: the FG band).
- */
- while (memorystatus_action_needed()) {
- boolean_t killed;
- int32_t priority;
- uint32_t cause;
- uint64_t jetsam_reason_code = JETSAM_REASON_INVALID;
- os_reason_t jetsam_reason = OS_REASON_NULL;
-
- cause = kill_under_pressure_cause;
- switch (cause) {
- case kMemorystatusKilledFCThrashing:
- jetsam_reason_code = JETSAM_REASON_MEMORY_FCTHRASHING;
- break;
- case kMemorystatusKilledVMCompressorThrashing:
- jetsam_reason_code = JETSAM_REASON_MEMORY_VMCOMPRESSOR_THRASHING;
- break;
- case kMemorystatusKilledVMCompressorSpaceShortage:
- jetsam_reason_code = JETSAM_REASON_MEMORY_VMCOMPRESSOR_SPACE_SHORTAGE;
- break;
- case kMemorystatusKilledZoneMapExhaustion:
- jetsam_reason_code = JETSAM_REASON_ZONE_MAP_EXHAUSTION;
- break;
- case kMemorystatusKilledVMPageShortage:
- /* falls through */
- default:
- jetsam_reason_code = JETSAM_REASON_MEMORY_VMPAGESHORTAGE;
- cause = kMemorystatusKilledVMPageShortage;
- break;
- }
-
- /* Highwater */
- boolean_t is_critical = TRUE;
- if (memorystatus_act_on_hiwat_processes(&errors, &hwm_kill, &post_snapshot, &is_critical)) {
- if (is_critical == FALSE) {
- /*
- * For now, don't kill any other processes.
- */
- break;
- } else {
- goto done;
- }
- }
-
- jetsam_reason = os_reason_create(OS_REASON_JETSAM, jetsam_reason_code);
- if (jetsam_reason == OS_REASON_NULL) {
- printf("memorystatus_thread: failed to allocate jetsam reason\n");
- }
-
- if (memorystatus_act_aggressive(cause, jetsam_reason, &jld_idle_kills, &corpse_list_purged, &post_snapshot)) {
- goto done;
- }
-
- /*
- * memorystatus_kill_top_process() drops a reference,
- * so take another one so we can continue to use this exit reason
- * even after it returns
- */
- os_reason_ref(jetsam_reason);
-
- /* LRU */
- killed = memorystatus_kill_top_process(TRUE, sort_flag, cause, jetsam_reason, &priority, &errors);
- sort_flag = FALSE;
-
- if (killed) {
- if (memorystatus_post_snapshot(priority, cause) == TRUE) {
-
- post_snapshot = TRUE;
- }
-
- /* Jetsam Loop Detection */
- if (memorystatus_jld_enabled == TRUE) {
- if ((priority == JETSAM_PRIORITY_IDLE) || (priority == system_procs_aging_band) || (priority == applications_aging_band)) {
- jld_idle_kills++;
- } else {
- /*
- * We've reached into bands beyond idle deferred.
- * We make no attempt to monitor them
- */
- }
- }
-
- if ((priority >= JETSAM_PRIORITY_UI_SUPPORT) && (total_corpses_count() > 0) && (corpse_list_purged == FALSE)) {
- /*
- * If we have jetsammed a process in or above JETSAM_PRIORITY_UI_SUPPORT
- * then we attempt to relieve pressure by purging corpse memory.
- */
- task_purge_all_corpses();
- corpse_list_purged = TRUE;
- }
- goto done;
- }
-
- if (memorystatus_avail_pages_below_critical()) {
- /*
- * Still under pressure and unable to kill a process - purge corpse memory
- */
- if (total_corpses_count() > 0) {
- task_purge_all_corpses();
- corpse_list_purged = TRUE;
- }
-
- if (memorystatus_avail_pages_below_critical()) {
- /*
- * Still under pressure and unable to kill a process - panic
- */
- panic("memorystatus_jetsam_thread: no victim! available pages:%llu\n", (uint64_t)memorystatus_available_pages);
- }
- }
-
-done:
-
- /*
- * We do not want to over-kill when thrashing has been detected.
- * To avoid that, we reset the flag here and notify the
- * compressor.
- */
- if (is_reason_thrashing(kill_under_pressure_cause)) {
- kill_under_pressure_cause = 0;
-#if CONFIG_JETSAM
- vm_thrashing_jetsam_done();
-#endif /* CONFIG_JETSAM */
- } else if (is_reason_zone_map_exhaustion(kill_under_pressure_cause)) {
- kill_under_pressure_cause = 0;
- }
-
- os_reason_free(jetsam_reason);
- }
-
- kill_under_pressure_cause = 0;
-
- if (errors) {
- memorystatus_clear_errors();
- }
-
- if (post_snapshot) {
- proc_list_lock();
- size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
- sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
- uint64_t timestamp_now = mach_absolute_time();
- memorystatus_jetsam_snapshot->notification_time = timestamp_now;
- memorystatus_jetsam_snapshot->js_gencount++;
- if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
- timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
- proc_list_unlock();
- int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
- if (!ret) {
- proc_list_lock();
- memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
- proc_list_unlock();
- }
- } else {
- proc_list_unlock();
- }
- }
-
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END,
- memorystatus_available_pages, 0, 0, 0, 0);
-
- memorystatus_thread_block(0, memorystatus_thread);
-}
-
-/*
- * Returns TRUE:
- * when an idle-exitable proc was killed
- * Returns FALSE:
- * when there are no more idle-exitable procs found
- * when the attempt to kill an idle-exitable proc failed
- */
-boolean_t memorystatus_idle_exit_from_VM(void) {
-
- /*
- * This routine should no longer be needed since we are
- * now using jetsam bands on all platforms and so will deal
- * with IDLE processes within the memorystatus thread itself.
- *
- * But we still use it because we observed that macos systems
- * started heavy compression/swapping with a bunch of
- * idle-exitable processes alive and doing nothing. We decided
- * to rather kill those processes than start swapping earlier.
- */
-
- return(kill_idle_exit_proc());
-}
-
-/*
- * Callback invoked when allowable physical memory footprint exceeded
- * (dirty pages + IOKit mappings)
- *
- * This is invoked for both advisory, non-fatal per-task high watermarks,
- * as well as the fatal task memory limits.
- */
-void
-memorystatus_on_ledger_footprint_exceeded(boolean_t warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal)
-{
- os_reason_t jetsam_reason = OS_REASON_NULL;
-
- proc_t p = current_proc();
-
-#if VM_PRESSURE_EVENTS
- if (warning == TRUE) {
- /*
- * This is a warning path which implies that the current process is close, but has
- * not yet exceeded its per-process memory limit.
- */
- if (memorystatus_warn_process(p->p_pid, memlimit_is_active, memlimit_is_fatal, FALSE /* not exceeded */) != TRUE) {
- /* Print warning, since it's possible that task has not registered for pressure notifications */
- os_log(OS_LOG_DEFAULT, "memorystatus_on_ledger_footprint_exceeded: failed to warn the current task (%d exiting, or no handler registered?).\n", p->p_pid);
- }
- return;
- }
-#endif /* VM_PRESSURE_EVENTS */
-
- if (memlimit_is_fatal) {
- /*
- * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task
- * has violated either the system-wide per-task memory limit OR its own task limit.
- */
- jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_PERPROCESSLIMIT);
- if (jetsam_reason == NULL) {
- printf("task_exceeded footprint: failed to allocate jetsam reason\n");
- } else if (corpse_for_fatal_memkill != 0 && proc_send_synchronous_EXC_RESOURCE(p) == FALSE) {
- /* Set OS_REASON_FLAG_GENERATE_CRASH_REPORT to generate corpse */
- jetsam_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT;
- }
-
- if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit, jetsam_reason) != TRUE) {
- printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n");
- }
- } else {
- /*
- * HWM offender exists. Done without locks or synchronization.
- * See comment near its declaration for more details.
- */
- memorystatus_hwm_candidates = TRUE;
-
-#if VM_PRESSURE_EVENTS
- /*
- * The current process is not in the warning path.
- * This path implies the current process has exceeded a non-fatal (soft) memory limit.
- * Failure to send note is ignored here.
- */
- (void)memorystatus_warn_process(p->p_pid, memlimit_is_active, memlimit_is_fatal, TRUE /* exceeded */);
-
-#endif /* VM_PRESSURE_EVENTS */
- }
-}
-
-void
-memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal)
-{
- proc_t p = current_proc();
-
- /*
- * The limit violation is logged here, but only once per process per limit.
- * Soft memory limit is a non-fatal high-water-mark
- * Hard memory limit is a fatal custom-task-limit or system-wide per-task memory limit.
- */
-
- os_log_with_startup_serial(OS_LOG_DEFAULT, "EXC_RESOURCE -> %s[%d] exceeded mem limit: %s%s %d MB (%s)\n",
- (*p->p_name ? p->p_name : "unknown"), p->p_pid, (memlimit_is_active ? "Active" : "Inactive"),
- (memlimit_is_fatal ? "Hard" : "Soft"), max_footprint_mb,
- (memlimit_is_fatal ? "fatal" : "non-fatal"));
-
- return;
-}
-
-
-/*
- * Description:
- * Evaluates process state to determine which limit
- * should be applied (active vs. inactive limit).
- *
- * Processes that have the 'elevated inactive jetsam band' attribute
- * are first evaluated based on their current priority band.
- * presently elevated ==> active
- *
- * Processes that opt into dirty tracking are evaluated
- * based on clean vs dirty state.
- * dirty ==> active
- * clean ==> inactive
- *
- * Process that do not opt into dirty tracking are
- * evalulated based on priority level.
- * Foreground or above ==> active
- * Below Foreground ==> inactive
- *
- * Return: TRUE if active
- * False if inactive
- */
-
-static boolean_t
-proc_jetsam_state_is_active_locked(proc_t p) {
-
- if ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) &&
- (p->p_memstat_effectivepriority == JETSAM_PRIORITY_ELEVATED_INACTIVE)) {
- /*
- * process has the 'elevated inactive jetsam band' attribute
- * and process is present in the elevated band
- * implies active state
- */
- return TRUE;
- } else if (p->p_memstat_dirty & P_DIRTY_TRACK) {
- /*
- * process has opted into dirty tracking
- * active state is based on dirty vs. clean
- */
- if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
- /*
- * process is dirty
- * implies active state
- */
- return TRUE;
- } else {
- /*
- * process is clean
- * implies inactive state
- */
- return FALSE;
- }
- } else if (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND) {
- /*
- * process is Foreground or higher
- * implies active state
- */
- return TRUE;
- } else {
- /*
- * process found below Foreground
- * implies inactive state
- */
- return FALSE;
- }
-}
-
/*
 * Synchronously jetsam a process on behalf of the caller.
 *
 * victim_pid == -1 means "no specific victim": kill the first eligible
 * process found by the priority-band scan instead.
 *
 * Ownership of jetsam_reason passes to the kill path (see
 * memorystatus_kill_specific_process, which frees it if the proc lookup
 * fails) -- callers must not release it themselves.
 *
 * Returns TRUE iff a process was killed.
 */
static boolean_t
memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason) {
	boolean_t res;

	uint32_t errors = 0;

	if (victim_pid == -1) {
		/* No pid, so kill first process */
		res = memorystatus_kill_top_process(TRUE, TRUE, cause, jetsam_reason, NULL, &errors);
	} else {
		res = memorystatus_kill_specific_process(victim_pid, cause, jetsam_reason);
	}

	if (errors) {
		/* Clear any per-proc error markers left behind by the scan. */
		memorystatus_clear_errors();
	}

	if (res == TRUE) {
		/* Fire off snapshot notification */
		proc_list_lock();
		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
		    sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
		uint64_t timestamp_now = mach_absolute_time();
		memorystatus_jetsam_snapshot->notification_time = timestamp_now;
		/*
		 * Rate-limit the notification: only send if a snapshot exists and
		 * we are past the snapshot timeout window since the last note.
		 */
		if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
		    timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
			/* Drop the lock across the note send; retake it to record success. */
			proc_list_unlock();
			int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
			if (!ret) {
				proc_list_lock();
				memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
				proc_list_unlock();
			}
		} else {
			proc_list_unlock();
		}
	}

	return res;
}
-
-/*
- * Jetsam a specific process.
- */
-static boolean_t
-memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason) {
- boolean_t killed;
- proc_t p;
- uint64_t killtime = 0;
- clock_sec_t tv_sec;
- clock_usec_t tv_usec;
- uint32_t tv_msec;
-
- /* TODO - add a victim queue and push this into the main jetsam thread */
-
- p = proc_find(victim_pid);
- if (!p) {
- os_reason_free(jetsam_reason);
- return FALSE;
- }
-
- proc_list_lock();
-
- if (memorystatus_jetsam_snapshot_count == 0) {
- memorystatus_init_jetsam_snapshot_locked(NULL,0);
- }
-
- killtime = mach_absolute_time();
- absolutetime_to_microtime(killtime, &tv_sec, &tv_usec);
- tv_msec = tv_usec / 1000;
-
- memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime);
-
- proc_list_unlock();
-
- os_log_with_startup_serial(OS_LOG_DEFAULT, "%lu.%03d memorystatus: killing_specific_process pid %d [%s] (%s %d) - memorystatus_available_pages: %llu\n",
- (unsigned long)tv_sec, tv_msec, victim_pid, (*p->p_name ? p->p_name : "unknown"),
- memorystatus_kill_cause_name[cause], p->p_memstat_effectivepriority, (uint64_t)memorystatus_available_pages);
-
- killed = memorystatus_do_kill(p, cause, jetsam_reason);
- proc_rele(p);
-
- return killed;
-}
-
-
-/*
- * Toggle the P_MEMSTAT_TERMINATED state.
- * Takes the proc_list_lock.
- */
-void
-proc_memstat_terminated(proc_t p, boolean_t set)
-{
-#if DEVELOPMENT || DEBUG
- if (p) {
- proc_list_lock();
- if (set == TRUE) {
- p->p_memstat_state |= P_MEMSTAT_TERMINATED;
- } else {
- p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
- }
- proc_list_unlock();
- }
-#else
-#pragma unused(p, set)
- /*
- * do nothing
- */
-#endif /* DEVELOPMENT || DEBUG */
- return;
-}
-
-
#if CONFIG_JETSAM
/*
 * This is invoked when cpulimits have been exceeded while in fatal mode.
 * The jetsam_flags do not apply as those are for memory related kills.
 * We call this routine so that the offending process is killed with
 * a non-zero exit status.
 */
void
jetsam_on_ledger_cpulimit_exceeded(void)
{
	proc_t p = current_proc();
	os_reason_t jetsam_reason = OS_REASON_NULL;
	int jetsam_flags = 0; /* make it obvious */

	printf("task_exceeded_cpulimit: killing pid %d [%s]\n",
	    p->p_pid, (*p->p_name ? p->p_name : "(unknown)"));

	jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_CPULIMIT);
	if (jetsam_reason == OS_REASON_NULL) {
		/* Kill proceeds even without a reason object. */
		printf("task_exceeded_cpulimit: unable to allocate memory for jetsam reason\n");
	}

	if (jetsam_do_kill(p, jetsam_flags, jetsam_reason) != 0) {
		printf("task_exceeded_cpulimit: failed to kill current task (exiting?).\n");
	}
}

#endif /* CONFIG_JETSAM */
-
-static void
-memorystatus_get_task_memory_region_count(task_t task, uint64_t *count)
-{
- assert(task);
- assert(count);
-
- *count = get_task_memory_region_count(task);
-}
-
-
/* Result flags OR'd into the pidwatch value (pid stays in the low 32 bits). */
#define MEMORYSTATUS_VM_MAP_FORK_ALLOWED 0x100000000
#define MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED 0x200000000

#if DEVELOPMENT || DEBUG

/*
 * Sysctl only used to test memorystatus_allowed_vm_map_fork() path.
 * set a new pidwatch value
 *	or
 * get the current pidwatch value
 *
 * The pidwatch_val starts out with a PID to watch for in the map_fork path.
 * Its value is:
 * - OR'd with MEMORYSTATUS_VM_MAP_FORK_ALLOWED if we allow the map_fork.
 * - OR'd with MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED if we disallow the map_fork.
 * - set to -1ull if the map_fork() is aborted for other reasons.
 */

/* Watched pid in the low 32 bits; result flags accumulate in the high 32. */
uint64_t memorystatus_vm_map_fork_pidwatch_val = 0;

/* Sysctl handler: read returns the current value; write installs a new pid. */
static int sysctl_memorystatus_vm_map_fork_pidwatch SYSCTL_HANDLER_ARGS {
#pragma unused(oidp, arg1, arg2)

	uint64_t new_value = 0;
	uint64_t old_value = 0;
	int error = 0;

	/*
	 * The pid is held in the low 32 bits.
	 * The 'allowed' flags are in the upper 32 bits.
	 */
	old_value = memorystatus_vm_map_fork_pidwatch_val;

	error = sysctl_io_number(req, old_value, sizeof(old_value), &new_value, NULL);

	if (error || !req->newptr) {
		/*
		 * No new value passed in.
		 */
		return(error);
	}

	/*
	 * A new pid was passed in via req->newptr.
	 * Ignore any attempt to set the higher order bits.
	 */
	memorystatus_vm_map_fork_pidwatch_val = new_value & 0xFFFFFFFF;
	printf("memorystatus: pidwatch old_value = 0x%llx, new_value = 0x%llx \n", old_value, new_value);

	return(error);
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_map_fork_pidwatch, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED| CTLFLAG_MASKED,
    0, 0, sysctl_memorystatus_vm_map_fork_pidwatch, "Q", "get/set pid watched for in vm_map_fork");
-
-
-/*
- * Record if a watched process fails to qualify for a vm_map_fork().
- */
-void
-memorystatus_abort_vm_map_fork(task_t task)
-{
- if (memorystatus_vm_map_fork_pidwatch_val != 0) {
- proc_t p = get_bsdtask_info(task);
- if (p != NULL && memorystatus_vm_map_fork_pidwatch_val == (uint64_t)p->p_pid) {
- memorystatus_vm_map_fork_pidwatch_val = -1ull;
- }
- }
-}
-
-static void
-set_vm_map_fork_pidwatch(task_t task, uint64_t x)
-{
- if (memorystatus_vm_map_fork_pidwatch_val != 0) {
- proc_t p = get_bsdtask_info(task);
- if (p && (memorystatus_vm_map_fork_pidwatch_val == (uint64_t)p->p_pid)) {
- memorystatus_vm_map_fork_pidwatch_val |= x;
- }
- }
-}
-
#else /* DEVELOPMENT || DEBUG */


/*
 * Release kernels have no pidwatch support; this stub lets callers
 * invoke set_vm_map_fork_pidwatch() unconditionally.
 */
static void
set_vm_map_fork_pidwatch(task_t task, uint64_t x)
{
#pragma unused(task)
#pragma unused(x)
}

#endif /* DEVELOPMENT || DEBUG */
-
/*
 * Called during EXC_RESOURCE handling when a process exceeds a soft
 * memory limit. This is the corpse fork path and here we decide if
 * vm_map_fork will be allowed when creating the corpse.
 * The task being considered is suspended.
 *
 * By default, a vm_map_fork is allowed to proceed.
 *
 * A few simple policy assumptions:
 *	Desktop platform is not considered in this path.
 *	The vm_map_fork is always allowed.
 *
 *	If the device has a zero system-wide task limit,
 *	then the vm_map_fork is allowed.
 *
 *	And if a process's memory footprint calculates less
 *	than or equal to a quarter of the system-wide task limit,
 *	then the vm_map_fork is allowed. This calculation
 *	is based on the assumption that a process can
 *	munch memory up to the system-wide task limit.
 */
boolean_t
memorystatus_allowed_vm_map_fork(task_t task)
{
	boolean_t is_allowed = TRUE; /* default */

#if CONFIG_EMBEDDED

	uint64_t footprint_in_bytes;
	uint64_t max_allowed_bytes;

	if (max_task_footprint_mb == 0) {
		/* No system-wide task limit configured: always allow. */
		set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_ALLOWED);
		return (is_allowed);
	}

	footprint_in_bytes = get_task_phys_footprint(task);

	/*
	 * Maximum is 1/4 of the system-wide task limit.
	 */
	max_allowed_bytes = ((uint64_t)max_task_footprint_mb * 1024 * 1024) >> 2;

	if (footprint_in_bytes > max_allowed_bytes) {
		printf("memorystatus disallowed vm_map_fork %lld %lld\n", footprint_in_bytes, max_allowed_bytes);
		set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED);
		return (!is_allowed);
	}
#endif /* CONFIG_EMBEDDED */

	set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_ALLOWED);
	return (is_allowed);

}
-
-static void
-memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages)
-{
- assert(task);
- assert(footprint);
-
- uint64_t pages;
-
- pages = (get_task_phys_footprint(task) / PAGE_SIZE_64);
- assert(((uint32_t)pages) == pages);
- *footprint = (uint32_t)pages;
-
- if (max_footprint_lifetime) {
- pages = (get_task_resident_max(task) / PAGE_SIZE_64);
- assert(((uint32_t)pages) == pages);
- *max_footprint_lifetime = (uint32_t)pages;
- }
- if (purgeable_pages) {
- pages = (get_task_purgeable_size(task) / PAGE_SIZE_64);
- assert(((uint32_t)pages) == pages);
- *purgeable_pages = (uint32_t)pages;
- }
-}
-
-static void
-memorystatus_get_task_phys_footprint_page_counts(task_t task,
- uint64_t *internal_pages, uint64_t *internal_compressed_pages,
- uint64_t *purgeable_nonvolatile_pages, uint64_t *purgeable_nonvolatile_compressed_pages,
- uint64_t *alternate_accounting_pages, uint64_t *alternate_accounting_compressed_pages,
- uint64_t *iokit_mapped_pages, uint64_t *page_table_pages)
-{
- assert(task);
-
- if (internal_pages) {
- *internal_pages = (get_task_internal(task) / PAGE_SIZE_64);
- }
-
- if (internal_compressed_pages) {
- *internal_compressed_pages = (get_task_internal_compressed(task) / PAGE_SIZE_64);
- }
-
- if (purgeable_nonvolatile_pages) {
- *purgeable_nonvolatile_pages = (get_task_purgeable_nonvolatile(task) / PAGE_SIZE_64);
- }
-
- if (purgeable_nonvolatile_compressed_pages) {
- *purgeable_nonvolatile_compressed_pages = (get_task_purgeable_nonvolatile_compressed(task) / PAGE_SIZE_64);
- }
-
- if (alternate_accounting_pages) {
- *alternate_accounting_pages = (get_task_alternate_accounting(task) / PAGE_SIZE_64);
- }
-
- if (alternate_accounting_compressed_pages) {
- *alternate_accounting_compressed_pages = (get_task_alternate_accounting_compressed(task) / PAGE_SIZE_64);
- }