]>
Commit | Line | Data |
---|---|---|
2d21ac55 | 1 | /* |
cb323159 | 2 | * Copyright (c) 2006-2019 Apple Inc. All rights reserved. |
2d21ac55 A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
0a7de745 | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
0a7de745 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
0a7de745 | 17 | * |
2d21ac55 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
0a7de745 | 25 | * |
2d21ac55 A |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | * | |
28 | */ | |
2d21ac55 | 29 | |
2d21ac55 | 30 | #include <kern/sched_prim.h> |
6d2010ae | 31 | #include <kern/kalloc.h> |
316670eb | 32 | #include <kern/assert.h> |
6d2010ae | 33 | #include <kern/debug.h> |
fe8ab488 | 34 | #include <kern/locks.h> |
2d21ac55 A |
35 | #include <kern/task.h> |
36 | #include <kern/thread.h> | |
316670eb | 37 | #include <kern/host.h> |
39037602 | 38 | #include <kern/policy_internal.h> |
5ba3f43e | 39 | #include <kern/thread_group.h> |
39037602 | 40 | |
cb323159 | 41 | #include <corpses/task_corpse.h> |
2d21ac55 | 42 | #include <libkern/libkern.h> |
3e170ce0 | 43 | #include <mach/coalition.h> |
316670eb | 44 | #include <mach/mach_time.h> |
b0d623f7 | 45 | #include <mach/task.h> |
316670eb | 46 | #include <mach/host_priv.h> |
39236c6e | 47 | #include <mach/mach_host.h> |
5ba3f43e | 48 | #include <os/log.h> |
39236c6e | 49 | #include <pexpert/pexpert.h> |
3e170ce0 | 50 | #include <sys/coalition.h> |
316670eb | 51 | #include <sys/kern_event.h> |
b0d623f7 | 52 | #include <sys/proc.h> |
39236c6e | 53 | #include <sys/proc_info.h> |
39037602 | 54 | #include <sys/reason.h> |
b0d623f7 A |
55 | #include <sys/signal.h> |
56 | #include <sys/signalvar.h> | |
2d21ac55 | 57 | #include <sys/sysctl.h> |
316670eb | 58 | #include <sys/sysproto.h> |
b0d623f7 | 59 | #include <sys/wait.h> |
6d2010ae | 60 | #include <sys/tree.h> |
316670eb | 61 | #include <sys/priv.h> |
f427ee49 | 62 | #include <vm/pmap.h> |
39236c6e A |
63 | #include <vm/vm_pageout.h> |
64 | #include <vm/vm_protos.h> | |
cb323159 A |
65 | #include <mach/machine/sdt.h> |
66 | #include <libkern/section_keywords.h> | |
67 | #include <stdatomic.h> | |
6d2010ae | 68 | |
f427ee49 A |
69 | #include <IOKit/IOBSD.h> |
70 | ||
6d2010ae | 71 | #if CONFIG_FREEZE |
6d2010ae | 72 | #include <vm/vm_map.h> |
39236c6e | 73 | #endif /* CONFIG_FREEZE */ |
6d2010ae | 74 | |
0a7de745 | 75 | #include <sys/kern_memorystatus.h> |
cb323159 A |
76 | #include <sys/kern_memorystatus_freeze.h> |
77 | #include <sys/kern_memorystatus_notify.h> | |
39037602 | 78 | |
fe8ab488 | 79 | /* For logging clarity */ |
5ba3f43e | 80 | static const char *memorystatus_kill_cause_name[] = { |
0a7de745 A |
81 | "", /* kMemorystatusInvalid */ |
82 | "jettisoned", /* kMemorystatusKilled */ | |
83 | "highwater", /* kMemorystatusKilledHiwat */ | |
84 | "vnode-limit", /* kMemorystatusKilledVnodes */ | |
85 | "vm-pageshortage", /* kMemorystatusKilledVMPageShortage */ | |
86 | "proc-thrashing", /* kMemorystatusKilledProcThrashing */ | |
87 | "fc-thrashing", /* kMemorystatusKilledFCThrashing */ | |
88 | "per-process-limit", /* kMemorystatusKilledPerProcessLimit */ | |
89 | "disk-space-shortage", /* kMemorystatusKilledDiskSpaceShortage */ | |
90 | "idle-exit", /* kMemorystatusKilledIdleExit */ | |
91 | "zone-map-exhaustion", /* kMemorystatusKilledZoneMapExhaustion */ | |
92 | "vm-compressor-thrashing", /* kMemorystatusKilledVMCompressorThrashing */ | |
93 | "vm-compressor-space-shortage", /* kMemorystatusKilledVMCompressorSpaceShortage */ | |
fe8ab488 A |
94 | }; |
95 | ||
5ba3f43e A |
96 | static const char * |
97 | memorystatus_priority_band_name(int32_t priority) | |
98 | { | |
99 | switch (priority) { | |
100 | case JETSAM_PRIORITY_FOREGROUND: | |
101 | return "FOREGROUND"; | |
102 | case JETSAM_PRIORITY_AUDIO_AND_ACCESSORY: | |
103 | return "AUDIO_AND_ACCESSORY"; | |
104 | case JETSAM_PRIORITY_CONDUCTOR: | |
105 | return "CONDUCTOR"; | |
cb323159 A |
106 | case JETSAM_PRIORITY_DRIVER_APPLE: |
107 | return "DRIVER_APPLE"; | |
5ba3f43e A |
108 | case JETSAM_PRIORITY_HOME: |
109 | return "HOME"; | |
110 | case JETSAM_PRIORITY_EXECUTIVE: | |
111 | return "EXECUTIVE"; | |
112 | case JETSAM_PRIORITY_IMPORTANT: | |
113 | return "IMPORTANT"; | |
114 | case JETSAM_PRIORITY_CRITICAL: | |
115 | return "CRITICAL"; | |
116 | } | |
117 | ||
0a7de745 | 118 | return "?"; |
5ba3f43e A |
119 | } |
120 | ||
fe8ab488 A |
121 | /* Does cause indicate vm or fc thrashing? */ |
122 | static boolean_t | |
5ba3f43e | 123 | is_reason_thrashing(unsigned cause) |
fe8ab488 A |
124 | { |
125 | switch (cause) { | |
fe8ab488 | 126 | case kMemorystatusKilledFCThrashing: |
d9a64523 A |
127 | case kMemorystatusKilledVMCompressorThrashing: |
128 | case kMemorystatusKilledVMCompressorSpaceShortage: | |
fe8ab488 A |
129 | return TRUE; |
130 | default: | |
131 | return FALSE; | |
132 | } | |
133 | } | |
134 | ||
5ba3f43e A |
135 | /* Is the zone map almost full? */ |
136 | static boolean_t | |
137 | is_reason_zone_map_exhaustion(unsigned cause) | |
138 | { | |
0a7de745 | 139 | if (cause == kMemorystatusKilledZoneMapExhaustion) { |
5ba3f43e | 140 | return TRUE; |
0a7de745 | 141 | } |
5ba3f43e A |
142 | return FALSE; |
143 | } | |
144 | ||
145 | /* | |
146 | * Returns the current zone map size and capacity to include in the jetsam snapshot. | |
147 | * Defined in zalloc.c | |
148 | */ | |
149 | extern void get_zone_map_size(uint64_t *current_size, uint64_t *capacity); | |
150 | ||
151 | /* | |
152 | * Returns the name of the largest zone and its size to include in the jetsam snapshot. | |
153 | * Defined in zalloc.c | |
154 | */ | |
155 | extern void get_largest_zone_info(char *zone_name, size_t zone_name_len, uint64_t *zone_size); | |
fe8ab488 | 156 | |
3e170ce0 A |
157 | /* |
158 | * Active / Inactive limit support | |
159 | * proc list must be locked | |
160 | * | |
161 | * The SET_*** macros are used to initialize a limit | |
162 | * for the first time. | |
163 | * | |
164 | * The CACHE_*** macros are used to cache the limit that will | |
165 | * soon be in effect down in the ledgers. | |
166 | */ | |
167 | ||
0a7de745 A |
/*
 * Store 'limit' as the process's active memory limit and make the
 * P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL state bit agree with 'is_fatal'.
 */
#define SET_ACTIVE_LIMITS_LOCKED(p, limit, is_fatal) \
MACRO_BEGIN \
	(p)->p_memstat_memlimit_active = (limit); \
	if (!(is_fatal)) { \
	        (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL; \
	} else { \
	        (p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL; \
	} \
MACRO_END

/*
 * Store 'limit' as the process's inactive memory limit and make the
 * P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL state bit agree with 'is_fatal'.
 */
#define SET_INACTIVE_LIMITS_LOCKED(p, limit, is_fatal) \
MACRO_BEGIN \
	(p)->p_memstat_memlimit_inactive = (limit); \
	if (!(is_fatal)) { \
	        (p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL; \
	} else { \
	        (p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL; \
	} \
MACRO_END

/*
 * Copy the active limit into the cached limit (p_memstat_memlimit),
 * mirror the active "fatal" bit into P_MEMSTAT_FATAL_MEMLIMIT, and
 * report the result through 'is_fatal' (which must be an lvalue).
 */
#define CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal) \
MACRO_BEGIN \
	(p)->p_memstat_memlimit = (p)->p_memstat_memlimit_active; \
	if (((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) == 0) { \
	        (p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; \
	        (is_fatal) = FALSE; \
	} else { \
	        (p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; \
	        (is_fatal) = TRUE; \
	} \
MACRO_END

/*
 * Copy the inactive limit into the cached limit (p_memstat_memlimit),
 * mirror the inactive "fatal" bit into P_MEMSTAT_FATAL_MEMLIMIT, and
 * report the result through 'is_fatal' (which must be an lvalue).
 */
#define CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal) \
MACRO_BEGIN \
	(p)->p_memstat_memlimit = (p)->p_memstat_memlimit_inactive; \
	if (((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) == 0) { \
	        (p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; \
	        (is_fatal) = FALSE; \
	} else { \
	        (p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; \
	        (is_fatal) = TRUE; \
	} \
MACRO_END
211 | ||
212 | ||
39236c6e A |
213 | /* General tunables */ |
214 | ||
215 | unsigned long delta_percentage = 5; | |
216 | unsigned long critical_threshold_percentage = 5; | |
cb323159 A |
217 | // On embedded devices with more than 3GB of memory we lower the critical percentage. |
218 | uint64_t config_jetsam_large_memory_cutoff = 3UL * (1UL << 30); | |
219 | unsigned long critical_threshold_percentage_larger_devices = 4; | |
220 | unsigned long delta_percentage_larger_devices = 4; | |
39236c6e A |
221 | unsigned long idle_offset_percentage = 5; |
222 | unsigned long pressure_threshold_percentage = 15; | |
39037602 | 223 | unsigned long policy_more_free_offset_percentage = 5; |
cb323159 | 224 | unsigned long sysproc_aging_aggr_threshold_percentage = 7; |
39236c6e A |
225 | |
226 | /* | |
cb323159 | 227 | * default jetsam snapshot support |
39236c6e | 228 | */ |
cb323159 A |
229 | memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot; |
230 | memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot_copy; | |
f427ee49 A |
231 | |
232 | #if CONFIG_FREEZE | |
233 | memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot_freezer; | |
234 | /* | |
235 | * The size of the freezer snapshot is given by memorystatus_jetsam_snapshot_max / JETSAM_SNAPSHOT_FREEZER_MAX_FACTOR | |
236 | * The freezer snapshot can be much smaller than the default snapshot | |
237 | * because it only includes apps that have been killed and dasd consumes it every 30 minutes. | |
238 | * Since the snapshots are always wired we don't want to overallocate too much. | |
239 | */ | |
240 | #define JETSAM_SNAPSHOT_FREEZER_MAX_FACTOR 20 | |
241 | unsigned int memorystatus_jetsam_snapshot_freezer_max; | |
242 | unsigned int memorystatus_jetsam_snapshot_freezer_size; | |
243 | TUNABLE(bool, memorystatus_jetsam_use_freezer_snapshot, "kern.jetsam_user_freezer_snapshot", true); | |
244 | #endif /* CONFIG_FREEZE */ | |
245 | ||
cb323159 A |
246 | unsigned int memorystatus_jetsam_snapshot_count = 0; |
247 | unsigned int memorystatus_jetsam_snapshot_copy_count = 0; | |
248 | unsigned int memorystatus_jetsam_snapshot_max = 0; | |
249 | unsigned int memorystatus_jetsam_snapshot_size = 0; | |
250 | uint64_t memorystatus_jetsam_snapshot_last_timestamp = 0; | |
251 | uint64_t memorystatus_jetsam_snapshot_timeout = 0; | |
39236c6e | 252 | |
f427ee49 A |
253 | #if DEVELOPMENT || DEBUG |
254 | /* | |
255 | * On development and debug kernels, we allow one pid to take ownership | |
c3c9b80d A |
256 | * of some memorystatus data structures for testing purposes (via memorystatus_control). |
257 | * If there's an owner, then only they may consume the jetsam snapshot & set freezer probabilities. | |
258 | * This is used when testing these interfaces to avoid racing with other | |
259 | * processes on the system that typically use them (namely OSAnalytics & dasd). | |
f427ee49 | 260 | */ |
c3c9b80d A |
261 | static pid_t memorystatus_testing_pid = 0; |
262 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_testing_pid, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_testing_pid, 0, ""); | |
f427ee49 A |
263 | #endif /* DEVELOPMENT || DEBUG */ |
264 | static void memorystatus_init_jetsam_snapshot_header(memorystatus_jetsam_snapshot_t *snapshot); | |
265 | ||
cb323159 | 266 | /* General memorystatus stuff */ |
39236c6e | 267 | |
cb323159 A |
268 | uint64_t memorystatus_sysprocs_idle_delay_time = 0; |
269 | uint64_t memorystatus_apps_idle_delay_time = 0; | |
f427ee49 A |
270 | /* Some devices give entitled apps a higher memory limit */ |
271 | #if __arm64__ | |
272 | int32_t memorystatus_entitled_max_task_footprint_mb = 0; | |
273 | ||
274 | #if DEVELOPMENT || DEBUG | |
275 | SYSCTL_INT(_kern, OID_AUTO, entitled_max_task_pmem, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_entitled_max_task_footprint_mb, 0, ""); | |
276 | #endif /* DEVELOPMENT || DEBUG */ | |
277 | #endif /* __arm64__ */ | |
cb323159 | 278 | |
c3c9b80d A |
279 | static LCK_GRP_DECLARE(memorystatus_jetsam_fg_band_lock_grp, |
280 | "memorystatus_jetsam_fg_band"); | |
281 | LCK_MTX_DECLARE(memorystatus_jetsam_fg_band_lock, | |
282 | &memorystatus_jetsam_fg_band_lock_grp); | |
39236c6e A |
283 | |
284 | /* Idle guard handling */ | |
285 | ||
39037602 A |
286 | static int32_t memorystatus_scheduled_idle_demotions_sysprocs = 0; |
287 | static int32_t memorystatus_scheduled_idle_demotions_apps = 0; | |
39236c6e | 288 | |
39236c6e A |
289 | static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2); |
290 | static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state); | |
39236c6e | 291 | static void memorystatus_reschedule_idle_demotion_locked(void); |
5ba3f43e | 292 | int memorystatus_update_priority_for_appnap(proc_t p, boolean_t is_appnap); |
39037602 | 293 | vm_pressure_level_t convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t); |
fe8ab488 | 294 | boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t); |
39037602 | 295 | void memorystatus_klist_reset_all_for_level(vm_pressure_level_t pressure_level_to_clear); |
fe8ab488 | 296 | void memorystatus_send_low_swap_note(void); |
cb323159 A |
297 | int memorystatus_get_proccnt_upto_priority(int32_t max_bucket_index); |
298 | boolean_t memorystatus_kill_elevated_process(uint32_t cause, os_reason_t jetsam_reason, unsigned int band, int aggr_count, | |
299 | uint32_t *errors, uint64_t *memory_reclaimed); | |
300 | uint64_t memorystatus_available_memory_internal(proc_t p); | |
39236c6e | 301 | |
39236c6e | 302 | unsigned int memorystatus_level = 0; |
316670eb | 303 | static int memorystatus_list_count = 0; |
39236c6e | 304 | memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT]; |
cb323159 | 305 | static thread_call_t memorystatus_idle_demotion_call; |
39236c6e | 306 | uint64_t memstat_idle_demotion_deadline = 0; |
39037602 A |
307 | int system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND1; |
308 | int applications_aging_band = JETSAM_PRIORITY_IDLE; | |
309 | ||
0a7de745 | 310 | #define isProcessInAgingBands(p) ((isSysProc(p) && system_procs_aging_band && (p->p_memstat_effectivepriority == system_procs_aging_band)) || (isApp(p) && applications_aging_band && (p->p_memstat_effectivepriority == applications_aging_band))) |
d9a64523 | 311 | |
0a7de745 A |
312 | #define kJetsamAgingPolicyNone (0) |
313 | #define kJetsamAgingPolicyLegacy (1) | |
314 | #define kJetsamAgingPolicySysProcsReclaimedFirst (2) | |
315 | #define kJetsamAgingPolicyAppsReclaimedFirst (3) | |
316 | #define kJetsamAgingPolicyMax kJetsamAgingPolicyAppsReclaimedFirst | |
39037602 | 317 | |
cb323159 | 318 | unsigned int jetsam_aging_policy = kJetsamAgingPolicySysProcsReclaimedFirst; |
39037602 A |
319 | |
320 | extern int corpse_for_fatal_memkill; | |
a39ff7e2 A |
321 | extern uint64_t vm_purgeable_purge_task_owned(task_t task); |
322 | boolean_t memorystatus_allowed_vm_map_fork(task_t); | |
323 | #if DEVELOPMENT || DEBUG | |
324 | void memorystatus_abort_vm_map_fork(task_t); | |
325 | #endif | |
39037602 | 326 | |
cb323159 A |
327 | /* |
328 | * Idle delay timeout factors for daemons based on relaunch behavior. Only used in | |
329 | * kJetsamAgingPolicySysProcsReclaimedFirst aging policy. | |
330 | */ | |
331 | #define kJetsamSysProcsIdleDelayTimeLowRatio (5) | |
332 | #define kJetsamSysProcsIdleDelayTimeMedRatio (2) | |
333 | #define kJetsamSysProcsIdleDelayTimeHighRatio (1) | |
334 | static_assert(kJetsamSysProcsIdleDelayTimeLowRatio <= DEFERRED_IDLE_EXIT_TIME_SECS, "sysproc idle delay time for low relaunch daemons would be 0"); | |
335 | ||
336 | /* | |
337 | * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, treat apps as well | |
338 | * behaved daemons for aging purposes. | |
339 | */ | |
340 | #define kJetsamAppsIdleDelayTimeRatio (kJetsamSysProcsIdleDelayTimeLowRatio) | |
341 | ||
342 | static uint64_t | |
343 | memorystatus_sysprocs_idle_time(proc_t p) | |
344 | { | |
345 | /* | |
346 | * The kJetsamAgingPolicySysProcsReclaimedFirst aging policy uses the relaunch behavior to | |
347 | * determine the exact idle deferred time provided to the daemons. For all other aging | |
348 | * policies, simply return the default aging idle time. | |
349 | */ | |
350 | if (jetsam_aging_policy != kJetsamAgingPolicySysProcsReclaimedFirst) { | |
351 | return memorystatus_sysprocs_idle_delay_time; | |
352 | } | |
353 | ||
354 | uint64_t idle_delay_time = 0; | |
355 | /* | |
356 | * For system processes, base the idle delay time on the | |
357 | * jetsam relaunch behavior specified by launchd. The idea | |
358 | * is to provide extra protection to the daemons which would | |
359 | * relaunch immediately after jetsam. | |
360 | */ | |
361 | switch (p->p_memstat_relaunch_flags) { | |
362 | case P_MEMSTAT_RELAUNCH_UNKNOWN: | |
363 | case P_MEMSTAT_RELAUNCH_LOW: | |
364 | idle_delay_time = memorystatus_sysprocs_idle_delay_time / kJetsamSysProcsIdleDelayTimeLowRatio; | |
365 | break; | |
366 | case P_MEMSTAT_RELAUNCH_MED: | |
367 | idle_delay_time = memorystatus_sysprocs_idle_delay_time / kJetsamSysProcsIdleDelayTimeMedRatio; | |
368 | break; | |
369 | case P_MEMSTAT_RELAUNCH_HIGH: | |
370 | idle_delay_time = memorystatus_sysprocs_idle_delay_time / kJetsamSysProcsIdleDelayTimeHighRatio; | |
371 | break; | |
372 | default: | |
373 | panic("Unknown relaunch flags on process!"); | |
374 | break; | |
375 | } | |
376 | return idle_delay_time; | |
377 | } | |
378 | ||
379 | static uint64_t | |
380 | memorystatus_apps_idle_time(__unused proc_t p) | |
381 | { | |
382 | /* | |
383 | * For kJetsamAgingPolicySysProcsReclaimedFirst, the Apps are considered as low | |
384 | * relaunch candidates. So only provide limited protection to them. In the other | |
385 | * aging policies, return the default aging idle time. | |
386 | */ | |
387 | if (jetsam_aging_policy != kJetsamAgingPolicySysProcsReclaimedFirst) { | |
388 | return memorystatus_apps_idle_delay_time; | |
389 | } | |
390 | ||
391 | return memorystatus_apps_idle_delay_time / kJetsamAppsIdleDelayTimeRatio; | |
392 | } | |
393 | ||
394 | ||
39037602 A |
#if 0

/* Keeping around for future use if we need a utility that can do this OR an app that needs a dynamic adjustment. */

/*
 * sysctl handler that switches the jetsam aging policy.
 *
 * The requested policy must lie in [0, kJetsamAgingPolicyMax]; anything
 * else is rejected with EINVAL. On a valid request the aging bands for
 * system processes and applications are recomputed, every matching
 * process sitting in an old (non-idle) aging band is moved to its new
 * band, and the global policy/band variables are updated. All of that
 * happens with the proc list lock held.
 */
static int
sysctl_set_jetsam_aging_policy SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)

	int error = 0, val = 0;
	int prev_sysprocs_band = 0, next_sysprocs_band = 0;
	int prev_apps_band = 0, next_apps_band = 0;
	memstat_bucket_t *bucket = NULL;
	proc_t p = NULL, next_p = NULL;

	error = sysctl_io_number(req, jetsam_aging_policy, sizeof(int), &val, NULL);
	if (error || !req->newptr) {
		return error;
	}

	if ((val < 0) || (val > kJetsamAgingPolicyMax)) {
		printf("jetsam: ordering policy sysctl has invalid value - %d\n", val);
		return EINVAL;
	}

	/*
	 * Synchronize with any additions to / removals from the aging bands
	 * that may be in flight. proc_list_lock() is used only for consistency
	 * with the other routines that handle 'aging' processes; a more
	 * lightweight lock would be preferable.
	 */
	proc_list_lock();

	prev_sysprocs_band = system_procs_aging_band;
	prev_apps_band = applications_aging_band;

	switch (val) {
	case kJetsamAgingPolicyNone:
		next_sysprocs_band = JETSAM_PRIORITY_IDLE;
		next_apps_band = JETSAM_PRIORITY_IDLE;
		break;

	case kJetsamAgingPolicyLegacy:
		/*
		 * Legacy behavior where some daemons get a 10s protection once and only before the first clean->dirty->clean transition before going into IDLE band.
		 */
		next_sysprocs_band = JETSAM_PRIORITY_AGING_BAND1;
		next_apps_band = JETSAM_PRIORITY_IDLE;
		break;

	case kJetsamAgingPolicySysProcsReclaimedFirst:
		next_sysprocs_band = JETSAM_PRIORITY_AGING_BAND1;
		next_apps_band = JETSAM_PRIORITY_AGING_BAND2;
		break;

	case kJetsamAgingPolicyAppsReclaimedFirst:
		next_sysprocs_band = JETSAM_PRIORITY_AGING_BAND2;
		next_apps_band = JETSAM_PRIORITY_AGING_BAND1;
		break;

	default:
		break;
	}

	/* Migrate system processes out of their old aging band, if it changed. */
	if (prev_sysprocs_band && (prev_sysprocs_band != next_sysprocs_band)) {
		bucket = &memstat_bucket[prev_sysprocs_band];

		/* Fetch the successor first: moving p relinks it into another bucket. */
		for (p = TAILQ_FIRST(&bucket->list); p != NULL; p = next_p) {
			next_p = TAILQ_NEXT(p, p_memstat_list);

			if (!isSysProc(p)) {
				continue;
			}

			if (next_sysprocs_band == JETSAM_PRIORITY_IDLE) {
				memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			}

			memorystatus_update_priority_locked(p, next_sysprocs_band, false, true);
		}
	}

	/* Migrate applications out of their old aging band, if it changed. */
	if (prev_apps_band && (prev_apps_band != next_apps_band)) {
		bucket = &memstat_bucket[prev_apps_band];

		for (p = TAILQ_FIRST(&bucket->list); p != NULL; p = next_p) {
			next_p = TAILQ_NEXT(p, p_memstat_list);

			if (!isApp(p)) {
				continue;
			}

			if (next_apps_band == JETSAM_PRIORITY_IDLE) {
				memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			}

			memorystatus_update_priority_locked(p, next_apps_band, false, true);
		}
	}

	jetsam_aging_policy = val;
	system_procs_aging_band = next_sysprocs_band;
	applications_aging_band = next_apps_band;

	proc_list_unlock();

	return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, set_jetsam_aging_policy, CTLTYPE_INT | CTLFLAG_RW,
    0, 0, sysctl_set_jetsam_aging_policy, "I", "Jetsam Aging Policy");
#endif /*0*/
512 | ||
513 | static int | |
514 | sysctl_jetsam_set_sysprocs_idle_delay_time SYSCTL_HANDLER_ARGS | |
515 | { | |
516 | #pragma unused(oidp, arg1, arg2) | |
517 | ||
518 | int error = 0, val = 0, old_time_in_secs = 0; | |
519 | uint64_t old_time_in_ns = 0; | |
520 | ||
521 | absolutetime_to_nanoseconds(memorystatus_sysprocs_idle_delay_time, &old_time_in_ns); | |
f427ee49 | 522 | old_time_in_secs = (int) (old_time_in_ns / NSEC_PER_SEC); |
39037602 A |
523 | |
524 | error = sysctl_io_number(req, old_time_in_secs, sizeof(int), &val, NULL); | |
525 | if (error || !req->newptr) { | |
0a7de745 | 526 | return error; |
39037602 A |
527 | } |
528 | ||
529 | if ((val < 0) || (val > INT32_MAX)) { | |
530 | printf("jetsam: new idle delay interval has invalid value.\n"); | |
531 | return EINVAL; | |
532 | } | |
533 | ||
534 | nanoseconds_to_absolutetime((uint64_t)val * NSEC_PER_SEC, &memorystatus_sysprocs_idle_delay_time); | |
0a7de745 A |
535 | |
536 | return 0; | |
39037602 A |
537 | } |
538 | ||
0a7de745 A |
539 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_sysprocs_idle_delay_time, CTLTYPE_INT | CTLFLAG_RW, |
540 | 0, 0, sysctl_jetsam_set_sysprocs_idle_delay_time, "I", "Aging window for system processes"); | |
39037602 A |
541 | |
542 | ||
543 | static int | |
544 | sysctl_jetsam_set_apps_idle_delay_time SYSCTL_HANDLER_ARGS | |
545 | { | |
546 | #pragma unused(oidp, arg1, arg2) | |
547 | ||
548 | int error = 0, val = 0, old_time_in_secs = 0; | |
549 | uint64_t old_time_in_ns = 0; | |
550 | ||
551 | absolutetime_to_nanoseconds(memorystatus_apps_idle_delay_time, &old_time_in_ns); | |
f427ee49 | 552 | old_time_in_secs = (int) (old_time_in_ns / NSEC_PER_SEC); |
39037602 A |
553 | |
554 | error = sysctl_io_number(req, old_time_in_secs, sizeof(int), &val, NULL); | |
555 | if (error || !req->newptr) { | |
0a7de745 | 556 | return error; |
39037602 A |
557 | } |
558 | ||
559 | if ((val < 0) || (val > INT32_MAX)) { | |
560 | printf("jetsam: new idle delay interval has invalid value.\n"); | |
561 | return EINVAL; | |
562 | } | |
563 | ||
564 | nanoseconds_to_absolutetime((uint64_t)val * NSEC_PER_SEC, &memorystatus_apps_idle_delay_time); | |
0a7de745 A |
565 | |
566 | return 0; | |
39037602 A |
567 | } |
568 | ||
0a7de745 A |
569 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_apps_idle_delay_time, CTLTYPE_INT | CTLFLAG_RW, |
570 | 0, 0, sysctl_jetsam_set_apps_idle_delay_time, "I", "Aging window for applications"); | |
39037602 | 571 | |
0a7de745 | 572 | SYSCTL_INT(_kern, OID_AUTO, jetsam_aging_policy, CTLTYPE_INT | CTLFLAG_RD, &jetsam_aging_policy, 0, ""); |
39037602 | 573 | |
316670eb | 574 | static unsigned int memorystatus_dirty_count = 0; |
6d2010ae | 575 | |
0a7de745 | 576 | SYSCTL_INT(_kern, OID_AUTO, max_task_pmem, CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED, &max_task_footprint_mb, 0, ""); |
3e170ce0 | 577 | |
cb323159 A |
578 | static int memorystatus_highwater_enabled = 1; /* Update the cached memlimit data. */ |
579 | static boolean_t proc_jetsam_state_is_active_locked(proc_t); | |
580 | ||
581 | #if __arm64__ | |
cb323159 A |
582 | int legacy_footprint_bonus_mb = 50; /* This value was chosen after looking at the top 30 apps |
583 | * that needed the additional room in their footprint when | |
584 | * the 'correct' accounting methods were applied to them. | |
585 | */ | |
586 | ||
587 | #if DEVELOPMENT || DEBUG | |
588 | SYSCTL_INT(_kern, OID_AUTO, legacy_footprint_bonus_mb, CTLFLAG_RW | CTLFLAG_LOCKED, &legacy_footprint_bonus_mb, 0, ""); | |
589 | #endif /* DEVELOPMENT || DEBUG */ | |
f427ee49 A |
590 | /* |
591 | * Raise the inactive and active memory limits to new values. | |
592 | * Will only raise the limits and will do nothing if either of the current | |
593 | * limits are 0. | |
594 | * Caller must hold the proc_list_lock | |
595 | */ | |
596 | static void | |
597 | memorystatus_raise_memlimit(proc_t p, int new_memlimit_active, int new_memlimit_inactive) | |
cb323159 A |
598 | { |
599 | int memlimit_mb_active = 0, memlimit_mb_inactive = 0; | |
f427ee49 | 600 | boolean_t memlimit_active_is_fatal = FALSE, memlimit_inactive_is_fatal = FALSE, use_active_limit = FALSE; |
cb323159 | 601 | |
c3c9b80d | 602 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
cb323159 A |
603 | |
604 | if (p->p_memstat_memlimit_active > 0) { | |
605 | memlimit_mb_active = p->p_memstat_memlimit_active; | |
606 | } else if (p->p_memstat_memlimit_active == -1) { | |
607 | memlimit_mb_active = max_task_footprint_mb; | |
608 | } else { | |
609 | /* | |
610 | * Nothing to do for '0' which is | |
611 | * a special value only used internally | |
612 | * to test 'no limits'. | |
613 | */ | |
cb323159 A |
614 | return; |
615 | } | |
616 | ||
617 | if (p->p_memstat_memlimit_inactive > 0) { | |
618 | memlimit_mb_inactive = p->p_memstat_memlimit_inactive; | |
619 | } else if (p->p_memstat_memlimit_inactive == -1) { | |
620 | memlimit_mb_inactive = max_task_footprint_mb; | |
621 | } else { | |
622 | /* | |
623 | * Nothing to do for '0' which is | |
624 | * a special value only used internally | |
625 | * to test 'no limits'. | |
626 | */ | |
cb323159 A |
627 | return; |
628 | } | |
629 | ||
f427ee49 A |
630 | memlimit_mb_active = MAX(new_memlimit_active, memlimit_mb_active); |
631 | memlimit_mb_inactive = MAX(new_memlimit_inactive, memlimit_mb_inactive); | |
cb323159 A |
632 | |
633 | memlimit_active_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL); | |
634 | memlimit_inactive_is_fatal = (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL); | |
635 | ||
636 | SET_ACTIVE_LIMITS_LOCKED(p, memlimit_mb_active, memlimit_active_is_fatal); | |
637 | SET_INACTIVE_LIMITS_LOCKED(p, memlimit_mb_inactive, memlimit_inactive_is_fatal); | |
638 | ||
639 | if (proc_jetsam_state_is_active_locked(p) == TRUE) { | |
640 | use_active_limit = TRUE; | |
641 | CACHE_ACTIVE_LIMITS_LOCKED(p, memlimit_active_is_fatal); | |
642 | } else { | |
643 | CACHE_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive_is_fatal); | |
644 | } | |
645 | ||
cb323159 A |
646 | if (memorystatus_highwater_enabled) { |
647 | task_set_phys_footprint_limit_internal(p->task, | |
648 | (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, | |
649 | NULL, /*return old value */ | |
650 | use_active_limit, /*active limit?*/ | |
651 | (use_active_limit ? memlimit_active_is_fatal : memlimit_inactive_is_fatal)); | |
652 | } | |
cb323159 A |
653 | } |
654 | ||
94ff46dc | 655 | void |
f427ee49 | 656 | memorystatus_act_on_legacy_footprint_entitlement(proc_t p, boolean_t footprint_increase) |
94ff46dc A |
657 | { |
658 | int memlimit_mb_active = 0, memlimit_mb_inactive = 0; | |
94ff46dc | 659 | |
f427ee49 | 660 | if (p == NULL) { |
94ff46dc A |
661 | return; |
662 | } | |
663 | ||
664 | proc_list_lock(); | |
665 | ||
666 | if (p->p_memstat_memlimit_active > 0) { | |
667 | memlimit_mb_active = p->p_memstat_memlimit_active; | |
668 | } else if (p->p_memstat_memlimit_active == -1) { | |
669 | memlimit_mb_active = max_task_footprint_mb; | |
670 | } else { | |
671 | /* | |
672 | * Nothing to do for '0' which is | |
673 | * a special value only used internally | |
674 | * to test 'no limits'. | |
675 | */ | |
676 | proc_list_unlock(); | |
677 | return; | |
678 | } | |
679 | ||
680 | if (p->p_memstat_memlimit_inactive > 0) { | |
681 | memlimit_mb_inactive = p->p_memstat_memlimit_inactive; | |
682 | } else if (p->p_memstat_memlimit_inactive == -1) { | |
683 | memlimit_mb_inactive = max_task_footprint_mb; | |
684 | } else { | |
685 | /* | |
686 | * Nothing to do for '0' which is | |
687 | * a special value only used internally | |
688 | * to test 'no limits'. | |
689 | */ | |
690 | proc_list_unlock(); | |
691 | return; | |
692 | } | |
693 | ||
f427ee49 A |
694 | if (footprint_increase) { |
695 | memlimit_mb_active += legacy_footprint_bonus_mb; | |
696 | memlimit_mb_inactive += legacy_footprint_bonus_mb; | |
94ff46dc | 697 | } else { |
f427ee49 A |
698 | memlimit_mb_active -= legacy_footprint_bonus_mb; |
699 | if (memlimit_mb_active == max_task_footprint_mb) { | |
700 | memlimit_mb_active = -1; /* reverting back to default system limit */ | |
701 | } | |
702 | ||
703 | memlimit_mb_inactive -= legacy_footprint_bonus_mb; | |
704 | if (memlimit_mb_inactive == max_task_footprint_mb) { | |
705 | memlimit_mb_inactive = -1; /* reverting back to default system limit */ | |
706 | } | |
94ff46dc | 707 | } |
f427ee49 | 708 | memorystatus_raise_memlimit(p, memlimit_mb_active, memlimit_mb_inactive); |
94ff46dc | 709 | |
f427ee49 A |
710 | proc_list_unlock(); |
711 | } | |
94ff46dc | 712 | |
f427ee49 A |
713 | void |
714 | memorystatus_act_on_ios13extended_footprint_entitlement(proc_t p) | |
715 | { | |
716 | if (max_mem < 1500ULL * 1024 * 1024 || | |
717 | max_mem > 2ULL * 1024 * 1024 * 1024) { | |
718 | /* ios13extended_footprint is only for 2GB devices */ | |
719 | return; | |
94ff46dc | 720 | } |
f427ee49 A |
721 | /* limit to "almost 2GB" */ |
722 | proc_list_lock(); | |
723 | memorystatus_raise_memlimit(p, 1800, 1800); | |
94ff46dc A |
724 | proc_list_unlock(); |
725 | } | |
726 | ||
f427ee49 A |
727 | void |
728 | memorystatus_act_on_entitled_task_limit(proc_t p) | |
729 | { | |
730 | if (memorystatus_entitled_max_task_footprint_mb == 0) { | |
731 | // Entitlement is not supported on this device. | |
732 | return; | |
733 | } | |
734 | proc_list_lock(); | |
735 | memorystatus_raise_memlimit(p, memorystatus_entitled_max_task_footprint_mb, memorystatus_entitled_max_task_footprint_mb); | |
736 | proc_list_unlock(); | |
737 | } | |
cb323159 A |
738 | #endif /* __arm64__ */ |
739 | ||
0a7de745 | 740 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_level, 0, ""); |
5ba3f43e | 741 | |
39236c6e A |
742 | int |
743 | memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret) | |
744 | { | |
0a7de745 A |
745 | user_addr_t level = 0; |
746 | ||
39236c6e | 747 | level = args->level; |
0a7de745 | 748 | |
39236c6e A |
749 | if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) { |
750 | return EFAULT; | |
751 | } | |
0a7de745 | 752 | |
39236c6e A |
753 | return 0; |
754 | } | |
755 | ||
39236c6e | 756 | static void memorystatus_thread(void *param __unused, wait_result_t wr __unused); |
6d2010ae | 757 | |
39037602 A |
758 | /* Memory Limits */ |
759 | ||
39037602 A |
760 | static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason); |
761 | static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason); | |
762 | ||
763 | ||
3e170ce0 A |
764 | static int memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval); |
765 | ||
766 | static int memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry); | |
767 | ||
768 | static int memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval); | |
769 | ||
39037602 | 770 | static int memorystatus_cmd_get_memlimit_excess_np(pid_t pid, uint32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval); |
3e170ce0 | 771 | |
cb323159 A |
772 | static void memorystatus_get_memlimit_properties_internal(proc_t p, memorystatus_memlimit_properties_t *p_entry); |
773 | static int memorystatus_set_memlimit_properties_internal(proc_t p, memorystatus_memlimit_properties_t *p_entry); | |
774 | ||
fe8ab488 A |
775 | int proc_get_memstat_priority(proc_t, boolean_t); |
776 | ||
fe8ab488 | 777 | static boolean_t memorystatus_idle_snapshot = 0; |
39236c6e | 778 | |
316670eb A |
779 | unsigned int memorystatus_delta = 0; |
780 | ||
3e170ce0 | 781 | /* Jetsam Loop Detection */ |
0a7de745 A |
782 | static boolean_t memorystatus_jld_enabled = FALSE; /* Enable jetsam loop detection */ |
783 | static uint32_t memorystatus_jld_eval_period_msecs = 0; /* Init pass sets this based on device memory size */ | |
784 | static int memorystatus_jld_eval_aggressive_count = 3; /* Raise the priority max after 'n' aggressive loops */ | |
3e170ce0 A |
785 | static int memorystatus_jld_eval_aggressive_priority_band_max = 15; /* Kill aggressively up through this band */ |
786 | ||
490019cf A |
787 | /* |
788 | * A FG app can request that the aggressive jetsam mechanism display some leniency in the FG band. This 'lenient' mode is described as: | |
789 | * --- if aggressive jetsam kills an app in the FG band and gets back >=AGGRESSIVE_JETSAM_LENIENT_MODE_THRESHOLD memory, it will stop the aggressive march further into and up the jetsam bands. | |
790 | * | |
791 | * RESTRICTIONS: | |
792 | * - Such a request is respected/acknowledged only once while that 'requesting' app is in the FG band i.e. if aggressive jetsam was | |
0a7de745 | 793 | * needed and the 'lenient' mode was deployed then that's it for this special mode while the app is in the FG band. |
490019cf A |
794 | * |
795 | * - If the app is still in the FG band and aggressive jetsam is needed again, there will be no stop-and-check the next time around. | |
796 | * | |
797 | * - Also, the transition of the 'requesting' app away from the FG band will void this special behavior. | |
798 | */ | |
799 | ||
0a7de745 A |
800 | #define AGGRESSIVE_JETSAM_LENIENT_MODE_THRESHOLD 25 |
801 | boolean_t memorystatus_aggressive_jetsam_lenient_allowed = FALSE; | |
802 | boolean_t memorystatus_aggressive_jetsam_lenient = FALSE; | |
490019cf | 803 | |
3e170ce0 | 804 | #if DEVELOPMENT || DEBUG |
0a7de745 | 805 | /* |
3e170ce0 A |
806 | * Jetsam Loop Detection tunables. |
807 | */ | |
808 | ||
0a7de745 A |
809 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_period_msecs, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_jld_eval_period_msecs, 0, ""); |
810 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_count, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_count, 0, ""); | |
811 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_priority_band_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_priority_band_max, 0, ""); | |
3e170ce0 A |
812 | #endif /* DEVELOPMENT || DEBUG */ |
813 | ||
fe8ab488 | 814 | static uint32_t kill_under_pressure_cause = 0; |
316670eb | 815 | |
3e170ce0 A |
816 | /* |
817 | * snapshot support for memstats collected at boot. | |
818 | */ | |
819 | static memorystatus_jetsam_snapshot_t memorystatus_at_boot_snapshot; | |
316670eb | 820 | |
39037602 A |
821 | static void memorystatus_init_jetsam_snapshot_locked(memorystatus_jetsam_snapshot_t *od_snapshot, uint32_t ods_list_count); |
822 | static boolean_t memorystatus_init_jetsam_snapshot_entry_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry, uint64_t gencount); | |
823 | static void memorystatus_update_jetsam_snapshot_entry_locked(proc_t p, uint32_t kill_cause, uint64_t killtime); | |
824 | ||
39236c6e | 825 | static void memorystatus_clear_errors(void); |
39037602 | 826 | static void memorystatus_get_task_phys_footprint_page_counts(task_t task, |
0a7de745 A |
827 | uint64_t *internal_pages, uint64_t *internal_compressed_pages, |
828 | uint64_t *purgeable_nonvolatile_pages, uint64_t *purgeable_nonvolatile_compressed_pages, | |
829 | uint64_t *alternate_accounting_pages, uint64_t *alternate_accounting_compressed_pages, | |
f427ee49 | 830 | uint64_t *iokit_mapped_pages, uint64_t *page_table_pages, uint64_t *frozen_to_swap_pages); |
39037602 A |
831 | |
832 | static void memorystatus_get_task_memory_region_count(task_t task, uint64_t *count); | |
833 | ||
39236c6e | 834 | static uint32_t memorystatus_build_state(proc_t p); |
fe8ab488 | 835 | //static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured); |
39236c6e | 836 | |
cb323159 A |
837 | static boolean_t memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, os_reason_t jetsam_reason, int32_t *priority, |
838 | uint32_t *errors, uint64_t *memory_reclaimed); | |
839 | static boolean_t memorystatus_kill_processes_aggressive(uint32_t cause, int aggr_count, int32_t priority_max, uint32_t *errors, uint64_t *memory_reclaimed); | |
840 | static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors, boolean_t *purged, uint64_t *memory_reclaimed); | |
39236c6e A |
841 | |
842 | static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause); | |
316670eb | 843 | |
3e170ce0 A |
844 | /* Priority Band Sorting Routines */ |
845 | static int memorystatus_sort_bucket(unsigned int bucket_index, int sort_order); | |
846 | static int memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order); | |
847 | static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index); | |
848 | static int memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz); | |
849 | ||
850 | /* qsort routines */ | |
851 | typedef int (*cmpfunc_t)(const void *a, const void *b); | |
852 | extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp); | |
853 | static int memstat_asc_cmp(const void *a, const void *b); | |
854 | ||
316670eb | 855 | /* VM pressure */ |
6d2010ae | 856 | |
fe8ab488 A |
857 | extern unsigned int vm_page_free_count; |
858 | extern unsigned int vm_page_active_count; | |
859 | extern unsigned int vm_page_inactive_count; | |
860 | extern unsigned int vm_page_throttled_count; | |
861 | extern unsigned int vm_page_purgeable_count; | |
862 | extern unsigned int vm_page_wire_count; | |
f427ee49 A |
863 | extern unsigned int vm_page_speculative_count; |
864 | ||
865 | #if CONFIG_JETSAM | |
866 | #define MEMORYSTATUS_LOG_AVAILABLE_PAGES memorystatus_available_pages | |
867 | #else /* CONFIG_JETSAM */ | |
868 | #define MEMORYSTATUS_LOG_AVAILABLE_PAGES (vm_page_active_count + vm_page_inactive_count + vm_page_free_count + vm_page_speculative_count) | |
869 | #endif /* CONFIG_JETSAM */ | |
39037602 | 870 | #if CONFIG_SECLUDED_MEMORY |
0a7de745 | 871 | extern unsigned int vm_page_secluded_count; |
cb323159 | 872 | extern unsigned int vm_page_secluded_count_over_target; |
39037602 | 873 | #endif /* CONFIG_SECLUDED_MEMORY */ |
fe8ab488 | 874 | |
cb323159 A |
875 | /* Aggressive jetsam pages threshold for sysproc aging policy */ |
876 | unsigned int memorystatus_sysproc_aging_aggr_pages = 0; | |
877 | ||
5ba3f43e | 878 | #if CONFIG_JETSAM |
fe8ab488 A |
879 | unsigned int memorystatus_available_pages = (unsigned int)-1; |
880 | unsigned int memorystatus_available_pages_pressure = 0; | |
881 | unsigned int memorystatus_available_pages_critical = 0; | |
cb323159 A |
882 | unsigned int memorystatus_available_pages_critical_base = 0; |
883 | unsigned int memorystatus_available_pages_critical_idle_offset = 0; | |
fe8ab488 | 884 | |
00867663 A |
885 | #if DEVELOPMENT || DEBUG |
886 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_available_pages, 0, ""); | |
887 | #else | |
5ba3f43e | 888 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD | CTLFLAG_MASKED | CTLFLAG_LOCKED, &memorystatus_available_pages, 0, ""); |
00867663 | 889 | #endif /* DEVELOPMENT || DEBUG */ |
5ba3f43e A |
890 | |
891 | static unsigned int memorystatus_jetsam_policy = kPolicyDefault; | |
892 | unsigned int memorystatus_policy_more_free_offset_pages = 0; | |
893 | static void memorystatus_update_levels_locked(boolean_t critical_only); | |
894 | static unsigned int memorystatus_thread_wasted_wakeup = 0; | |
895 | ||
896 | /* Callback into vm_compressor.c to signal that thrashing has been mitigated. */ | |
897 | extern void vm_thrashing_jetsam_done(void); | |
898 | static int memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit); | |
cb323159 A |
899 | #if DEVELOPMENT || DEBUG |
900 | static inline uint32_t | |
901 | roundToNearestMB(uint32_t in) | |
902 | { | |
903 | return (in + ((1 << 20) - 1)) >> 20; | |
904 | } | |
905 | ||
906 | static int memorystatus_cmd_increase_jetsam_task_limit(pid_t pid, uint32_t byte_increase); | |
907 | #endif | |
5ba3f43e A |
908 | |
909 | int32_t max_kill_priority = JETSAM_PRIORITY_MAX; | |
910 | ||
911 | #else /* CONFIG_JETSAM */ | |
912 | ||
913 | uint64_t memorystatus_available_pages = (uint64_t)-1; | |
914 | uint64_t memorystatus_available_pages_pressure = (uint64_t)-1; | |
915 | uint64_t memorystatus_available_pages_critical = (uint64_t)-1; | |
916 | ||
917 | int32_t max_kill_priority = JETSAM_PRIORITY_IDLE; | |
00867663 A |
918 | #endif /* CONFIG_JETSAM */ |
919 | ||
39037602 A |
920 | #if DEVELOPMENT || DEBUG |
921 | ||
c3c9b80d A |
922 | static LCK_GRP_DECLARE(disconnect_page_mappings_lck_grp, "disconnect_page_mappings"); |
923 | static LCK_MTX_DECLARE(disconnect_page_mappings_mutex, &disconnect_page_mappings_lck_grp); | |
39037602 | 924 | |
f427ee49 | 925 | extern bool kill_on_no_paging_space; |
5ba3f43e | 926 | #endif /* DEVELOPMENT || DEBUG */ |
39037602 A |
927 | |
928 | ||
316670eb | 929 | /* Debug */ |
6d2010ae | 930 | |
fe8ab488 A |
931 | extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *); |
932 | ||
6d2010ae | 933 | #if DEVELOPMENT || DEBUG |
6d2010ae | 934 | |
39037602 | 935 | static unsigned int memorystatus_debug_dump_this_bucket = 0; |
39236c6e | 936 | |
3e170ce0 | 937 | static void |
0a7de745 | 938 | memorystatus_debug_dump_bucket_locked(unsigned int bucket_index) |
3e170ce0 A |
939 | { |
940 | proc_t p = NULL; | |
39037602 A |
941 | uint64_t bytes = 0; |
942 | int ledger_limit = 0; | |
3e170ce0 A |
943 | unsigned int b = bucket_index; |
944 | boolean_t traverse_all_buckets = FALSE; | |
945 | ||
0a7de745 | 946 | if (bucket_index >= MEMSTAT_BUCKET_COUNT) { |
3e170ce0 A |
947 | traverse_all_buckets = TRUE; |
948 | b = 0; | |
0a7de745 | 949 | } else { |
3e170ce0 A |
950 | traverse_all_buckets = FALSE; |
951 | b = bucket_index; | |
952 | } | |
953 | ||
954 | /* | |
39037602 A |
955 | * footprint reported in [pages / MB ] |
956 | * limits reported as: | |
957 | * L-limit proc's Ledger limit | |
958 | * C-limit proc's Cached limit, should match Ledger | |
959 | * A-limit proc's Active limit | |
960 | * IA-limit proc's Inactive limit | |
961 | * F==Fatal, NF==NonFatal | |
3e170ce0 | 962 | */ |
39037602 | 963 | |
0a7de745 | 964 | printf("memorystatus_debug_dump ***START*(PAGE_SIZE_64=%llu)**\n", PAGE_SIZE_64); |
cb323159 | 965 | printf("bucket [pid] [pages / MB] [state] [EP / RP / AP] dirty deadline [L-limit / C-limit / A-limit / IA-limit] name\n"); |
3e170ce0 A |
966 | p = memorystatus_get_first_proc_locked(&b, traverse_all_buckets); |
967 | while (p) { | |
39037602 A |
968 | bytes = get_task_phys_footprint(p->task); |
969 | task_get_phys_footprint_limit(p->task, &ledger_limit); | |
cb323159 | 970 | printf("%2d [%5d] [%5lld /%3lldMB] 0x%-8x [%2d / %2d / %2d] 0x%-3x %10lld [%3d / %3d%s / %3d%s / %3d%s] %s\n", |
0a7de745 A |
971 | b, p->p_pid, |
972 | (bytes / PAGE_SIZE_64), /* task's footprint converted from bytes to pages */ | |
973 | (bytes / (1024ULL * 1024ULL)), /* task's footprint converted from bytes to MB */ | |
cb323159 A |
974 | p->p_memstat_state, p->p_memstat_effectivepriority, p->p_memstat_requestedpriority, p->p_memstat_assertionpriority, |
975 | p->p_memstat_dirty, p->p_memstat_idledeadline, | |
0a7de745 A |
976 | ledger_limit, |
977 | p->p_memstat_memlimit, | |
978 | (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), | |
979 | p->p_memstat_memlimit_active, | |
980 | (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL ? "F " : "NF"), | |
981 | p->p_memstat_memlimit_inactive, | |
982 | (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL ? "F " : "NF"), | |
983 | (*p->p_name ? p->p_name : "unknown")); | |
3e170ce0 | 984 | p = memorystatus_get_next_proc_locked(&b, p, traverse_all_buckets); |
0a7de745 A |
985 | } |
986 | printf("memorystatus_debug_dump ***END***\n"); | |
3e170ce0 A |
987 | } |
988 | ||
989 | static int | |
990 | sysctl_memorystatus_debug_dump_bucket SYSCTL_HANDLER_ARGS | |
991 | { | |
992 | #pragma unused(oidp, arg2) | |
0a7de745 A |
993 | int bucket_index = 0; |
994 | int error; | |
3e170ce0 A |
995 | error = SYSCTL_OUT(req, arg1, sizeof(int)); |
996 | if (error || !req->newptr) { | |
0a7de745 A |
997 | return error; |
998 | } | |
999 | error = SYSCTL_IN(req, &bucket_index, sizeof(int)); | |
1000 | if (error || !req->newptr) { | |
1001 | return error; | |
3e170ce0 | 1002 | } |
3e170ce0 A |
1003 | if (bucket_index >= MEMSTAT_BUCKET_COUNT) { |
1004 | /* | |
1005 | * All jetsam buckets will be dumped. | |
1006 | */ | |
0a7de745 | 1007 | } else { |
3e170ce0 A |
1008 | /* |
1009 | * Only a single bucket will be dumped. | |
1010 | */ | |
1011 | } | |
1012 | ||
1013 | proc_list_lock(); | |
1014 | memorystatus_debug_dump_bucket_locked(bucket_index); | |
1015 | proc_list_unlock(); | |
1016 | memorystatus_debug_dump_this_bucket = bucket_index; | |
0a7de745 | 1017 | return error; |
3e170ce0 A |
1018 | } |
1019 | ||
1020 | /* | |
1021 | * Debug aid to look at jetsam buckets and proc jetsam fields. | |
1022 | * Use this sysctl to act on a particular jetsam bucket. | |
1023 | * Writing the sysctl triggers the dump. | |
0a7de745 | 1024 | * Usage: sysctl kern.memorystatus_debug_dump_this_bucket=<bucket_index> |
3e170ce0 A |
1025 | */ |
1026 | ||
0a7de745 | 1027 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_debug_dump_this_bucket, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_debug_dump_this_bucket, 0, sysctl_memorystatus_debug_dump_bucket, "I", ""); |
3e170ce0 A |
1028 | |
1029 | ||
39236c6e A |
1030 | /* Debug aid to help determine the limit */ |
1031 | ||
1032 | static int | |
1033 | sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS | |
1034 | { | |
1035 | #pragma unused(oidp, arg2) | |
1036 | proc_t p; | |
1037 | unsigned int b = 0; | |
1038 | int error, enable = 0; | |
0a7de745 | 1039 | boolean_t use_active; /* use the active limit and active limit attributes */ |
813fb2f6 | 1040 | boolean_t is_fatal; |
39236c6e A |
1041 | |
1042 | error = SYSCTL_OUT(req, arg1, sizeof(int)); | |
1043 | if (error || !req->newptr) { | |
0a7de745 | 1044 | return error; |
39236c6e A |
1045 | } |
1046 | ||
1047 | error = SYSCTL_IN(req, &enable, sizeof(int)); | |
1048 | if (error || !req->newptr) { | |
0a7de745 | 1049 | return error; |
39236c6e A |
1050 | } |
1051 | ||
1052 | if (!(enable == 0 || enable == 1)) { | |
1053 | return EINVAL; | |
1054 | } | |
1055 | ||
1056 | proc_list_lock(); | |
1057 | ||
1058 | p = memorystatus_get_first_proc_locked(&b, TRUE); | |
1059 | while (p) { | |
813fb2f6 | 1060 | use_active = proc_jetsam_state_is_active_locked(p); |
3e170ce0 | 1061 | |
39236c6e | 1062 | if (enable) { |
813fb2f6 A |
1063 | if (use_active == TRUE) { |
1064 | CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal); | |
39236c6e | 1065 | } else { |
813fb2f6 | 1066 | CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal); |
39236c6e A |
1067 | } |
1068 | } else { | |
3e170ce0 A |
1069 | /* |
1070 | * Disabling limits does not touch the stored variants. | |
1071 | * Set the cached limit fields to system_wide defaults. | |
1072 | */ | |
1073 | p->p_memstat_memlimit = -1; | |
1074 | p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; | |
813fb2f6 | 1075 | is_fatal = TRUE; |
fe8ab488 | 1076 | } |
3e170ce0 A |
1077 | |
1078 | /* | |
1079 | * Enforce the cached limit by writing to the ledger. | |
1080 | */ | |
813fb2f6 | 1081 | task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit: -1, NULL, use_active, is_fatal); |
3e170ce0 | 1082 | |
39236c6e A |
1083 | p = memorystatus_get_next_proc_locked(&b, p, TRUE); |
1084 | } | |
0a7de745 | 1085 | |
39236c6e A |
1086 | memorystatus_highwater_enabled = enable; |
1087 | ||
1088 | proc_list_unlock(); | |
1089 | ||
1090 | return 0; | |
1091 | } | |
1092 | ||
0a7de745 | 1093 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", ""); |
39236c6e | 1094 | |
cb323159 A |
1095 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, ""); |
1096 | ||
1097 | #if CONFIG_JETSAM | |
1098 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, ""); | |
1099 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, ""); | |
1100 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, ""); | |
1101 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_policy_more_free_offset_pages, CTLFLAG_RW, &memorystatus_policy_more_free_offset_pages, 0, ""); | |
1102 | ||
1103 | static unsigned int memorystatus_jetsam_panic_debug = 0; | |
1104 | ||
39037602 A |
1105 | #if VM_PRESSURE_EVENTS |
1106 | ||
cb323159 | 1107 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, ""); |
39037602 | 1108 | |
cb323159 | 1109 | #endif /* VM_PRESSURE_EVENTS */ |
39037602 | 1110 | |
cb323159 | 1111 | #endif /* CONFIG_JETSAM */ |
39037602 | 1112 | |
cb323159 | 1113 | #endif /* DEVELOPMENT || DEBUG */ |
39037602 | 1114 | |
cb323159 A |
1115 | extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation, |
1116 | void *parameter, | |
1117 | integer_t priority, | |
1118 | thread_t *new_thread); | |
39037602 | 1119 | |
cb323159 | 1120 | #if DEVELOPMENT || DEBUG |
39037602 | 1121 | |
cb323159 A |
1122 | static int |
1123 | sysctl_memorystatus_disconnect_page_mappings SYSCTL_HANDLER_ARGS | |
1124 | { | |
1125 | #pragma unused(arg1, arg2) | |
1126 | int error = 0, pid = 0; | |
1127 | proc_t p; | |
39037602 | 1128 | |
cb323159 A |
1129 | error = sysctl_handle_int(oidp, &pid, 0, req); |
1130 | if (error || !req->newptr) { | |
0a7de745 | 1131 | return error; |
39037602 A |
1132 | } |
1133 | ||
cb323159 A |
1134 | lck_mtx_lock(&disconnect_page_mappings_mutex); |
1135 | ||
1136 | if (pid == -1) { | |
1137 | vm_pageout_disconnect_all_pages(); | |
1138 | } else { | |
1139 | p = proc_find(pid); | |
39037602 | 1140 | |
cb323159 A |
1141 | if (p != NULL) { |
1142 | error = task_disconnect_page_mappings(p->task); | |
39037602 | 1143 | |
cb323159 | 1144 | proc_rele(p); |
39037602 | 1145 | |
cb323159 A |
1146 | if (error) { |
1147 | error = EIO; | |
1148 | } | |
1149 | } else { | |
1150 | error = EINVAL; | |
39037602 A |
1151 | } |
1152 | } | |
cb323159 | 1153 | lck_mtx_unlock(&disconnect_page_mappings_mutex); |
39037602 | 1154 | |
0a7de745 | 1155 | return error; |
39037602 A |
1156 | } |
1157 | ||
cb323159 A |
1158 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_disconnect_page_mappings, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, |
1159 | 0, 0, &sysctl_memorystatus_disconnect_page_mappings, "I", ""); | |
39236c6e | 1160 | |
cb323159 | 1161 | #endif /* DEVELOPMENT || DEBUG */ |
316670eb | 1162 | |
f427ee49 A |
1163 | /* |
1164 | * Sorts the given bucket. | |
1165 | * | |
1166 | * Input: | |
1167 | * bucket_index - jetsam priority band to be sorted. | |
1168 | * sort_order - JETSAM_SORT_xxx from kern_memorystatus.h | |
1169 | * Currently sort_order is only meaningful when handling | |
1170 | * coalitions. | |
1171 | * | |
1172 | * proc_list_lock must be held by the caller. | |
1173 | */ | |
1174 | static void | |
1175 | memorystatus_sort_bucket_locked(unsigned int bucket_index, int sort_order) | |
1176 | { | |
c3c9b80d | 1177 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
f427ee49 A |
1178 | if (memstat_bucket[bucket_index].count == 0) { |
1179 | return; | |
1180 | } | |
1181 | ||
1182 | switch (bucket_index) { | |
1183 | case JETSAM_PRIORITY_FOREGROUND: | |
1184 | if (memorystatus_sort_by_largest_coalition_locked(bucket_index, sort_order) == 0) { | |
1185 | /* | |
1186 | * Fall back to per process sorting when zero coalitions are found. | |
1187 | */ | |
1188 | memorystatus_sort_by_largest_process_locked(bucket_index); | |
1189 | } | |
1190 | break; | |
1191 | default: | |
1192 | memorystatus_sort_by_largest_process_locked(bucket_index); | |
1193 | break; | |
1194 | } | |
1195 | } | |
316670eb | 1196 | |
cb323159 A |
1197 | /* |
1198 | * Picks the sorting routine for a given jetsam priority band. | |
1199 | * | |
1200 | * Input: | |
1201 | * bucket_index - jetsam priority band to be sorted. | |
1202 | * sort_order - JETSAM_SORT_xxx from kern_memorystatus.h | |
1203 | * Currently sort_order is only meaningful when handling | |
1204 | * coalitions. | |
1205 | * | |
1206 | * Return: | |
1207 | * 0 on success | |
1208 | * non-0 on failure | |
1209 | */ | |
316670eb | 1210 | static int |
cb323159 | 1211 | memorystatus_sort_bucket(unsigned int bucket_index, int sort_order) |
316670eb | 1212 | { |
cb323159 | 1213 | int coal_sort_order; |
0a7de745 | 1214 | |
cb323159 A |
1215 | /* |
1216 | * Verify the jetsam priority | |
1217 | */ | |
1218 | if (bucket_index >= MEMSTAT_BUCKET_COUNT) { | |
1219 | return EINVAL; | |
1220 | } | |
316670eb | 1221 | |
cb323159 A |
1222 | #if DEVELOPMENT || DEBUG |
1223 | if (sort_order == JETSAM_SORT_DEFAULT) { | |
1224 | coal_sort_order = COALITION_SORT_DEFAULT; | |
1225 | } else { | |
1226 | coal_sort_order = sort_order; /* only used for testing scenarios */ | |
0a7de745 | 1227 | } |
cb323159 A |
1228 | #else |
1229 | /* Verify default */ | |
1230 | if (sort_order == JETSAM_SORT_DEFAULT) { | |
1231 | coal_sort_order = COALITION_SORT_DEFAULT; | |
1232 | } else { | |
316670eb A |
1233 | return EINVAL; |
1234 | } | |
cb323159 | 1235 | #endif |
0a7de745 | 1236 | |
39236c6e | 1237 | proc_list_lock(); |
f427ee49 | 1238 | memorystatus_sort_bucket_locked(bucket_index, coal_sort_order); |
39236c6e | 1239 | proc_list_unlock(); |
0a7de745 | 1240 | |
0a7de745 | 1241 | return 0; |
316670eb A |
1242 | } |
1243 | ||
0a7de745 | 1244 | /* |
cb323159 | 1245 | * Sort processes by size for a single jetsam bucket. |
0a7de745 | 1246 | */ |
0a7de745 | 1247 | |
cb323159 A |
1248 | static void |
1249 | memorystatus_sort_by_largest_process_locked(unsigned int bucket_index) | |
316670eb | 1250 | { |
cb323159 A |
1251 | proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL; |
1252 | proc_t next_p = NULL, prev_max_proc = NULL; | |
1253 | uint32_t pages = 0, max_pages = 0; | |
1254 | memstat_bucket_t *current_bucket; | |
3e170ce0 | 1255 | |
cb323159 A |
1256 | if (bucket_index >= MEMSTAT_BUCKET_COUNT) { |
1257 | return; | |
3e170ce0 A |
1258 | } |
1259 | ||
cb323159 | 1260 | current_bucket = &memstat_bucket[bucket_index]; |
d9a64523 | 1261 | |
cb323159 | 1262 | p = TAILQ_FIRST(¤t_bucket->list); |
316670eb | 1263 | |
cb323159 A |
1264 | while (p) { |
1265 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL); | |
1266 | max_pages = pages; | |
1267 | max_proc = p; | |
1268 | prev_max_proc = p; | |
d9a64523 | 1269 | |
cb323159 A |
1270 | while ((next_p = TAILQ_NEXT(p, p_memstat_list)) != NULL) { |
1271 | /* traversing list until we find next largest process */ | |
1272 | p = next_p; | |
1273 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL); | |
1274 | if (pages > max_pages) { | |
1275 | max_pages = pages; | |
1276 | max_proc = p; | |
d9a64523 | 1277 | } |
cb323159 | 1278 | } |
0a7de745 | 1279 | |
cb323159 A |
1280 | if (prev_max_proc != max_proc) { |
1281 | /* found a larger process, place it in the list */ | |
1282 | TAILQ_REMOVE(¤t_bucket->list, max_proc, p_memstat_list); | |
1283 | if (insert_after_proc == NULL) { | |
1284 | TAILQ_INSERT_HEAD(¤t_bucket->list, max_proc, p_memstat_list); | |
d9a64523 | 1285 | } else { |
cb323159 | 1286 | TAILQ_INSERT_AFTER(¤t_bucket->list, insert_after_proc, max_proc, p_memstat_list); |
d9a64523 | 1287 | } |
cb323159 | 1288 | prev_max_proc = max_proc; |
d9a64523 A |
1289 | } |
1290 | ||
cb323159 | 1291 | insert_after_proc = max_proc; |
3e170ce0 | 1292 | |
cb323159 | 1293 | p = TAILQ_NEXT(max_proc, p_memstat_list); |
39236c6e | 1294 | } |
316670eb A |
1295 | } |
1296 | ||
cb323159 A |
1297 | proc_t |
1298 | memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) | |
316670eb | 1299 | { |
cb323159 A |
1300 | memstat_bucket_t *current_bucket; |
1301 | proc_t next_p; | |
fe8ab488 | 1302 | |
cb323159 A |
1303 | if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) { |
1304 | return NULL; | |
0a7de745 | 1305 | } |
316670eb | 1306 | |
cb323159 A |
1307 | current_bucket = &memstat_bucket[*bucket_index]; |
1308 | next_p = TAILQ_FIRST(¤t_bucket->list); | |
1309 | if (!next_p && search) { | |
1310 | while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) { | |
1311 | current_bucket = &memstat_bucket[*bucket_index]; | |
1312 | next_p = TAILQ_FIRST(¤t_bucket->list); | |
d190cdc3 | 1313 | } |
316670eb A |
1314 | } |
1315 | ||
cb323159 | 1316 | return next_p; |
316670eb A |
1317 | } |
1318 | ||
cb323159 A |
1319 | proc_t |
1320 | memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) | |
d9a64523 | 1321 | { |
cb323159 A |
1322 | memstat_bucket_t *current_bucket; |
1323 | proc_t next_p; | |
d9a64523 | 1324 | |
cb323159 A |
1325 | if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) { |
1326 | return NULL; | |
d9a64523 A |
1327 | } |
1328 | ||
cb323159 A |
1329 | next_p = TAILQ_NEXT(p, p_memstat_list); |
1330 | while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) { | |
1331 | current_bucket = &memstat_bucket[*bucket_index]; | |
1332 | next_p = TAILQ_FIRST(¤t_bucket->list); | |
d9a64523 A |
1333 | } |
1334 | ||
cb323159 A |
1335 | return next_p; |
1336 | } | |
d9a64523 | 1337 | |
cb323159 A |
/*
 * Structure to hold state for a jetsam thread.
 * Typically there should be a single jetsam thread
 * unless parallel jetsam is enabled.
 *
 * jetsam_threads points at the per-thread records; a jetsam thread finds
 * its own record by comparing the 'thread' field with current_thread().
 * NOTE(review): presumably an array of max_jetsam_threads entries; the
 * allocation is not visible in this part of the file.
 */
struct jetsam_thread_state {
	uint8_t                         inited; /* boolean - if the thread is initialized */
	uint8_t                         limit_to_low_bands; /* boolean */
	int                             memorystatus_wakeup; /* wake channel */
	int                             index; /* jetsam thread index */
	thread_t                        thread; /* jetsam thread pointer */
} *jetsam_threads;
d9a64523 | 1350 | |
cb323159 A |
1351 | /* Maximum number of jetsam threads allowed */ |
1352 | #define JETSAM_THREADS_LIMIT 3 | |
d9a64523 | 1353 | |
cb323159 A |
1354 | /* Number of active jetsam threads */ |
1355 | _Atomic int active_jetsam_threads = 1; | |
d9a64523 | 1356 | |
cb323159 A |
1357 | /* Number of maximum jetsam threads configured */ |
1358 | int max_jetsam_threads = JETSAM_THREADS_LIMIT; | |
d9a64523 | 1359 | |
cb323159 A |
1360 | /* |
1361 | * Global switch for enabling fast jetsam. Fast jetsam is | |
1362 | * hooked up via the system_override() system call. It has the | |
1363 | * following effects: | |
1364 | * - Raise the jetsam threshold ("clear-the-deck") | |
1365 | * - Enable parallel jetsam on eligible devices |
1366 | */ | |
c6bf4f31 A |
1367 | #if __AMP__ |
1368 | int fast_jetsam_enabled = 1; | |
1369 | #else /* __AMP__ */ | |
cb323159 | 1370 | int fast_jetsam_enabled = 0; |
c6bf4f31 | 1371 | #endif /* __AMP__ */ |
d9a64523 | 1372 | |
f427ee49 A |
1373 | #if CONFIG_DIRTYSTATUS_TRACKING |
1374 | int dirtystatus_tracking_enabled = 0; | |
1375 | SYSCTL_INT(_kern, OID_AUTO, dirtystatus_tracking_enabled, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &dirtystatus_tracking_enabled, 0, ""); | |
1376 | #endif | |
1377 | ||
cb323159 A |
1378 | /* Routine to find the jetsam state structure for the current jetsam thread */ |
1379 | static inline struct jetsam_thread_state * | |
1380 | jetsam_current_thread(void) | |
1381 | { | |
1382 | for (int thr_id = 0; thr_id < max_jetsam_threads; thr_id++) { | |
1383 | if (jetsam_threads[thr_id].thread == current_thread()) { | |
1384 | return &(jetsam_threads[thr_id]); | |
1385 | } | |
1386 | } | |
1387 | return NULL; | |
1388 | } | |
0a7de745 | 1389 | |
d9a64523 | 1390 | |
cb323159 A |
1391 | __private_extern__ void |
1392 | memorystatus_init(void) | |
1393 | { | |
1394 | kern_return_t result; | |
1395 | int i; | |
d9a64523 | 1396 | |
cb323159 A |
1397 | #if CONFIG_FREEZE |
1398 | memorystatus_freeze_jetsam_band = JETSAM_PRIORITY_UI_SUPPORT; | |
1399 | memorystatus_frozen_processes_max = FREEZE_PROCESSES_MAX; | |
1400 | memorystatus_frozen_shared_mb_max = ((MAX_FROZEN_SHARED_MB_PERCENT * max_task_footprint_mb) / 100); /* 10% of the system wide task limit */ | |
1401 | memorystatus_freeze_shared_mb_per_process_max = (memorystatus_frozen_shared_mb_max / 4); | |
1402 | memorystatus_freeze_pages_min = FREEZE_PAGES_MIN; | |
1403 | memorystatus_freeze_pages_max = FREEZE_PAGES_MAX; | |
1404 | memorystatus_max_frozen_demotions_daily = MAX_FROZEN_PROCESS_DEMOTIONS; | |
1405 | memorystatus_thaw_count_demotion_threshold = MIN_THAW_DEMOTION_THRESHOLD; | |
1406 | #endif | |
d9a64523 | 1407 | |
cb323159 | 1408 | #if DEVELOPMENT || DEBUG |
f427ee49 | 1409 | if (kill_on_no_paging_space) { |
cb323159 A |
1410 | max_kill_priority = JETSAM_PRIORITY_MAX; |
1411 | } | |
1412 | #endif | |
d9a64523 | 1413 | |
cb323159 A |
1414 | /* Init buckets */ |
1415 | for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) { | |
1416 | TAILQ_INIT(&memstat_bucket[i].list); | |
1417 | memstat_bucket[i].count = 0; | |
1418 | memstat_bucket[i].relaunch_high_count = 0; | |
1419 | } | |
1420 | memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL); | |
d9a64523 | 1421 | |
cb323159 A |
1422 | nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_sysprocs_idle_delay_time); |
1423 | nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_apps_idle_delay_time); | |
0a7de745 | 1424 | |
cb323159 A |
1425 | #if CONFIG_JETSAM |
1426 | /* Apply overrides */ | |
1427 | if (!PE_parse_boot_argn("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage))) { | |
1428 | PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage)); | |
1429 | } | |
1430 | if (delta_percentage == 0) { | |
1431 | delta_percentage = 5; | |
1432 | } | |
1433 | if (max_mem > config_jetsam_large_memory_cutoff) { | |
1434 | critical_threshold_percentage = critical_threshold_percentage_larger_devices; | |
1435 | delta_percentage = delta_percentage_larger_devices; | |
1436 | } | |
1437 | assert(delta_percentage < 100); | |
1438 | if (!PE_parse_boot_argn("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage))) { | |
1439 | PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage)); | |
1440 | } | |
1441 | assert(critical_threshold_percentage < 100); | |
1442 | PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage)); | |
1443 | assert(idle_offset_percentage < 100); | |
1444 | PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage)); | |
1445 | assert(pressure_threshold_percentage < 100); | |
1446 | PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage)); | |
1447 | assert(freeze_threshold_percentage < 100); | |
d9a64523 | 1448 | |
d9a64523 | 1449 | |
cb323159 A |
1450 | if (!PE_parse_boot_argn("jetsam_aging_policy", &jetsam_aging_policy, |
1451 | sizeof(jetsam_aging_policy))) { | |
1452 | if (!PE_get_default("kern.jetsam_aging_policy", &jetsam_aging_policy, | |
1453 | sizeof(jetsam_aging_policy))) { | |
1454 | jetsam_aging_policy = kJetsamAgingPolicySysProcsReclaimedFirst; | |
d9a64523 | 1455 | } |
cb323159 | 1456 | } |
d9a64523 | 1457 | |
cb323159 A |
1458 | if (jetsam_aging_policy > kJetsamAgingPolicyMax) { |
1459 | jetsam_aging_policy = kJetsamAgingPolicySysProcsReclaimedFirst; | |
1460 | } | |
d9a64523 | 1461 | |
cb323159 A |
1462 | switch (jetsam_aging_policy) { |
1463 | case kJetsamAgingPolicyNone: | |
1464 | system_procs_aging_band = JETSAM_PRIORITY_IDLE; | |
1465 | applications_aging_band = JETSAM_PRIORITY_IDLE; | |
1466 | break; | |
d9a64523 | 1467 | |
cb323159 A |
1468 | case kJetsamAgingPolicyLegacy: |
1469 | /* | |
1470 | * Legacy behavior where some daemons get a 10s protection once | |
1471 | * AND only before the first clean->dirty->clean transition before | |
1472 | * going into IDLE band. | |
1473 | */ | |
1474 | system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND1; | |
1475 | applications_aging_band = JETSAM_PRIORITY_IDLE; | |
1476 | break; | |
d9a64523 | 1477 | |
cb323159 A |
1478 | case kJetsamAgingPolicySysProcsReclaimedFirst: |
1479 | system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND1; | |
1480 | applications_aging_band = JETSAM_PRIORITY_AGING_BAND2; | |
1481 | break; | |
d9a64523 | 1482 | |
cb323159 A |
1483 | case kJetsamAgingPolicyAppsReclaimedFirst: |
1484 | system_procs_aging_band = JETSAM_PRIORITY_AGING_BAND2; | |
1485 | applications_aging_band = JETSAM_PRIORITY_AGING_BAND1; | |
1486 | break; | |
0a7de745 | 1487 | |
cb323159 A |
1488 | default: |
1489 | break; | |
d9a64523 | 1490 | } |
0a7de745 | 1491 | |
cb323159 A |
1492 | /* |
1493 | * The aging bands cannot overlap with the JETSAM_PRIORITY_ELEVATED_INACTIVE | |
1494 | * band and must be below it in priority. This is so that we don't have to make | |
1495 | * our 'aging' code worry about a mix of processes, some of which need to age | |
1496 | * and some others that need to stay elevated in the jetsam bands. | |
1497 | */ | |
1498 | assert(JETSAM_PRIORITY_ELEVATED_INACTIVE > system_procs_aging_band); | |
1499 | assert(JETSAM_PRIORITY_ELEVATED_INACTIVE > applications_aging_band); | |
1500 | ||
1501 | /* Take snapshots for idle-exit kills by default? First check the boot-arg... */ | |
1502 | if (!PE_parse_boot_argn("jetsam_idle_snapshot", &memorystatus_idle_snapshot, sizeof(memorystatus_idle_snapshot))) { | |
1503 | /* ...no boot-arg, so check the device tree */ | |
1504 | PE_get_default("kern.jetsam_idle_snapshot", &memorystatus_idle_snapshot, sizeof(memorystatus_idle_snapshot)); | |
1505 | } | |
d9a64523 | 1506 | |
f427ee49 A |
1507 | memorystatus_delta = (unsigned int) (delta_percentage * atop_64(max_mem) / 100); |
1508 | memorystatus_available_pages_critical_idle_offset = (unsigned int) (idle_offset_percentage * atop_64(max_mem) / 100); | |
1509 | memorystatus_available_pages_critical_base = (unsigned int) ((critical_threshold_percentage / delta_percentage) * memorystatus_delta); | |
1510 | memorystatus_policy_more_free_offset_pages = (unsigned int) ((policy_more_free_offset_percentage / delta_percentage) * memorystatus_delta); | |
1511 | memorystatus_sysproc_aging_aggr_pages = (unsigned int) (sysproc_aging_aggr_threshold_percentage * atop_64(max_mem) / 100); | |
d9a64523 | 1512 | |
cb323159 A |
1513 | /* Jetsam Loop Detection */ |
1514 | if (max_mem <= (512 * 1024 * 1024)) { | |
1515 | /* 512 MB devices */ | |
1516 | memorystatus_jld_eval_period_msecs = 8000; /* 8000 msecs == 8 second window */ | |
d9a64523 | 1517 | } else { |
cb323159 A |
1518 | /* 1GB and larger devices */ |
1519 | memorystatus_jld_eval_period_msecs = 6000; /* 6000 msecs == 6 second window */ | |
d9a64523 A |
1520 | } |
1521 | ||
cb323159 | 1522 | memorystatus_jld_enabled = TRUE; |
0a7de745 | 1523 | |
cb323159 A |
1524 | /* No contention at this point */ |
1525 | memorystatus_update_levels_locked(FALSE); | |
d9a64523 | 1526 | |
cb323159 | 1527 | #endif /* CONFIG_JETSAM */ |
d9a64523 | 1528 | |
f427ee49 A |
1529 | #if __arm64__ |
1530 | if (!PE_parse_boot_argn("entitled_max_task_pmem", &memorystatus_entitled_max_task_footprint_mb, | |
1531 | sizeof(memorystatus_entitled_max_task_footprint_mb))) { | |
1532 | if (!PE_get_default("kern.entitled_max_task_pmem", &memorystatus_entitled_max_task_footprint_mb, | |
1533 | sizeof(memorystatus_entitled_max_task_footprint_mb))) { | |
1534 | // entitled_max_task_pmem is not supported on this system. | |
1535 | memorystatus_entitled_max_task_footprint_mb = 0; | |
1536 | } | |
1537 | } | |
1538 | if (memorystatus_entitled_max_task_footprint_mb > max_mem / (1UL << 20) || memorystatus_entitled_max_task_footprint_mb < 0) { | |
1539 | os_log_with_startup_serial(OS_LOG_DEFAULT, "Invalid value (%d) for entitled_max_task_pmem. Setting to 0", | |
1540 | memorystatus_entitled_max_task_footprint_mb); | |
1541 | } | |
1542 | #endif /* __arm64__ */ | |
1543 | ||
cb323159 | 1544 | memorystatus_jetsam_snapshot_max = maxproc; |
d9a64523 | 1545 | |
cb323159 A |
1546 | memorystatus_jetsam_snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + |
1547 | (sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max); | |
d9a64523 | 1548 | |
f427ee49 | 1549 | memorystatus_jetsam_snapshot = kalloc_flags(memorystatus_jetsam_snapshot_size, Z_WAITOK | Z_ZERO); |
cb323159 A |
1550 | if (!memorystatus_jetsam_snapshot) { |
1551 | panic("Could not allocate memorystatus_jetsam_snapshot"); | |
1552 | } | |
2d21ac55 | 1553 | |
f427ee49 | 1554 | memorystatus_jetsam_snapshot_copy = kalloc_flags(memorystatus_jetsam_snapshot_size, Z_WAITOK | Z_ZERO); |
cb323159 A |
1555 | if (!memorystatus_jetsam_snapshot_copy) { |
1556 | panic("Could not allocate memorystatus_jetsam_snapshot_copy"); | |
1557 | } | |
fe8ab488 | 1558 | |
f427ee49 A |
1559 | #if CONFIG_FREEZE |
1560 | memorystatus_jetsam_snapshot_freezer_max = memorystatus_jetsam_snapshot_max / JETSAM_SNAPSHOT_FREEZER_MAX_FACTOR; | |
1561 | memorystatus_jetsam_snapshot_freezer_size = sizeof(memorystatus_jetsam_snapshot_t) + | |
1562 | (sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_freezer_max); | |
1563 | ||
1564 | memorystatus_jetsam_snapshot_freezer = kalloc_flags(memorystatus_jetsam_snapshot_freezer_size, Z_WAITOK | Z_ZERO); | |
1565 | if (!memorystatus_jetsam_snapshot_freezer) { | |
1566 | panic("Could not allocate memorystatus_jetsam_snapshot_freezer"); | |
1567 | } | |
1568 | #endif /* CONFIG_FREEZE */ | |
1569 | ||
cb323159 | 1570 | nanoseconds_to_absolutetime((uint64_t)JETSAM_SNAPSHOT_TIMEOUT_SECS * NSEC_PER_SEC, &memorystatus_jetsam_snapshot_timeout); |
39236c6e | 1571 | |
cb323159 | 1572 | memset(&memorystatus_at_boot_snapshot, 0, sizeof(memorystatus_jetsam_snapshot_t)); |
39037602 | 1573 | |
cb323159 | 1574 | #if CONFIG_FREEZE |
f427ee49 | 1575 | memorystatus_freeze_threshold = (unsigned int) ((freeze_threshold_percentage / delta_percentage) * memorystatus_delta); |
cb323159 | 1576 | #endif |
39037602 | 1577 | |
cb323159 A |
1578 | /* Check the boot-arg to see if fast jetsam is allowed */ |
1579 | if (!PE_parse_boot_argn("fast_jetsam_enabled", &fast_jetsam_enabled, sizeof(fast_jetsam_enabled))) { | |
1580 | fast_jetsam_enabled = 0; | |
0a7de745 | 1581 | } |
39037602 | 1582 | |
cb323159 A |
1583 | /* Check the boot-arg to configure the maximum number of jetsam threads */ |
1584 | if (!PE_parse_boot_argn("max_jetsam_threads", &max_jetsam_threads, sizeof(max_jetsam_threads))) { | |
1585 | max_jetsam_threads = JETSAM_THREADS_LIMIT; | |
1586 | } | |
39037602 | 1587 | |
cb323159 A |
1588 | /* Restrict the maximum number of jetsam threads to JETSAM_THREADS_LIMIT */ |
1589 | if (max_jetsam_threads > JETSAM_THREADS_LIMIT) { | |
1590 | max_jetsam_threads = JETSAM_THREADS_LIMIT; | |
1591 | } | |
39037602 | 1592 | |
cb323159 A |
1593 | /* For low CPU systems disable fast jetsam mechanism */ |
1594 | if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) { | |
1595 | max_jetsam_threads = 1; | |
1596 | fast_jetsam_enabled = 0; | |
1597 | } | |
39037602 | 1598 | |
cb323159 | 1599 | /* Initialize the jetsam_threads state array */ |
f427ee49 A |
1600 | jetsam_threads = zalloc_permanent(sizeof(struct jetsam_thread_state) * |
1601 | max_jetsam_threads, ZALIGN(struct jetsam_thread_state)); | |
39037602 | 1602 | |
cb323159 A |
1603 | /* Initialize all the jetsam threads */ |
1604 | for (i = 0; i < max_jetsam_threads; i++) { | |
1605 | jetsam_threads[i].inited = FALSE; | |
1606 | jetsam_threads[i].index = i; | |
1607 | result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &jetsam_threads[i].thread); | |
1608 | if (result != KERN_SUCCESS) { | |
1609 | panic("Could not create memorystatus_thread %d", i); | |
0a7de745 | 1610 | } |
cb323159 | 1611 | thread_deallocate(jetsam_threads[i].thread); |
39037602 | 1612 | } |
39037602 A |
1613 | } |
1614 | ||
cb323159 A |
/* Centralised for the purposes of allowing panic-on-jetsam */

/* Compactor entry points used by the kill path; only declared here */
extern void vm_run_compactor(void);
extern void vm_wake_compactor_swapper(void);
39037602 | 1620 | |
3e170ce0 | 1621 | /* |
cb323159 A |
1622 | * The jetsam no frills kill call |
1623 | * Return: 0 on success | |
1624 | * error code on failure (EINVAL...) | |
3e170ce0 | 1625 | */ |
0a7de745 | 1626 | static int |
cb323159 | 1627 | jetsam_do_kill(proc_t p, int jetsam_flags, os_reason_t jetsam_reason) |
3e170ce0 | 1628 | { |
cb323159 A |
1629 | int error = 0; |
1630 | error = exit_with_reason(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags, jetsam_reason); | |
1631 | return error; | |
1632 | } | |
3e170ce0 | 1633 | |
cb323159 A |
1634 | /* |
1635 | * Wrapper for processes exiting with memorystatus details | |
1636 | */ | |
1637 | static boolean_t | |
1638 | memorystatus_do_kill(proc_t p, uint32_t cause, os_reason_t jetsam_reason, uint64_t *footprint_of_killed_proc) | |
1639 | { | |
1640 | int error = 0; | |
1641 | __unused pid_t victim_pid = p->p_pid; | |
1642 | uint64_t footprint = get_task_phys_footprint(p->task); | |
1643 | #if (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) | |
1644 | int32_t memstat_effectivepriority = p->p_memstat_effectivepriority; | |
1645 | #endif /* (KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD) */ | |
3e170ce0 | 1646 | |
cb323159 A |
1647 | KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START, |
1648 | victim_pid, cause, vm_page_free_count, footprint, 0); | |
1649 | DTRACE_MEMORYSTATUS4(memorystatus_do_kill, proc_t, p, os_reason_t, jetsam_reason, uint32_t, cause, uint64_t, footprint); | |
1650 | #if CONFIG_JETSAM && (DEVELOPMENT || DEBUG) | |
1651 | if (memorystatus_jetsam_panic_debug & (1 << cause)) { | |
1652 | panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause); | |
3e170ce0 A |
1653 | } |
1654 | #else | |
cb323159 | 1655 | #pragma unused(cause) |
3e170ce0 A |
1656 | #endif |
1657 | ||
cb323159 A |
1658 | if (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND) { |
1659 | printf("memorystatus: killing process %d [%s] in high band %s (%d) - memorystatus_available_pages: %llu\n", p->p_pid, | |
1660 | (*p->p_name ? p->p_name : "unknown"), | |
1661 | memorystatus_priority_band_name(p->p_memstat_effectivepriority), p->p_memstat_effectivepriority, | |
f427ee49 | 1662 | (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES); |
5ba3f43e A |
1663 | } |
1664 | ||
cb323159 A |
1665 | /* |
1666 | * The jetsam_reason (os_reason_t) has enough information about the kill cause. | |
1667 | * We don't really need jetsam_flags anymore, so it's okay that not all possible kill causes have been mapped. | |
1668 | */ | |
1669 | int jetsam_flags = P_LTERM_JETSAM; | |
1670 | switch (cause) { | |
1671 | case kMemorystatusKilledHiwat: jetsam_flags |= P_JETSAM_HIWAT; break; | |
1672 | case kMemorystatusKilledVnodes: jetsam_flags |= P_JETSAM_VNODE; break; | |
1673 | case kMemorystatusKilledVMPageShortage: jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break; | |
1674 | case kMemorystatusKilledVMCompressorThrashing: | |
1675 | case kMemorystatusKilledVMCompressorSpaceShortage: jetsam_flags |= P_JETSAM_VMTHRASHING; break; | |
1676 | case kMemorystatusKilledFCThrashing: jetsam_flags |= P_JETSAM_FCTHRASHING; break; | |
1677 | case kMemorystatusKilledPerProcessLimit: jetsam_flags |= P_JETSAM_PID; break; | |
1678 | case kMemorystatusKilledIdleExit: jetsam_flags |= P_JETSAM_IDLEEXIT; break; | |
3e170ce0 | 1679 | } |
cb323159 A |
1680 | error = jetsam_do_kill(p, jetsam_flags, jetsam_reason); |
1681 | *footprint_of_killed_proc = ((error == 0) ? footprint : 0); | |
0a7de745 | 1682 | |
cb323159 A |
1683 | KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END, |
1684 | victim_pid, memstat_effectivepriority, vm_page_free_count, error, 0); | |
1685 | ||
1686 | KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_COMPACTOR_RUN)) | DBG_FUNC_START, | |
1687 | victim_pid, cause, vm_page_free_count, *footprint_of_killed_proc, 0); | |
1688 | ||
c3c9b80d A |
1689 | if (jetsam_reason->osr_code == JETSAM_REASON_VNODE) { |
1690 | /* | |
1691 | * vnode jetsams are syncronous and not caused by memory pressure. | |
1692 | * Running the compactor on this thread adds significant latency to the filesystem operation | |
1693 | * that triggered this jetsam. | |
1694 | * Kick of compactor thread asyncronously instead. | |
1695 | */ | |
1696 | vm_wake_compactor_swapper(); | |
1697 | } else { | |
1698 | vm_run_compactor(); | |
1699 | } | |
cb323159 A |
1700 | |
1701 | KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_COMPACTOR_RUN)) | DBG_FUNC_END, | |
1702 | victim_pid, cause, vm_page_free_count, 0, 0); | |
1703 | ||
1704 | return error == 0; | |
3e170ce0 A |
1705 | } |
1706 | ||
/*
 * Node manipulation
 */

/*
 * Refresh the memorystatus levels after the process lists have changed.
 * Only does work on CONFIG_JETSAM kernels.
 */
static void
memorystatus_check_levels_locked(void)
{
#if !CONFIG_JETSAM
	/*
	 * Nothing to do here currently since we update
	 * memorystatus_available_pages in vm_pressure_response.
	 */
#else /* CONFIG_JETSAM */
	/* Recompute the levels */
	memorystatus_update_levels_locked(TRUE);
#endif /* !CONFIG_JETSAM */
}
0a7de745 | 1724 | |
cb323159 A |
1725 | /* |
1726 | * Pin a process to a particular jetsam band when it is in the background i.e. not doing active work. | |
1727 | * For an application: that means no longer in the FG band | |
1728 | * For a daemon: that means no longer in its 'requested' jetsam priority band | |
1729 | */ | |
0a7de745 | 1730 | |
cb323159 A |
1731 | int |
1732 | memorystatus_update_inactive_jetsam_priority_band(pid_t pid, uint32_t op_flags, int jetsam_prio, boolean_t effective_now) | |
1733 | { | |
1734 | int error = 0; | |
1735 | boolean_t enable = FALSE; | |
1736 | proc_t p = NULL; | |
fe8ab488 | 1737 | |
cb323159 A |
1738 | if (op_flags == MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE) { |
1739 | enable = TRUE; | |
1740 | } else if (op_flags == MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_DISABLE) { | |
1741 | enable = FALSE; | |
1742 | } else { | |
1743 | return EINVAL; | |
1744 | } | |
fe8ab488 | 1745 | |
cb323159 A |
1746 | p = proc_find(pid); |
1747 | if (p != NULL) { | |
1748 | if ((enable && ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) == P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND)) || | |
1749 | (!enable && ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) == 0))) { | |
1750 | /* | |
1751 | * No change in state. | |
1752 | */ | |
1753 | } else { | |
1754 | proc_list_lock(); | |
0a7de745 | 1755 | |
cb323159 A |
1756 | if (enable) { |
1757 | p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND; | |
1758 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
fe8ab488 | 1759 | |
cb323159 A |
1760 | if (effective_now) { |
1761 | if (p->p_memstat_effectivepriority < jetsam_prio) { | |
1762 | if (memorystatus_highwater_enabled) { | |
1763 | /* | |
1764 | * Process is about to transition from | |
1765 | * inactive --> active | |
1766 | * assign active state | |
1767 | */ | |
1768 | boolean_t is_fatal; | |
1769 | boolean_t use_active = TRUE; | |
1770 | CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal); | |
1771 | task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, use_active, is_fatal); | |
1772 | } | |
1773 | memorystatus_update_priority_locked(p, jetsam_prio, FALSE, FALSE); | |
1774 | } | |
1775 | } else { | |
1776 | if (isProcessInAgingBands(p)) { | |
1777 | memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE); | |
1778 | } | |
1779 | } | |
fe8ab488 | 1780 | } else { |
cb323159 A |
1781 | p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND; |
1782 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
fe8ab488 | 1783 | |
cb323159 A |
1784 | if (effective_now) { |
1785 | if (p->p_memstat_effectivepriority == jetsam_prio) { | |
1786 | memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE); | |
1787 | } | |
1788 | } else { | |
1789 | if (isProcessInAgingBands(p)) { | |
1790 | memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE); | |
1791 | } | |
1792 | } | |
1793 | } | |
fe8ab488 | 1794 | |
cb323159 A |
1795 | proc_list_unlock(); |
1796 | } | |
1797 | proc_rele(p); | |
1798 | error = 0; | |
1799 | } else { | |
1800 | error = ESRCH; | |
fe8ab488 | 1801 | } |
cb323159 A |
1802 | |
1803 | return error; | |
fe8ab488 A |
1804 | } |
1805 | ||
cb323159 A |
1806 | static void |
1807 | memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2) | |
0a7de745 | 1808 | { |
cb323159 A |
1809 | proc_t p; |
1810 | uint64_t current_time = 0, idle_delay_time = 0; | |
1811 | int demote_prio_band = 0; | |
1812 | memstat_bucket_t *demotion_bucket; | |
39236c6e | 1813 | |
cb323159 | 1814 | MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n"); |
39236c6e | 1815 | |
cb323159 | 1816 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0); |
0a7de745 | 1817 | |
cb323159 | 1818 | current_time = mach_absolute_time(); |
39236c6e | 1819 | |
cb323159 | 1820 | proc_list_lock(); |
0a7de745 | 1821 | |
cb323159 | 1822 | demote_prio_band = JETSAM_PRIORITY_IDLE + 1; |
39236c6e | 1823 | |
cb323159 A |
1824 | for (; demote_prio_band < JETSAM_PRIORITY_MAX; demote_prio_band++) { |
1825 | if (demote_prio_band != system_procs_aging_band && demote_prio_band != applications_aging_band) { | |
1826 | continue; | |
1827 | } | |
39236c6e | 1828 | |
cb323159 A |
1829 | demotion_bucket = &memstat_bucket[demote_prio_band]; |
1830 | p = TAILQ_FIRST(&demotion_bucket->list); | |
d9a64523 | 1831 | |
cb323159 A |
1832 | while (p) { |
1833 | MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid); | |
d9a64523 | 1834 | |
cb323159 | 1835 | assert(p->p_memstat_idledeadline); |
d9a64523 | 1836 | |
cb323159 | 1837 | assert(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS); |
d9a64523 | 1838 | |
cb323159 A |
1839 | if (current_time >= p->p_memstat_idledeadline) { |
1840 | if ((isSysProc(p) && | |
1841 | ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED | P_DIRTY_IS_DIRTY)) != P_DIRTY_IDLE_EXIT_ENABLED)) || /* system proc marked dirty*/ | |
1842 | task_has_assertions((struct task *)(p->task))) { /* has outstanding assertions which might indicate outstanding work too */ | |
1843 | idle_delay_time = (isSysProc(p)) ? memorystatus_sysprocs_idle_time(p) : memorystatus_apps_idle_time(p); | |
d9a64523 | 1844 | |
cb323159 A |
1845 | p->p_memstat_idledeadline += idle_delay_time; |
1846 | p = TAILQ_NEXT(p, p_memstat_list); | |
1847 | } else { | |
1848 | proc_t next_proc = NULL; | |
d9a64523 | 1849 | |
cb323159 A |
1850 | next_proc = TAILQ_NEXT(p, p_memstat_list); |
1851 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
d9a64523 | 1852 | |
cb323159 | 1853 | memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, false, true); |
39236c6e | 1854 | |
cb323159 A |
1855 | p = next_proc; |
1856 | continue; | |
1857 | } | |
1858 | } else { | |
1859 | // No further candidates | |
1860 | break; | |
1861 | } | |
1862 | } | |
1863 | } | |
fe8ab488 | 1864 | |
cb323159 | 1865 | memorystatus_reschedule_idle_demotion_locked(); |
39037602 | 1866 | |
cb323159 | 1867 | proc_list_unlock(); |
5ba3f43e | 1868 | |
cb323159 A |
1869 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0); |
1870 | } | |
0a7de745 | 1871 | |
cb323159 A |
1872 | static void |
1873 | memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state) | |
1874 | { | |
1875 | boolean_t present_in_sysprocs_aging_bucket = FALSE; | |
1876 | boolean_t present_in_apps_aging_bucket = FALSE; | |
1877 | uint64_t idle_delay_time = 0; | |
39037602 | 1878 | |
cb323159 A |
1879 | if (jetsam_aging_policy == kJetsamAgingPolicyNone) { |
1880 | return; | |
39236c6e | 1881 | } |
0a7de745 | 1882 | |
cb323159 A |
1883 | if ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) || |
1884 | (p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION)) { | |
1885 | /* | |
1886 | * This process isn't going to be making the trip to the lower bands. | |
1887 | */ | |
1888 | return; | |
39037602 | 1889 | } |
39037602 | 1890 | |
cb323159 A |
1891 | if (isProcessInAgingBands(p)) { |
1892 | if (jetsam_aging_policy != kJetsamAgingPolicyLegacy) { | |
1893 | assert((p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) != P_DIRTY_AGING_IN_PROGRESS); | |
39037602 | 1894 | } |
39037602 | 1895 | |
cb323159 A |
1896 | if (isSysProc(p) && system_procs_aging_band) { |
1897 | present_in_sysprocs_aging_bucket = TRUE; | |
1898 | } else if (isApp(p) && applications_aging_band) { | |
1899 | present_in_apps_aging_bucket = TRUE; | |
1900 | } | |
39037602 A |
1901 | } |
1902 | ||
cb323159 A |
1903 | assert(!present_in_sysprocs_aging_bucket); |
1904 | assert(!present_in_apps_aging_bucket); | |
39037602 | 1905 | |
cb323159 A |
1906 | MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for pid %d (dirty:0x%x, set_state %d, demotions %d).\n", |
1907 | p->p_pid, p->p_memstat_dirty, set_state, (memorystatus_scheduled_idle_demotions_sysprocs + memorystatus_scheduled_idle_demotions_apps)); | |
39037602 | 1908 | |
cb323159 A |
1909 | if (isSysProc(p)) { |
1910 | assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED); | |
1911 | } | |
39037602 | 1912 | |
cb323159 A |
1913 | idle_delay_time = (isSysProc(p)) ? memorystatus_sysprocs_idle_time(p) : memorystatus_apps_idle_time(p); |
1914 | if (set_state) { | |
1915 | p->p_memstat_dirty |= P_DIRTY_AGING_IN_PROGRESS; | |
1916 | p->p_memstat_idledeadline = mach_absolute_time() + idle_delay_time; | |
39037602 A |
1917 | } |
1918 | ||
cb323159 | 1919 | assert(p->p_memstat_idledeadline); |
39037602 | 1920 | |
cb323159 A |
1921 | if (isSysProc(p) && present_in_sysprocs_aging_bucket == FALSE) { |
1922 | memorystatus_scheduled_idle_demotions_sysprocs++; | |
1923 | } else if (isApp(p) && present_in_apps_aging_bucket == FALSE) { | |
1924 | memorystatus_scheduled_idle_demotions_apps++; | |
39037602 | 1925 | } |
cb323159 | 1926 | } |
3e170ce0 | 1927 | |
cb323159 A |
1928 | void |
1929 | memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state) | |
1930 | { | |
1931 | boolean_t present_in_sysprocs_aging_bucket = FALSE; | |
1932 | boolean_t present_in_apps_aging_bucket = FALSE; | |
0a7de745 | 1933 | |
cb323159 A |
1934 | if (!system_procs_aging_band && !applications_aging_band) { |
1935 | return; | |
5ba3f43e A |
1936 | } |
1937 | ||
cb323159 A |
1938 | if ((p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) == 0) { |
1939 | return; | |
1940 | } | |
5ba3f43e | 1941 | |
cb323159 A |
1942 | if (isProcessInAgingBands(p)) { |
1943 | if (jetsam_aging_policy != kJetsamAgingPolicyLegacy) { | |
1944 | assert((p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) == P_DIRTY_AGING_IN_PROGRESS); | |
1945 | } | |
5ba3f43e | 1946 | |
cb323159 A |
1947 | if (isSysProc(p) && system_procs_aging_band) { |
1948 | assert(p->p_memstat_effectivepriority == system_procs_aging_band); | |
1949 | assert(p->p_memstat_idledeadline); | |
1950 | present_in_sysprocs_aging_bucket = TRUE; | |
1951 | } else if (isApp(p) && applications_aging_band) { | |
1952 | assert(p->p_memstat_effectivepriority == applications_aging_band); | |
1953 | assert(p->p_memstat_idledeadline); | |
1954 | present_in_apps_aging_bucket = TRUE; | |
1955 | } | |
1956 | } | |
5ba3f43e | 1957 | |
cb323159 A |
1958 | MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for pid %d (clear_state %d, demotions %d).\n", |
1959 | p->p_pid, clear_state, (memorystatus_scheduled_idle_demotions_sysprocs + memorystatus_scheduled_idle_demotions_apps)); | |
d9a64523 | 1960 | |
d9a64523 | 1961 | |
cb323159 A |
1962 | if (clear_state) { |
1963 | p->p_memstat_idledeadline = 0; | |
1964 | p->p_memstat_dirty &= ~P_DIRTY_AGING_IN_PROGRESS; | |
39236c6e A |
1965 | } |
1966 | ||
cb323159 A |
1967 | if (isSysProc(p) && present_in_sysprocs_aging_bucket == TRUE) { |
1968 | memorystatus_scheduled_idle_demotions_sysprocs--; | |
1969 | assert(memorystatus_scheduled_idle_demotions_sysprocs >= 0); | |
1970 | } else if (isApp(p) && present_in_apps_aging_bucket == TRUE) { | |
1971 | memorystatus_scheduled_idle_demotions_apps--; | |
1972 | assert(memorystatus_scheduled_idle_demotions_apps >= 0); | |
d9a64523 A |
1973 | } |
1974 | ||
cb323159 A |
1975 | assert((memorystatus_scheduled_idle_demotions_sysprocs + memorystatus_scheduled_idle_demotions_apps) >= 0); |
1976 | } | |
0a7de745 | 1977 | |
cb323159 A |
1978 | static void |
1979 | memorystatus_reschedule_idle_demotion_locked(void) | |
1980 | { | |
1981 | if (0 == (memorystatus_scheduled_idle_demotions_sysprocs + memorystatus_scheduled_idle_demotions_apps)) { | |
1982 | if (memstat_idle_demotion_deadline) { | |
1983 | /* Transitioned 1->0, so cancel next call */ | |
1984 | thread_call_cancel(memorystatus_idle_demotion_call); | |
1985 | memstat_idle_demotion_deadline = 0; | |
1986 | } | |
1987 | } else { | |
1988 | memstat_bucket_t *demotion_bucket; | |
1989 | proc_t p = NULL, p1 = NULL, p2 = NULL; | |
d9a64523 | 1990 | |
cb323159 A |
1991 | if (system_procs_aging_band) { |
1992 | demotion_bucket = &memstat_bucket[system_procs_aging_band]; | |
1993 | p1 = TAILQ_FIRST(&demotion_bucket->list); | |
d9a64523 | 1994 | |
cb323159 A |
1995 | p = p1; |
1996 | } | |
d9a64523 | 1997 | |
cb323159 A |
1998 | if (applications_aging_band) { |
1999 | demotion_bucket = &memstat_bucket[applications_aging_band]; | |
2000 | p2 = TAILQ_FIRST(&demotion_bucket->list); | |
d9a64523 | 2001 | |
cb323159 A |
2002 | if (p1 && p2) { |
2003 | p = (p1->p_memstat_idledeadline > p2->p_memstat_idledeadline) ? p2 : p1; | |
2004 | } else { | |
2005 | p = (p1 == NULL) ? p2 : p1; | |
2006 | } | |
2007 | } | |
d9a64523 | 2008 | |
cb323159 A |
2009 | assert(p); |
2010 | ||
2011 | if (p != NULL) { | |
2012 | assert(p && p->p_memstat_idledeadline); | |
2013 | if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline) { | |
2014 | thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline); | |
2015 | memstat_idle_demotion_deadline = p->p_memstat_idledeadline; | |
2016 | } | |
d9a64523 | 2017 | } |
316670eb | 2018 | } |
39236c6e | 2019 | } |
316670eb | 2020 | |
fe8ab488 | 2021 | /* |
cb323159 | 2022 | * List manipulation |
fe8ab488 | 2023 | */ |
fe8ab488 | 2024 | |
cb323159 A |
2025 | int |
2026 | memorystatus_add(proc_t p, boolean_t locked) | |
0a7de745 | 2027 | { |
cb323159 | 2028 | memstat_bucket_t *bucket; |
fe8ab488 | 2029 | |
cb323159 | 2030 | MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding pid %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority); |
39236c6e | 2031 | |
cb323159 A |
2032 | if (!locked) { |
2033 | proc_list_lock(); | |
316670eb | 2034 | } |
5ba3f43e | 2035 | |
cb323159 A |
2036 | DTRACE_MEMORYSTATUS2(memorystatus_add, proc_t, p, int32_t, p->p_memstat_effectivepriority); |
2037 | ||
2038 | /* Processes marked internal do not have priority tracked */ | |
2039 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { | |
2040 | goto exit; | |
5ba3f43e A |
2041 | } |
2042 | ||
d9a64523 | 2043 | /* |
cb323159 A |
2044 | * Opt out system processes from being frozen by default. |
2045 | * For coalition-based freezing, we only want to freeze sysprocs that have specifically opted in. | |
d9a64523 | 2046 | */ |
cb323159 A |
2047 | if (isSysProc(p)) { |
2048 | p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED; | |
39236c6e | 2049 | } |
2a1bd2d3 A |
2050 | #if CONFIG_FREEZE |
2051 | memorystatus_freeze_init_proc(p); | |
2052 | #endif | |
fe8ab488 | 2053 | |
cb323159 | 2054 | bucket = &memstat_bucket[p->p_memstat_effectivepriority]; |
39236c6e | 2055 | |
cb323159 A |
2056 | if (isSysProc(p) && system_procs_aging_band && (p->p_memstat_effectivepriority == system_procs_aging_band)) { |
2057 | assert(bucket->count == memorystatus_scheduled_idle_demotions_sysprocs - 1); | |
2058 | } else if (isApp(p) && applications_aging_band && (p->p_memstat_effectivepriority == applications_aging_band)) { | |
2059 | assert(bucket->count == memorystatus_scheduled_idle_demotions_apps - 1); | |
2060 | } else if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) { | |
2061 | /* | |
2062 | * Entering the idle band. | |
2063 | * Record idle start time. | |
2064 | */ | |
2065 | p->p_memstat_idle_start = mach_absolute_time(); | |
2066 | } | |
fe8ab488 | 2067 | |
cb323159 A |
2068 | TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list); |
2069 | bucket->count++; | |
2070 | if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) { | |
2071 | bucket->relaunch_high_count++; | |
2072 | } | |
316670eb | 2073 | |
cb323159 | 2074 | memorystatus_list_count++; |
316670eb | 2075 | |
cb323159 A |
2076 | memorystatus_check_levels_locked(); |
2077 | ||
2078 | exit: | |
2079 | if (!locked) { | |
2080 | proc_list_unlock(); | |
2081 | } | |
2082 | ||
2083 | return 0; | |
39236c6e | 2084 | } |
316670eb | 2085 | |
/*
 * Description:
 *	Moves a process from one jetsam bucket to another,
 *	which changes the LRU position of the process.
 *
 *	Monitors transition between buckets and if necessary
 *	will update cached memory limits accordingly.
 *
 * Parameters:
 *	p			process to move. Nothing is done if the
 *				process has P_LIST_EXITED set.
 *	priority		index of the destination bucket; asserted to be
 *				below MEMSTAT_BUCKET_COUNT. When
 *				skip_demotion_check is FALSE the value may be
 *				raised to the freezer band, the elevated
 *				inactive band or the applications aging band
 *				before it is applied.
 *	head_insert		TRUE inserts at the head of the destination
 *				bucket, FALSE at the tail.
 *	skip_demotion_check	see below.
 *
 *	skip_demotion_check:
 *	- if the 'jetsam aging policy' is NOT 'legacy':
 *		When this flag is TRUE, it means we are going
 *		to age the ripe processes out of the aging bands and into the
 *		IDLE band and apply their inactive memory limits.
 *
 *	- if the 'jetsam aging policy' is 'legacy':
 *		When this flag is TRUE, it might mean the above aging mechanism
 *		OR
 *		It might be that we have a process that has used up its 'idle deferral'
 *		stay that is given to it once per lifetime. And in this case, the process
 *		won't be going through any aging codepaths. But we still need to apply
 *		the right inactive limits and so we explicitly set this to TRUE if the
 *		new priority for the process is the IDLE band.
 *
 * NOTE(review): the '_locked' suffix suggests the proc_list_lock must be held
 * by the caller; memorystatus_update() does call this with the lock held.
 */
void
memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert, boolean_t skip_demotion_check)
{
	memstat_bucket_t *old_bucket, *new_bucket;

	assert(priority < MEMSTAT_BUCKET_COUNT);

	/* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */
	if ((p->p_listflag & P_LIST_EXITED) != 0) {
		return;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting %s(%d) to priority %d, inserting at %s\n",
	    (*p->p_name ? p->p_name : "unknown"), p->p_pid, priority, head_insert ? "head" : "tail");

	DTRACE_MEMORYSTATUS3(memorystatus_update_priority, proc_t, p, int32_t, p->p_memstat_effectivepriority, int, priority);

	/* The bucket the process currently sits in; captured before 'priority' is adjusted below. */
	old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];

	if (skip_demotion_check == FALSE) {
		if (isSysProc(p)) {
			/*
			 * For system processes, the memorystatus_dirty_* routines take care of adding/removing
			 * the processes from the aging bands and balancing the demotion counts.
			 * We can, however, override that if the process has an 'elevated inactive jetsam band' attribute.
			 */

			if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) {
				/*
				 * 2 types of processes can use the non-standard elevated inactive band:
				 * - Frozen processes that always land in memorystatus_freeze_jetsam_band
				 * OR
				 * - processes that specifically opt-in to the elevated inactive support e.g. docked processes.
				 */
#if CONFIG_FREEZE
				if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
					/* Never let a frozen process drop below the freezer band. */
					if (priority <= memorystatus_freeze_jetsam_band) {
						priority = memorystatus_freeze_jetsam_band;
					}
				} else
#endif /* CONFIG_FREEZE */
				{
					/* Clamp the request up to the elevated inactive band. */
					if (priority <= JETSAM_PRIORITY_ELEVATED_INACTIVE) {
						priority = JETSAM_PRIORITY_ELEVATED_INACTIVE;
					}
				}
				assert(!(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS));
			}
		} else if (isApp(p)) {
			/*
			 * Check to see if the application is being lowered in jetsam priority. If so, and:
			 * - it has an 'elevated inactive jetsam band' attribute, then put it in the appropriate band.
			 * - it is a normal application, then let it age in the aging band if that policy is in effect.
			 */

			if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) {
#if CONFIG_FREEZE
				if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
					if (priority <= memorystatus_freeze_jetsam_band) {
						priority = memorystatus_freeze_jetsam_band;
					}
				} else
#endif /* CONFIG_FREEZE */
				{
					if (priority <= JETSAM_PRIORITY_ELEVATED_INACTIVE) {
						priority = JETSAM_PRIORITY_ELEVATED_INACTIVE;
					}
				}
			} else {
				if (applications_aging_band) {
					if (p->p_memstat_effectivepriority == applications_aging_band) {
						assert(old_bucket->count == (memorystatus_scheduled_idle_demotions_apps + 1));
					}

					/*
					 * Under a non-legacy aging policy, any request at or below the
					 * applications aging band is redirected into that band and an
					 * idle demotion is scheduled for the process.
					 */
					if ((jetsam_aging_policy != kJetsamAgingPolicyLegacy) && (priority <= applications_aging_band)) {
						assert(!(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS));
						priority = applications_aging_band;
						memorystatus_schedule_idle_demotion_locked(p, TRUE);
					}
				}
			}
		}
	}

	/* Anything entering an (enabled) aging band must already be flagged as aging. */
	if ((system_procs_aging_band && (priority == system_procs_aging_band)) || (applications_aging_band && (priority == applications_aging_band))) {
		assert(p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS);
	}

#if DEVELOPMENT || DEBUG
	if (priority == JETSAM_PRIORITY_IDLE &&                         /* if the process is on its way into the IDLE band */
	    skip_demotion_check == FALSE &&                             /* and it isn't via the path that will set the INACTIVE memlimits */
	    (p->p_memstat_dirty & P_DIRTY_TRACK) &&                     /* and it has 'DIRTY' tracking enabled */
	    ((p->p_memstat_memlimit != p->p_memstat_memlimit_inactive) || /* and we notice that the current limit isn't the right value (inactive) */
	    ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) ? (!(p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT)) : (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT)))) { /* OR type (fatal vs non-fatal) */
		printf("memorystatus_update_priority_locked: on %s with 0x%x, prio: %d and %d\n", p->p_name, p->p_memstat_state, priority, p->p_memstat_memlimit); /* then we must catch this */
	}
#endif /* DEVELOPMENT || DEBUG */

	/* Unlink from the current bucket and keep its counters in step. */
	TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list);
	old_bucket->count--;
	if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) {
		old_bucket->relaunch_high_count--;
	}

	/* Link into the destination bucket at the requested end. */
	new_bucket = &memstat_bucket[priority];
	if (head_insert) {
		TAILQ_INSERT_HEAD(&new_bucket->list, p, p_memstat_list);
	} else {
		TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
	}
	new_bucket->count++;
	if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) {
		new_bucket->relaunch_high_count++;
	}

	if (memorystatus_highwater_enabled) {
		/* Both are only read when ledger_update_needed stays TRUE, in which case a branch below has set them. */
		boolean_t is_fatal;
		boolean_t use_active;

		/*
		 * If cached limit data is updated, then the limits
		 * will be enforced by writing to the ledgers.
		 */
		boolean_t ledger_update_needed = TRUE;

		/*
		 * Here, we must update the cached memory limit if the task
		 * is transitioning between:
		 *	active <--> inactive
		 *	FG     <-->       BG
		 * but:
		 *	dirty  <-->    clean   is ignored
		 *
		 * We bypass non-idle processes that have opted into dirty tracking because
		 * a move between buckets does not imply a transition between the
		 * dirty <--> clean state.
		 */

		if (p->p_memstat_dirty & P_DIRTY_TRACK) {
			if (skip_demotion_check == TRUE && priority == JETSAM_PRIORITY_IDLE) {
				CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
				use_active = FALSE;
			} else {
				ledger_update_needed = FALSE;
			}
		} else if ((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) {
			/*
			 *      inactive --> active
			 *	BG       -->     FG
			 *      assign active state
			 */
			CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
			use_active = TRUE;
		} else if ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
			/*
			 *      active --> inactive
			 *	FG     -->       BG
			 *      assign inactive state
			 */
			CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
			use_active = FALSE;
		} else {
			/*
			 * The transition between jetsam priority buckets apparently did
			 * not affect active/inactive state.
			 * This is not unusual... especially during startup when
			 * processes are getting established in their respective bands.
			 */
			ledger_update_needed = FALSE;
		}

		/*
		 * Enforce the new limits by writing to the ledger
		 */
		if (ledger_update_needed) {
			/* A cached limit of zero or less is passed to the ledger as -1. */
			task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, use_active, is_fatal);

			MEMORYSTATUS_DEBUG(3, "memorystatus_update_priority_locked: new limit on pid %d (%dMB %s) priority old --> new (%d --> %d) dirty?=0x%x %s\n",
			    p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
			    (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, priority, p->p_memstat_dirty,
			    (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
		}
	}

	/*
	 * Record idle start or idle delta.
	 */
	if (p->p_memstat_effectivepriority == priority) {
		/*
		 * This process is not transitioning between
		 * jetsam priority buckets.  Do nothing.
		 */
	} else if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) {
		uint64_t now;
		/*
		 * Transitioning out of the idle priority bucket.
		 * Record idle delta.
		 */
		assert(p->p_memstat_idle_start != 0);
		now = mach_absolute_time();
		/* The delta is left untouched unless time has actually advanced past the idle start. */
		if (now > p->p_memstat_idle_start) {
			p->p_memstat_idle_delta = now - p->p_memstat_idle_start;
		}

		/*
		 * About to become active and so memory footprint could change.
		 * So mark it eligible for freeze-considerations next time around.
		 */
		if (p->p_memstat_state & P_MEMSTAT_FREEZE_IGNORE) {
			p->p_memstat_state &= ~P_MEMSTAT_FREEZE_IGNORE;
		}
	} else if (priority == JETSAM_PRIORITY_IDLE) {
		/*
		 * Transitioning into the idle priority bucket.
		 * Record idle start.
		 */
		p->p_memstat_idle_start = mach_absolute_time();
	}

	/* Trace the change while p_memstat_effectivepriority still holds the old value. */
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CHANGE_PRIORITY), p->p_pid, priority, p->p_memstat_effectivepriority, 0, 0);

	p->p_memstat_effectivepriority = priority;

#if CONFIG_SECLUDED_MEMORY
	/* Secluded memory is allowed only while the task sits at foreground priority or above. */
	if (secluded_for_apps &&
	    task_could_use_secluded_mem(p->task)) {
		task_set_can_use_secluded_mem(
			p->task,
			(priority >= JETSAM_PRIORITY_FOREGROUND));
	}
#endif /* CONFIG_SECLUDED_MEMORY */

	memorystatus_check_levels_locked();
}
316670eb | 2343 | |
cb323159 A |
2344 | int |
2345 | memorystatus_relaunch_flags_update(proc_t p, int relaunch_flags) | |
2346 | { | |
2347 | p->p_memstat_relaunch_flags = relaunch_flags; | |
2348 | KDBG(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_RELAUNCH_FLAGS), p->p_pid, relaunch_flags, 0, 0, 0); | |
2349 | return 0; | |
316670eb A |
2350 | } |
2351 | ||
/*
 *
 * Description: Update the jetsam priority and memory limit attributes for a given process.
 *
 * Parameters:
 *	p	init this process's jetsam information.
 *	priority          The jetsam priority band. -1 is shorthand for
 *			  JETSAM_PRIORITY_DEFAULT; either aging band is
 *			  remapped to JETSAM_PRIORITY_IDLE;
 *			  JETSAM_PRIORITY_IDLE_HEAD means "idle band, insert
 *			  at head".
 *	user_data	  user specific data, unused by the kernel
 *	is_assertion	  When true, a priority update is driven by an assertion.
 *	effective	  guards against race if process's update already occurred
 *	update_memlimit   When true we know this is the init step via the posix_spawn path.
 *
 *	memlimit_active	  Value in megabytes; The monitored footprint level while the
 *			  process is active.  Exceeding it may result in termination
 *			  based on its associated fatal flag.
 *
 *	memlimit_active_is_fatal  When a process is active and exceeds its memory footprint,
 *				  this describes whether or not it should be immediately fatal.
 *
 *	memlimit_inactive Value in megabytes; The monitored footprint level while the
 *			  process is inactive.  Exceeding it may result in termination
 *			  based on its associated fatal flag.
 *
 *	memlimit_inactive_is_fatal  When a process is inactive and exceeds its memory footprint,
 *				    this describes whether or not it should be immediately fatal.
 *
 * Returns:     0	Success (including an ignored assertion-driven idle
 *			request for a process not managed by assertions)
 *		EINVAL	priority is outside [0, MEMSTAT_BUCKET_COUNT)
 *		EALREADY 'effective' was set but the priority was already updated
 *		EBUSY	the process is terminated or has exited
 *
 * Takes and releases the proc_list_lock internally.
 */

int
memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t is_assertion, boolean_t effective, boolean_t update_memlimit,
    int32_t memlimit_active, boolean_t memlimit_active_is_fatal,
    int32_t memlimit_inactive, boolean_t memlimit_inactive_is_fatal)
{
	int ret;
	boolean_t head_insert = false;

	MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing (%s) pid %d: priority %d, user_data 0x%llx\n", (*p->p_name ? p->p_name : "unknown"), p->p_pid, priority, user_data);

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);

	/* Normalize the requested priority before any lock is taken. */
	if (priority == -1) {
		/* Use as shorthand for default priority */
		priority = JETSAM_PRIORITY_DEFAULT;
	} else if ((priority == system_procs_aging_band) || (priority == applications_aging_band)) {
		/* Both the aging bands are reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */
		priority = JETSAM_PRIORITY_IDLE;
	} else if (priority == JETSAM_PRIORITY_IDLE_HEAD) {
		/* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */
		priority = JETSAM_PRIORITY_IDLE;
		head_insert = TRUE;
	} else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) {
		/* Sanity check */
		ret = EINVAL;
		goto out;
	}

	/* From here on every exit path must drop the lock before jumping to 'out'. */
	proc_list_lock();

	assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

	if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) {
		ret = EALREADY;
		proc_list_unlock();
		MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid);
		goto out;
	}

	if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || ((p->p_listflag & P_LIST_EXITED) != 0)) {
		/*
		 * This could happen when a process calling posix_spawn() is exiting on the jetsam thread.
		 */
		ret = EBUSY;
		proc_list_unlock();
		goto out;
	}

	p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED;
	p->p_memstat_userdata = user_data;

	if (is_assertion) {
		if (priority == JETSAM_PRIORITY_IDLE) {
			/*
			 * Assertions relinquish control when the process is heading to IDLE.
			 */
			if (p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION) {
				/*
				 * Mark the process as no longer being managed by assertions.
				 */
				p->p_memstat_state &= ~P_MEMSTAT_PRIORITY_ASSERTION;
			} else {
				/*
				 * Ignore an idle priority transition if the process is not
				 * already managed by assertions.  We won't treat this as
				 * an error, but we will log the unexpected behavior and bail.
				 */
				os_log(OS_LOG_DEFAULT, "memorystatus: Ignore assertion driven idle priority. Process not previously controlled %s:%d\n",
				    (*p->p_name ? p->p_name : "unknown"), p->p_pid);

				ret = 0;
				proc_list_unlock();
				goto out;
			}
		} else {
			/*
			 * Process is now being managed by assertions,
			 */
			p->p_memstat_state |= P_MEMSTAT_PRIORITY_ASSERTION;
		}

		/* Always update the assertion priority in this path */

		p->p_memstat_assertionpriority = priority;

		int memstat_dirty_flags = memorystatus_dirty_get(p, TRUE);  /* proc_list_lock is held */

		if (memstat_dirty_flags != 0) {
			/*
			 * Calculate maximum priority only when dirty tracking processes are involved.
			 */
			int maxpriority;
			if (memstat_dirty_flags & PROC_DIRTY_IS_DIRTY) {
				/* Dirty: the higher of the assertion and the requested priority wins. */
				maxpriority = MAX(p->p_memstat_assertionpriority, p->p_memstat_requestedpriority);
			} else {
				/* clean */

				if (memstat_dirty_flags & PROC_DIRTY_ALLOWS_IDLE_EXIT) {
					/*
					 * The aging policy must be evaluated and applied here because runningboardd
					 * has relinquished its hold on the jetsam priority by attempting to move a
					 * clean process to the idle band.
					 */

					int newpriority = JETSAM_PRIORITY_IDLE;
					/* Idle-exit enabled and not dirty: use the sysproc aging band only if aging is already in progress. */
					if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED | P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
						newpriority = (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) ? system_procs_aging_band : JETSAM_PRIORITY_IDLE;
					}

					maxpriority = MAX(p->p_memstat_assertionpriority, newpriority );

					if (newpriority == system_procs_aging_band) {
						memorystatus_schedule_idle_demotion_locked(p, FALSE);
					}
				} else {
					/*
					 * Preserves requestedpriority when the process does not support pressured exit.
					 */
					maxpriority = MAX(p->p_memstat_assertionpriority, p->p_memstat_requestedpriority);
				}
			}
			priority = maxpriority;
		}
	} else {
		/* Not assertion driven: remember the caller's request as-is. */
		p->p_memstat_requestedpriority = priority;
	}

	if (update_memlimit) {
		boolean_t is_fatal;
		boolean_t use_active;

		/*
		 * Posix_spawn'd processes come through this path to instantiate ledger limits.
		 * Forked processes do not come through this path, so no ledger limits exist.
		 * (That's why forked processes can consume unlimited memory.)
		 */

		MEMORYSTATUS_DEBUG(3, "memorystatus_update(enter): pid %d, priority %d, dirty=0x%x, Active(%dMB %s), Inactive(%dMB, %s)\n",
		    p->p_pid, priority, p->p_memstat_dirty,
		    memlimit_active, (memlimit_active_is_fatal ? "F " : "NF"),
		    memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF"));

		if (memlimit_active <= 0) {
			/*
			 * This process will have a system_wide task limit when active.
			 * System_wide task limit is always fatal.
			 * It's quite common to see non-fatal flag passed in here.
			 * It's not an error, we just ignore it.
			 */

			/*
			 * For backward compatibility with some unexplained launchd behavior,
			 * we allow a zero sized limit.  But we still enforce system_wide limit
			 * when written to the ledgers.
			 */

			if (memlimit_active < 0) {
				memlimit_active = -1;  /* enforces system_wide task limit */
			}
			memlimit_active_is_fatal = TRUE;
		}

		if (memlimit_inactive <= 0) {
			/*
			 * This process will have a system_wide task limit when inactive.
			 * System_wide task limit is always fatal.
			 */

			/* Unlike the active limit above, a zero inactive limit is also normalized to -1. */
			memlimit_inactive = -1;
			memlimit_inactive_is_fatal = TRUE;
		}

		/*
		 * Initialize the active limit variants for this process.
		 */
		SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal);

		/*
		 * Initialize the inactive limit variants for this process.
		 */
		SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal);

		/*
		 * Initialize the cached limits for target process.
		 * When the target process is dirty tracked, it's typically
		 * in a clean state.  Non dirty tracked processes are
		 * typically active (Foreground or above).
		 * But just in case, we don't make assumptions...
		 */

		if (proc_jetsam_state_is_active_locked(p) == TRUE) {
			CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal);
			use_active = TRUE;
		} else {
			CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal);
			use_active = FALSE;
		}

		/*
		 * Enforce the cached limit by writing to the ledger.
		 */
		if (memorystatus_highwater_enabled) {
			/* apply now */
			task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, use_active, is_fatal);

			MEMORYSTATUS_DEBUG(3, "memorystatus_update: init: limit on pid %d (%dMB %s) targeting priority(%d) dirty?=0x%x %s\n",
			    p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
			    (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), priority, p->p_memstat_dirty,
			    (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
		}
	}

	/*
	 * We can't add to the aging bands buckets here.
	 * But, we could be removing it from those buckets.
	 * Check and take appropriate steps if so.
	 */

	if (isProcessInAgingBands(p)) {
		if ((jetsam_aging_policy != kJetsamAgingPolicyLegacy) && isApp(p) && (priority > applications_aging_band)) {
			/*
			 * Runningboardd is pulling up an application that is in the aging band.
			 * We reset the app's state here so that it'll get a fresh stay in the
			 * aging band on the way back.
			 *
			 * We always handled the app 'aging' in the memorystatus_update_priority_locked()
			 * function. Daemons used to be handled via the dirty 'set/clear/track' path.
			 * But with extensions (daemon-app hybrid), runningboardd is now going through
			 * this routine for daemons too and things have gotten a bit tangled. This should
			 * be simplified/untangled at some point and might require some assistance from
			 * runningboardd.
			 */
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
		} else {
			memorystatus_invalidate_idle_demotion_locked(p, FALSE);
		}
		/* First drop the process into the IDLE band (demotion check skipped); the final move happens below. */
		memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, FALSE, TRUE);
	} else {
		if (jetsam_aging_policy == kJetsamAgingPolicyLegacy && priority == JETSAM_PRIORITY_IDLE) {
			/*
			 * Daemons with 'inactive' limits will go through the dirty tracking codepath.
			 * This path deals with apps that may have 'inactive' limits e.g. WebContent processes.
			 * If this is the legacy aging policy we explicitly need to apply those limits. If it
			 * is any other aging policy, then we don't need to worry because all processes
			 * will go through the aging bands and then the demotion thread will take care to
			 * move them into the IDLE band and apply the required limits.
			 */
			memorystatus_update_priority_locked(p, priority, head_insert, TRUE);
		}
	}

	/* Final move to the computed priority, with the demotion checks enabled. */
	memorystatus_update_priority_locked(p, priority, head_insert, FALSE);

	proc_list_unlock();
	ret = 0;

out:
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0);

	return ret;
}
316670eb | 2643 | |
cb323159 A |
2644 | int |
2645 | memorystatus_remove(proc_t p) | |
39236c6e | 2646 | { |
cb323159 A |
2647 | int ret; |
2648 | memstat_bucket_t *bucket; | |
2649 | boolean_t reschedule = FALSE; | |
0a7de745 | 2650 | |
cb323159 | 2651 | MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing pid %d\n", p->p_pid); |
0a7de745 | 2652 | |
cb323159 A |
2653 | /* |
2654 | * Check if this proc is locked (because we're performing a freeze). | |
2655 | * If so, we fail and instruct the caller to try again later. | |
2656 | */ | |
2657 | if (p->p_memstat_state & P_MEMSTAT_LOCKED) { | |
2658 | return EAGAIN; | |
316670eb | 2659 | } |
39037602 | 2660 | |
cb323159 | 2661 | assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); |
39037602 | 2662 | |
cb323159 | 2663 | bucket = &memstat_bucket[p->p_memstat_effectivepriority]; |
39037602 | 2664 | |
cb323159 A |
2665 | if (isSysProc(p) && system_procs_aging_band && (p->p_memstat_effectivepriority == system_procs_aging_band)) { |
2666 | assert(bucket->count == memorystatus_scheduled_idle_demotions_sysprocs); | |
2667 | reschedule = TRUE; | |
2668 | } else if (isApp(p) && applications_aging_band && (p->p_memstat_effectivepriority == applications_aging_band)) { | |
2669 | assert(bucket->count == memorystatus_scheduled_idle_demotions_apps); | |
2670 | reschedule = TRUE; | |
0a7de745 | 2671 | } |
316670eb | 2672 | |
cb323159 A |
2673 | /* |
2674 | * Record idle delta | |
2675 | */ | |
39037602 | 2676 | |
cb323159 A |
2677 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) { |
2678 | uint64_t now = mach_absolute_time(); | |
2679 | if (now > p->p_memstat_idle_start) { | |
2680 | p->p_memstat_idle_delta = now - p->p_memstat_idle_start; | |
2681 | } | |
2682 | } | |
39037602 | 2683 | |
cb323159 A |
2684 | TAILQ_REMOVE(&bucket->list, p, p_memstat_list); |
2685 | bucket->count--; | |
2686 | if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) { | |
2687 | bucket->relaunch_high_count--; | |
2688 | } | |
0a7de745 | 2689 | |
cb323159 | 2690 | memorystatus_list_count--; |
39037602 | 2691 | |
cb323159 A |
2692 | /* If awaiting demotion to the idle band, clean up */ |
2693 | if (reschedule) { | |
2694 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
2695 | memorystatus_reschedule_idle_demotion_locked(); | |
39037602 A |
2696 | } |
2697 | ||
cb323159 A |
2698 | memorystatus_check_levels_locked(); |
2699 | ||
2700 | #if CONFIG_FREEZE | |
2701 | if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) { | |
2702 | if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) { | |
2703 | p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE; | |
2704 | memorystatus_refreeze_eligible_count--; | |
2705 | } | |
2706 | ||
2707 | memorystatus_frozen_count--; | |
2708 | memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages; | |
2709 | p->p_memstat_freeze_sharedanon_pages = 0; | |
fe8ab488 A |
2710 | } |
2711 | ||
cb323159 A |
2712 | if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { |
2713 | memorystatus_suspended_count--; | |
2714 | } | |
2715 | #endif | |
39037602 | 2716 | |
f427ee49 | 2717 | #if DEVELOPMENT || DEBUG |
c3c9b80d A |
2718 | if (p->p_pid == memorystatus_testing_pid) { |
2719 | memorystatus_testing_pid = 0; | |
f427ee49 A |
2720 | } |
2721 | #endif /* DEVELOPMENT || DEBUG */ | |
2722 | ||
cb323159 A |
2723 | if (p) { |
2724 | ret = 0; | |
0a7de745 | 2725 | } else { |
cb323159 | 2726 | ret = ESRCH; |
0a7de745 | 2727 | } |
3e170ce0 | 2728 | |
cb323159 A |
2729 | return ret; |
2730 | } | |
fe8ab488 | 2731 | |
cb323159 A |
2732 | /* |
2733 | * Validate dirty tracking flags with process state. | |
2734 | * | |
2735 | * Return: | |
2736 | * 0 on success | |
2737 | * non-0 on failure | |
2738 | * | |
2739 | * The proc_list_lock is held by the caller. | |
2740 | */ | |
fe8ab488 | 2741 | |
cb323159 A |
2742 | static int |
2743 | memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) | |
2744 | { | |
2745 | /* See that the process isn't marked for termination */ | |
2746 | if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) { | |
2747 | return EBUSY; | |
2748 | } | |
3e170ce0 | 2749 | |
cb323159 A |
2750 | /* Idle exit requires that process be tracked */ |
2751 | if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) && | |
2752 | !(pcontrol & PROC_DIRTY_TRACK)) { | |
2753 | return EINVAL; | |
2754 | } | |
3e170ce0 | 2755 | |
cb323159 A |
2756 | /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. */ |
2757 | if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) && | |
2758 | !(pcontrol & PROC_DIRTY_TRACK)) { | |
2759 | return EINVAL; | |
39236c6e | 2760 | } |
3e170ce0 | 2761 | |
cb323159 A |
2762 | /* Only one type of DEFER behavior is allowed.*/ |
2763 | if ((pcontrol & PROC_DIRTY_DEFER) && | |
2764 | (pcontrol & PROC_DIRTY_DEFER_ALWAYS)) { | |
2765 | return EINVAL; | |
2766 | } | |
2767 | ||
2768 | /* Deferral is only relevant if idle exit is specified */ | |
2769 | if (((pcontrol & PROC_DIRTY_DEFER) || | |
2770 | (pcontrol & PROC_DIRTY_DEFER_ALWAYS)) && | |
2771 | !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) { | |
2772 | return EINVAL; | |
2773 | } | |
2774 | ||
2775 | return 0; | |
2776 | } | |
2777 | ||
2778 | static void | |
2779 | memorystatus_update_idle_priority_locked(proc_t p) | |
2780 | { | |
2781 | int32_t priority; | |
2782 | ||
2783 | MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty); | |
2784 | ||
2785 | assert(isSysProc(p)); | |
2786 | ||
2787 | if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED | P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) { | |
2788 | priority = (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) ? system_procs_aging_band : JETSAM_PRIORITY_IDLE; | |
2789 | } else { | |
2790 | priority = p->p_memstat_requestedpriority; | |
2791 | } | |
d9a64523 | 2792 | |
cb323159 | 2793 | if (p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION) { |
d9a64523 | 2794 | /* |
cb323159 A |
2795 | * This process has a jetsam priority managed by an assertion. |
2796 | * Policy is to choose the max priority. | |
d9a64523 | 2797 | */ |
cb323159 A |
2798 | if (p->p_memstat_assertionpriority > priority) { |
2799 | os_log(OS_LOG_DEFAULT, "memorystatus: assertion priority %d overrides priority %d for %s:%d\n", | |
2800 | p->p_memstat_assertionpriority, priority, | |
2801 | (*p->p_name ? p->p_name : "unknown"), p->p_pid); | |
2802 | priority = p->p_memstat_assertionpriority; | |
d9a64523 | 2803 | } |
39037602 A |
2804 | } |
2805 | ||
cb323159 A |
2806 | if (priority != p->p_memstat_effectivepriority) { |
2807 | if ((jetsam_aging_policy == kJetsamAgingPolicyLegacy) && | |
2808 | (priority == JETSAM_PRIORITY_IDLE)) { | |
2809 | /* | |
2810 | * This process is on its way into the IDLE band. The system is | |
2811 | * using 'legacy' jetsam aging policy. That means, this process | |
2812 | * has already used up its idle-deferral aging time that is given | |
2813 | * once per its lifetime. So we need to set the INACTIVE limits | |
2814 | * explicitly because it won't be going through the demotion paths | |
2815 | * that take care to apply the limits appropriately. | |
2816 | */ | |
d9a64523 | 2817 | |
cb323159 A |
2818 | if (p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) { |
2819 | /* | |
2820 | * This process has the 'elevated inactive jetsam band' attribute. | |
2821 | * So, there will be no trip to IDLE after all. | |
2822 | * Instead, we pin the process in the elevated band, | |
2823 | * where its ACTIVE limits will apply. | |
2824 | */ | |
39037602 | 2825 | |
cb323159 A |
2826 | priority = JETSAM_PRIORITY_ELEVATED_INACTIVE; |
2827 | } | |
0a7de745 | 2828 | |
cb323159 A |
2829 | memorystatus_update_priority_locked(p, priority, false, true); |
2830 | } else { | |
2831 | memorystatus_update_priority_locked(p, priority, false, false); | |
2832 | } | |
2833 | } | |
316670eb A |
2834 | } |
2835 | ||
3e170ce0 | 2836 | /* |
cb323159 A |
2837 | * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle |
2838 | * (clean). They may also indicate that they support termination when idle, with the result that they are promoted | |
2839 | * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low | |
2840 | * priority idle band when clean (and killed earlier, protecting higher priority processes). |
3e170ce0 | 2841 | * |
cb323159 A |
2842 | * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by |
2843 | * memorystatus_sysprocs_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band | |
2844 | * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to | |
2845 | * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle | |
2846 | * band. The deferral can be cleared early by clearing the appropriate flag. | |
3e170ce0 | 2847 | * |
cb323159 A |
2848 | * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process |
2849 | * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be | |
2850 | * re-enabled or the guard state cleared, depending on whether the guard deadline has passed. | |
3e170ce0 A |
2851 | */ |
2852 | ||
39236c6e | 2853 | int |
cb323159 | 2854 | memorystatus_dirty_track(proc_t p, uint32_t pcontrol) |
316670eb | 2855 | { |
cb323159 A |
2856 | unsigned int old_dirty; |
2857 | boolean_t reschedule = FALSE; | |
2858 | boolean_t already_deferred = FALSE; | |
2859 | boolean_t defer_now = FALSE; | |
2860 | int ret = 0; | |
0a7de745 | 2861 | |
cb323159 A |
2862 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK), |
2863 | p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0); | |
3e170ce0 | 2864 | |
39236c6e | 2865 | proc_list_lock(); |
0a7de745 | 2866 | |
cb323159 | 2867 | if ((p->p_listflag & P_LIST_EXITED) != 0) { |
fe8ab488 | 2868 | /* |
cb323159 | 2869 | * Process is on its way out. |
fe8ab488 A |
2870 | */ |
2871 | ret = EBUSY; | |
cb323159 | 2872 | goto exit; |
316670eb A |
2873 | } |
2874 | ||
cb323159 A |
2875 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { |
2876 | ret = EPERM; | |
2877 | goto exit; | |
2878 | } | |
3e170ce0 | 2879 | |
cb323159 A |
2880 | if ((ret = memorystatus_validate_track_flags(p, pcontrol)) != 0) { |
2881 | /* error */ | |
2882 | goto exit; | |
2883 | } | |
3e170ce0 | 2884 | |
cb323159 | 2885 | old_dirty = p->p_memstat_dirty; |
3e170ce0 | 2886 | |
cb323159 A |
2887 | /* These bits are cumulative, as per <rdar://problem/11159924> */ |
2888 | if (pcontrol & PROC_DIRTY_TRACK) { | |
2889 | p->p_memstat_dirty |= P_DIRTY_TRACK; | |
2890 | } | |
3e170ce0 | 2891 | |
cb323159 A |
2892 | if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) { |
2893 | p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT; | |
2894 | } | |
3e170ce0 | 2895 | |
cb323159 A |
2896 | if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { |
2897 | p->p_memstat_dirty |= P_DIRTY_LAUNCH_IN_PROGRESS; | |
2898 | } | |
3e170ce0 | 2899 | |
cb323159 A |
2900 | if (old_dirty & P_DIRTY_AGING_IN_PROGRESS) { |
2901 | already_deferred = TRUE; | |
2902 | } | |
3e170ce0 | 2903 | |
cb323159 A |
2904 | |
2905 | /* This can be set and cleared exactly once. */ | |
2906 | if (pcontrol & (PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) { | |
2907 | if ((pcontrol & (PROC_DIRTY_DEFER)) && | |
2908 | !(old_dirty & P_DIRTY_DEFER)) { | |
2909 | p->p_memstat_dirty |= P_DIRTY_DEFER; | |
fe8ab488 | 2910 | } |
316670eb | 2911 | |
cb323159 A |
2912 | if ((pcontrol & (PROC_DIRTY_DEFER_ALWAYS)) && |
2913 | !(old_dirty & P_DIRTY_DEFER_ALWAYS)) { | |
2914 | p->p_memstat_dirty |= P_DIRTY_DEFER_ALWAYS; | |
2915 | } | |
3e170ce0 | 2916 | |
cb323159 A |
2917 | defer_now = TRUE; |
2918 | } | |
3e170ce0 | 2919 | |
cb323159 A |
2920 | MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for pid %d\n", |
2921 | ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N", | |
2922 | defer_now ? "Y" : "N", | |
2923 | p->p_memstat_dirty & P_DIRTY ? "Y" : "N", | |
2924 | p->p_pid); | |
3e170ce0 | 2925 | |
cb323159 A |
2926 | /* Kick off or invalidate the idle exit deferment if there's a state transition. */ |
2927 | if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) { | |
2928 | if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) { | |
2929 | if (defer_now && !already_deferred) { | |
2930 | /* | |
2931 | * Request to defer a clean process that's idle-exit enabled | |
2932 | * and not already in the jetsam deferred band. Most likely a | |
2933 | * new launch. | |
2934 | */ | |
2935 | memorystatus_schedule_idle_demotion_locked(p, TRUE); | |
2936 | reschedule = TRUE; | |
2937 | } else if (!defer_now) { | |
2938 | /* | |
2939 | * The process isn't asking for the 'aging' facility. | |
2940 | * Could be that it is: | |
2941 | */ | |
2942 | ||
2943 | if (already_deferred) { | |
2944 | /* | |
2945 | * already in the aging bands. Traditionally, | |
2946 | * some processes have tried to use this to | |
2947 | * opt out of the 'aging' facility. | |
2948 | */ | |
2949 | ||
2950 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
2951 | } else { | |
2952 | /* | |
2953 | * agnostic to the 'aging' facility. In that case, | |
2954 | * we'll go ahead and opt it in because this is likely | |
2955 | * a new launch (clean process, dirty tracking enabled) | |
2956 | */ | |
2957 | ||
2958 | memorystatus_schedule_idle_demotion_locked(p, TRUE); | |
2959 | } | |
3e170ce0 | 2960 | |
cb323159 A |
2961 | reschedule = TRUE; |
2962 | } | |
2963 | } | |
2964 | } else { | |
3e170ce0 | 2965 | /* |
cb323159 A |
2966 | * We are trying to operate on a dirty process. Dirty processes have to |
2967 | * be removed from the deferred band. The question is do we reset the | |
2968 | * deferred state or not? | |
2969 | * | |
2970 | * This could be a legal request like: | |
2971 | * - this process had opted into the 'aging' band | |
2972 | * - but it's now dirty and requests to opt out. | |
2973 | * In this case, we remove the process from the band and reset its | |
2974 | * state too. It'll opt back in properly when needed. | |
2975 | * | |
2976 | * OR, this request could be a user-space bug. E.g.: | |
2977 | * - this process had opted into the 'aging' band when clean | |
2978 | * - and, then issues another request to again put it into the band except | |
2979 | * this time the process is dirty. | |
2980 | * The process going dirty, as a transition in memorystatus_dirty_set(), will pull the process out of | |
2981 | * the deferred band with its state intact. So our request below is no-op. | |
2982 | * But we do it here anyways for coverage. | |
2983 | * | |
2984 | * memorystatus_update_idle_priority_locked() | |
2985 | * single-mindedly treats a dirty process as "cannot be in the aging band". | |
3e170ce0 | 2986 | */ |
3e170ce0 | 2987 | |
cb323159 A |
2988 | if (!defer_now && already_deferred) { |
2989 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
2990 | reschedule = TRUE; | |
2991 | } else { | |
2992 | boolean_t reset_state = (jetsam_aging_policy != kJetsamAgingPolicyLegacy) ? TRUE : FALSE; | |
2993 | ||
2994 | memorystatus_invalidate_idle_demotion_locked(p, reset_state); | |
2995 | reschedule = TRUE; | |
3e170ce0 A |
2996 | } |
2997 | } | |
3e170ce0 | 2998 | |
cb323159 | 2999 | memorystatus_update_idle_priority_locked(p); |
0a7de745 | 3000 | |
cb323159 A |
3001 | if (reschedule) { |
3002 | memorystatus_reschedule_idle_demotion_locked(); | |
fe8ab488 | 3003 | } |
39037602 | 3004 | |
39236c6e | 3005 | ret = 0; |
316670eb | 3006 | |
cb323159 A |
3007 | exit: |
3008 | proc_list_unlock(); | |
39236c6e | 3009 | |
316670eb A |
3010 | return ret; |
3011 | } | |
3012 | ||
39236c6e | 3013 | int |
cb323159 | 3014 | memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) |
316670eb | 3015 | { |
39236c6e | 3016 | int ret; |
cb323159 A |
3017 | boolean_t kill = false; |
3018 | boolean_t reschedule = FALSE; | |
3019 | boolean_t was_dirty = FALSE; | |
3020 | boolean_t now_dirty = FALSE; | |
f427ee49 A |
3021 | #if CONFIG_DIRTYSTATUS_TRACKING |
3022 | boolean_t notify_change = FALSE; | |
3023 | dirty_status_change_event_t change_event; | |
3024 | #endif | |
316670eb | 3025 | |
cb323159 A |
3026 | MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty); |
3027 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_SET), p->p_pid, self, pcontrol, 0, 0); | |
316670eb | 3028 | |
cb323159 | 3029 | proc_list_lock(); |
0a7de745 | 3030 | |
cb323159 A |
3031 | if ((p->p_listflag & P_LIST_EXITED) != 0) { |
3032 | /* | |
3033 | * Process is on its way out. | |
3034 | */ | |
3035 | ret = EBUSY; | |
3036 | goto exit; | |
3037 | } | |
39037602 | 3038 | |
cb323159 A |
3039 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { |
3040 | ret = EPERM; | |
3041 | goto exit; | |
39037602 A |
3042 | } |
3043 | ||
cb323159 A |
3044 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { |
3045 | was_dirty = TRUE; | |
3046 | } | |
39037602 | 3047 | |
cb323159 A |
3048 | if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { |
3049 | /* Dirty tracking not enabled */ | |
3050 | ret = EINVAL; | |
3051 | } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { | |
3052 | /* | |
3053 | * Process is set to be terminated and we're attempting to mark it dirty. | |
3054 | * Set for termination and marking as clean is OK - see <rdar://problem/10594349>. | |
3055 | */ | |
3056 | ret = EBUSY; | |
3057 | } else { | |
3058 | int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN; | |
3059 | if (pcontrol && !(p->p_memstat_dirty & flag)) { | |
3060 | /* Mark the process as having been dirtied at some point */ | |
3061 | p->p_memstat_dirty |= (flag | P_DIRTY_MARKED); | |
3062 | memorystatus_dirty_count++; | |
3063 | ret = 0; | |
3064 | } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) { | |
3065 | if ((flag == P_DIRTY_SHUTDOWN) && (!(p->p_memstat_dirty & P_DIRTY))) { | |
3066 | /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */ | |
3067 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; | |
3068 | kill = true; | |
3069 | } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { | |
3070 | /* Kill previously terminated processes if set clean */ | |
3071 | kill = true; | |
3072 | } | |
3073 | p->p_memstat_dirty &= ~flag; | |
3074 | memorystatus_dirty_count--; | |
3075 | ret = 0; | |
3076 | } else { | |
3077 | /* Already set */ | |
3078 | ret = EALREADY; | |
39037602 | 3079 | } |
fe8ab488 A |
3080 | } |
3081 | ||
cb323159 A |
3082 | if (ret != 0) { |
3083 | goto exit; | |
3084 | } | |
fe8ab488 | 3085 | |
cb323159 A |
3086 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { |
3087 | now_dirty = TRUE; | |
3088 | } | |
39037602 | 3089 | |
cb323159 A |
3090 | if ((was_dirty == TRUE && now_dirty == FALSE) || |
3091 | (was_dirty == FALSE && now_dirty == TRUE)) { | |
f427ee49 A |
3092 | #if CONFIG_DIRTYSTATUS_TRACKING |
3093 | if (dirtystatus_tracking_enabled) { | |
3094 | uint32_t pages = 0; | |
3095 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL); | |
3096 | change_event.dsc_pid = p->p_pid; | |
3097 | change_event.dsc_event_type = (now_dirty == TRUE) ? kDirtyStatusChangedDirty : kDirtyStatusChangedClean; | |
3098 | change_event.dsc_time = mach_absolute_time(); | |
3099 | change_event.dsc_pages = pages; | |
3100 | change_event.dsc_priority = p->p_memstat_effectivepriority; | |
3101 | strlcpy(&change_event.dsc_process_name[0], p->p_name, sizeof(change_event.dsc_process_name)); | |
3102 | notify_change = TRUE; | |
3103 | } | |
3104 | #endif | |
3105 | ||
cb323159 A |
3106 | /* Manage idle exit deferral, if applied */ |
3107 | if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) { | |
3108 | /* | |
3109 | * Legacy mode: P_DIRTY_AGING_IN_PROGRESS means the process is in the aging band OR it might be heading back | |
3110 | * there once it's clean again. For the legacy case, this only applies if it has some protection window left. | |
3111 | * P_DIRTY_DEFER: one-time protection window given at launch | |
3112 | * P_DIRTY_DEFER_ALWAYS: protection window given for every dirty->clean transition. Like non-legacy mode. | |
3113 | * | |
3114 | * Non-Legacy mode: P_DIRTY_AGING_IN_PROGRESS means the process is in the aging band. It will always stop over | |
3115 | * in that band on it's way to IDLE. | |
3116 | */ | |
39037602 | 3117 | |
cb323159 A |
3118 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { |
3119 | /* | |
3120 | * New dirty process i.e. "was_dirty == FALSE && now_dirty == TRUE" | |
3121 | * | |
3122 | * The process will move from its aging band to its higher requested | |
3123 | * jetsam band. | |
3124 | */ | |
3125 | boolean_t reset_state = (jetsam_aging_policy != kJetsamAgingPolicyLegacy) ? TRUE : FALSE; | |
39037602 | 3126 | |
cb323159 A |
3127 | memorystatus_invalidate_idle_demotion_locked(p, reset_state); |
3128 | reschedule = TRUE; | |
3129 | } else { | |
3130 | /* | |
3131 | * Process is back from "dirty" to "clean". | |
3132 | */ | |
39037602 | 3133 | |
cb323159 A |
3134 | if (jetsam_aging_policy == kJetsamAgingPolicyLegacy) { |
3135 | if (((p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) == FALSE) && | |
3136 | (mach_absolute_time() >= p->p_memstat_idledeadline)) { | |
3137 | /* | |
3138 | * The process' hasn't enrolled in the "always defer after dirty" | |
3139 | * mode and its deadline has expired. It currently | |
3140 | * does not reside in any of the aging buckets. | |
3141 | * | |
3142 | * It's on its way to the JETSAM_PRIORITY_IDLE | |
3143 | * bucket via memorystatus_update_idle_priority_locked() | |
3144 | * below. | |
3145 | * | |
3146 | * So all we need to do is reset all the state on the | |
3147 | * process that's related to the aging bucket i.e. | |
3148 | * the AGING_IN_PROGRESS flag and the timer deadline. | |
3149 | */ | |
39037602 | 3150 | |
cb323159 A |
3151 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); |
3152 | reschedule = TRUE; | |
3153 | } else { | |
3154 | /* | |
3155 | * Process enrolled in "always stop in deferral band after dirty" OR | |
3156 | * it still has some protection window left and so | |
3157 | * we just re-arm the timer without modifying any | |
3158 | * state on the process iff it still wants into that band. | |
3159 | */ | |
39037602 | 3160 | |
cb323159 A |
3161 | if (p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) { |
3162 | memorystatus_schedule_idle_demotion_locked(p, TRUE); | |
3163 | reschedule = TRUE; | |
3164 | } else if (p->p_memstat_dirty & P_DIRTY_AGING_IN_PROGRESS) { | |
3165 | memorystatus_schedule_idle_demotion_locked(p, FALSE); | |
3166 | reschedule = TRUE; | |
3167 | } | |
3168 | } | |
3169 | } else { | |
3170 | memorystatus_schedule_idle_demotion_locked(p, TRUE); | |
3171 | reschedule = TRUE; | |
3172 | } | |
3173 | } | |
3174 | } | |
fe8ab488 | 3175 | |
cb323159 | 3176 | memorystatus_update_idle_priority_locked(p); |
316670eb | 3177 | |
cb323159 A |
3178 | if (memorystatus_highwater_enabled) { |
3179 | boolean_t ledger_update_needed = TRUE; | |
3180 | boolean_t use_active; | |
3181 | boolean_t is_fatal; | |
3182 | /* | |
3183 | * We are in this path because this process transitioned between | |
3184 | * dirty <--> clean state. Update the cached memory limits. | |
3185 | */ | |
39037602 | 3186 | |
cb323159 A |
3187 | if (proc_jetsam_state_is_active_locked(p) == TRUE) { |
3188 | /* | |
3189 | * process is pinned in elevated band | |
3190 | * or | |
3191 | * process is dirty | |
3192 | */ | |
3193 | CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal); | |
3194 | use_active = TRUE; | |
3195 | ledger_update_needed = TRUE; | |
3196 | } else { | |
3197 | /* | |
3198 | * process is clean...but if it has opted into pressured-exit | |
3199 | * we don't apply the INACTIVE limit till the process has aged | |
3200 | * out and is entering the IDLE band. | |
3201 | * See memorystatus_update_priority_locked() for that. | |
3202 | */ | |
39037602 | 3203 | |
cb323159 A |
3204 | if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) { |
3205 | ledger_update_needed = FALSE; | |
3206 | } else { | |
3207 | CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal); | |
3208 | use_active = FALSE; | |
3209 | ledger_update_needed = TRUE; | |
3210 | } | |
3211 | } | |
d9a64523 | 3212 | |
cb323159 A |
3213 | /* |
3214 | * Enforce the new limits by writing to the ledger. | |
3215 | * | |
3216 | * This is a hot path and holding the proc_list_lock while writing to the ledgers, | |
3217 | * (where the task lock is taken) is bad. So, we temporarily drop the proc_list_lock. | |
3218 | * We aren't traversing the jetsam bucket list here, so we should be safe. | |
3219 | * See rdar://21394491. | |
3220 | */ | |
39037602 | 3221 | |
cb323159 A |
3222 | if (ledger_update_needed && proc_ref_locked(p) == p) { |
3223 | int ledger_limit; | |
3224 | if (p->p_memstat_memlimit > 0) { | |
3225 | ledger_limit = p->p_memstat_memlimit; | |
3226 | } else { | |
3227 | ledger_limit = -1; | |
3228 | } | |
3229 | proc_list_unlock(); | |
3230 | task_set_phys_footprint_limit_internal(p->task, ledger_limit, NULL, use_active, is_fatal); | |
3231 | proc_list_lock(); | |
3232 | proc_rele_locked(p); | |
b0d623f7 | 3233 | |
cb323159 A |
3234 | MEMORYSTATUS_DEBUG(3, "memorystatus_dirty_set: new limit on pid %d (%dMB %s) priority(%d) dirty?=0x%x %s\n", |
3235 | p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1), | |
3236 | (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty, | |
3237 | (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : "")); | |
3238 | } | |
3239 | } | |
39236c6e | 3240 | |
cb323159 A |
3241 | /* If the deferral state changed, reschedule the demotion timer */ |
3242 | if (reschedule) { | |
3243 | memorystatus_reschedule_idle_demotion_locked(); | |
3244 | } | |
3245 | } | |
3e170ce0 | 3246 | |
cb323159 A |
3247 | if (kill) { |
3248 | if (proc_ref_locked(p) == p) { | |
3249 | proc_list_unlock(); | |
3250 | psignal(p, SIGKILL); | |
3251 | proc_list_lock(); | |
3252 | proc_rele_locked(p); | |
3253 | } | |
3e170ce0 | 3254 | } |
5ba3f43e | 3255 | |
cb323159 A |
3256 | exit: |
3257 | proc_list_unlock(); | |
3e170ce0 | 3258 | |
f427ee49 A |
3259 | #if CONFIG_DIRTYSTATUS_TRACKING |
3260 | // Before returning, let's notify the dirtiness status if we have to | |
3261 | if (notify_change) { | |
3262 | memorystatus_send_dirty_status_change_note(&change_event, sizeof(change_event)); | |
3263 | } | |
3264 | #endif | |
3265 | ||
cb323159 | 3266 | return ret; |
3e170ce0 A |
3267 | } |
3268 | ||
cb323159 A |
3269 | int |
3270 | memorystatus_dirty_clear(proc_t p, uint32_t pcontrol) | |
3e170ce0 | 3271 | { |
cb323159 | 3272 | int ret = 0; |
3e170ce0 | 3273 | |
cb323159 | 3274 | MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_clear(): %d 0x%x 0x%x\n", p->p_pid, pcontrol, p->p_memstat_dirty); |
3e170ce0 | 3275 | |
cb323159 | 3276 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_CLEAR), p->p_pid, pcontrol, 0, 0, 0); |
d9a64523 | 3277 | |
cb323159 A |
3278 | proc_list_lock(); |
3279 | ||
3280 | if ((p->p_listflag & P_LIST_EXITED) != 0) { | |
3e170ce0 | 3281 | /* |
cb323159 A |
3282 | * Process is on its way out. |
3283 | */ | |
3284 | ret = EBUSY; | |
3285 | goto exit; | |
39236c6e A |
3286 | } |
3287 | ||
cb323159 A |
3288 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { |
3289 | ret = EPERM; | |
3290 | goto exit; | |
3291 | } | |
3e170ce0 | 3292 | |
cb323159 A |
3293 | if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { |
3294 | /* Dirty tracking not enabled */ | |
3295 | ret = EINVAL; | |
3296 | goto exit; | |
3297 | } | |
0a7de745 | 3298 | |
cb323159 A |
3299 | if (!pcontrol || (pcontrol & (PROC_DIRTY_LAUNCH_IN_PROGRESS | PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) == 0) { |
3300 | ret = EINVAL; | |
3301 | goto exit; | |
3302 | } | |
0a7de745 | 3303 | |
cb323159 A |
3304 | if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { |
3305 | p->p_memstat_dirty &= ~P_DIRTY_LAUNCH_IN_PROGRESS; | |
3306 | } | |
316670eb | 3307 | |
cb323159 A |
3308 | /* This can be set and cleared exactly once. */ |
3309 | if (pcontrol & (PROC_DIRTY_DEFER | PROC_DIRTY_DEFER_ALWAYS)) { | |
3310 | if (p->p_memstat_dirty & P_DIRTY_DEFER) { | |
3311 | p->p_memstat_dirty &= ~(P_DIRTY_DEFER); | |
0a7de745 | 3312 | } |
39236c6e | 3313 | |
cb323159 A |
3314 | if (p->p_memstat_dirty & P_DIRTY_DEFER_ALWAYS) { |
3315 | p->p_memstat_dirty &= ~(P_DIRTY_DEFER_ALWAYS); | |
3316 | } | |
3e170ce0 | 3317 | |
cb323159 A |
3318 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); |
3319 | memorystatus_update_idle_priority_locked(p); | |
3320 | memorystatus_reschedule_idle_demotion_locked(); | |
3e170ce0 | 3321 | } |
b0d623f7 | 3322 | |
cb323159 A |
3323 | ret = 0; |
3324 | exit: | |
3325 | proc_list_unlock(); | |
b0d623f7 | 3326 | |
cb323159 A |
3327 | return ret; |
3328 | } | |
3329 | ||
3330 | int | |
3331 | memorystatus_dirty_get(proc_t p, boolean_t locked) | |
0a7de745 | 3332 | { |
cb323159 | 3333 | int ret = 0; |
0a7de745 | 3334 | |
cb323159 A |
3335 | if (!locked) { |
3336 | proc_list_lock(); | |
b0d623f7 | 3337 | } |
39236c6e | 3338 | |
cb323159 A |
3339 | if (p->p_memstat_dirty & P_DIRTY_TRACK) { |
3340 | ret |= PROC_DIRTY_TRACKED; | |
3341 | if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) { | |
3342 | ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT; | |
3343 | } | |
3344 | if (p->p_memstat_dirty & P_DIRTY) { | |
3345 | ret |= PROC_DIRTY_IS_DIRTY; | |
3346 | } | |
3347 | if (p->p_memstat_dirty & P_DIRTY_LAUNCH_IN_PROGRESS) { | |
3348 | ret |= PROC_DIRTY_LAUNCH_IS_IN_PROGRESS; | |
3349 | } | |
39236c6e | 3350 | } |
0a7de745 | 3351 | |
cb323159 A |
3352 | if (!locked) { |
3353 | proc_list_unlock(); | |
3354 | } | |
0a7de745 | 3355 | |
39236c6e | 3356 | return ret; |
b0d623f7 A |
3357 | } |
3358 | ||
cb323159 A |
3359 | int |
3360 | memorystatus_on_terminate(proc_t p) | |
0a7de745 | 3361 | { |
cb323159 | 3362 | int sig; |
3e170ce0 | 3363 | |
cb323159 | 3364 | proc_list_lock(); |
3e170ce0 | 3365 | |
cb323159 A |
3366 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; |
3367 | ||
f427ee49 A |
3368 | if (((p->p_memstat_dirty & (P_DIRTY_TRACK | P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) || |
3369 | (p->p_memstat_state & P_MEMSTAT_SUSPENDED)) { | |
3370 | /* | |
3371 | * Mark as terminated and issue SIGKILL if:- | |
3372 | * - process is clean, or, | |
3373 | * - if process is dirty but suspended. This case is likely | |
3374 | * an extension because apps don't opt into dirty-tracking | |
3375 | * and daemons aren't suspended. | |
3376 | */ | |
3377 | #if DEVELOPMENT || DEBUG | |
3378 | if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { | |
3379 | os_log(OS_LOG_DEFAULT, "memorystatus: sending suspended process %s (pid %d) SIGKILL", | |
3380 | (*p->p_name ? p->p_name : "unknown"), p->p_pid); | |
3381 | } | |
3382 | #endif /* DEVELOPMENT || DEBUG */ | |
cb323159 | 3383 | sig = SIGKILL; |
3e170ce0 | 3384 | } else { |
cb323159 A |
3385 | /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */ |
3386 | sig = SIGTERM; | |
3e170ce0 A |
3387 | } |
3388 | ||
cb323159 | 3389 | proc_list_unlock(); |
3e170ce0 | 3390 | |
cb323159 | 3391 | return sig; |
3e170ce0 A |
3392 | } |
3393 | ||
cb323159 A |
3394 | void |
3395 | memorystatus_on_suspend(proc_t p) | |
a39ff7e2 | 3396 | { |
cb323159 A |
3397 | #if CONFIG_FREEZE |
3398 | uint32_t pages; | |
3399 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL); | |
3400 | #endif | |
3401 | proc_list_lock(); | |
3402 | #if CONFIG_FREEZE | |
3403 | memorystatus_suspended_count++; | |
3404 | #endif | |
3405 | p->p_memstat_state |= P_MEMSTAT_SUSPENDED; | |
3406 | proc_list_unlock(); | |
3407 | } | |
a39ff7e2 | 3408 | |
f427ee49 A |
3409 | extern uint64_t memorystatus_thaw_count_since_boot; |
3410 | ||
cb323159 A |
3411 | void |
3412 | memorystatus_on_resume(proc_t p) | |
3413 | { | |
3414 | #if CONFIG_FREEZE | |
3415 | boolean_t frozen; | |
3416 | pid_t pid; | |
3417 | #endif | |
a39ff7e2 | 3418 | |
cb323159 | 3419 | proc_list_lock(); |
a39ff7e2 | 3420 | |
cb323159 A |
3421 | #if CONFIG_FREEZE |
3422 | frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN); | |
3423 | if (frozen) { | |
a39ff7e2 | 3424 | /* |
cb323159 A |
3425 | * Now that we don't _thaw_ a process completely, |
3426 | * resuming it (and having some on-demand swapins) | |
3427 | * shouldn't preclude it from being counted as frozen. | |
3428 | * | |
3429 | * memorystatus_frozen_count--; | |
3430 | * | |
3431 | * We preserve the P_MEMSTAT_FROZEN state since the process | |
3432 | * could have state on disk AND so will deserve some protection | |
3433 | * in the jetsam bands. | |
a39ff7e2 | 3434 | */ |
cb323159 A |
3435 | if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) { |
3436 | p->p_memstat_state |= P_MEMSTAT_REFREEZE_ELIGIBLE; | |
3437 | memorystatus_refreeze_eligible_count++; | |
3438 | } | |
c3c9b80d A |
3439 | if (p->p_memstat_thaw_count == 0 || p->p_memstat_last_thaw_interval < memorystatus_freeze_current_interval) { |
3440 | os_atomic_inc(&(memorystatus_freezer_stats.mfs_processes_thawed), relaxed); | |
3441 | } | |
3442 | p->p_memstat_last_thaw_interval = memorystatus_freeze_current_interval; | |
cb323159 | 3443 | p->p_memstat_thaw_count++; |
a39ff7e2 | 3444 | |
cb323159 | 3445 | memorystatus_thaw_count++; |
f427ee49 | 3446 | memorystatus_thaw_count_since_boot++; |
cb323159 | 3447 | } |
a39ff7e2 | 3448 | |
cb323159 | 3449 | memorystatus_suspended_count--; |
a39ff7e2 | 3450 | |
cb323159 A |
3451 | pid = p->p_pid; |
3452 | #endif | |
3453 | ||
3454 | /* | |
3455 | * P_MEMSTAT_FROZEN will remain unchanged. This used to be: | |
3456 | * p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN); | |
3457 | */ | |
3458 | p->p_memstat_state &= ~P_MEMSTAT_SUSPENDED; | |
a39ff7e2 | 3459 | |
cb323159 | 3460 | proc_list_unlock(); |
a39ff7e2 | 3461 | |
cb323159 A |
3462 | #if CONFIG_FREEZE |
3463 | if (frozen) { | |
3464 | memorystatus_freeze_entry_t data = { pid, FALSE, 0 }; | |
3465 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); | |
3466 | } | |
3467 | #endif | |
3468 | } | |
a39ff7e2 | 3469 | |
cb323159 A |
3470 | void |
3471 | memorystatus_on_inactivity(proc_t p) | |
3472 | { | |
3473 | #pragma unused(p) | |
3474 | #if CONFIG_FREEZE | |
3475 | /* Wake the freeze thread */ | |
3476 | thread_wakeup((event_t)&memorystatus_freeze_wakeup); | |
3477 | #endif | |
3478 | } | |
3479 | ||
3480 | /* | |
3481 | * The proc_list_lock is held by the caller. | |
3482 | */ | |
3483 | static uint32_t | |
3484 | memorystatus_build_state(proc_t p) | |
3485 | { | |
3486 | uint32_t snapshot_state = 0; | |
3487 | ||
3488 | /* General */ | |
3489 | if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { | |
3490 | snapshot_state |= kMemorystatusSuspended; | |
3491 | } | |
3492 | if (p->p_memstat_state & P_MEMSTAT_FROZEN) { | |
3493 | snapshot_state |= kMemorystatusFrozen; | |
3494 | } | |
3495 | if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) { | |
3496 | snapshot_state |= kMemorystatusWasThawed; | |
3497 | } | |
3498 | if (p->p_memstat_state & P_MEMSTAT_PRIORITY_ASSERTION) { | |
3499 | snapshot_state |= kMemorystatusAssertion; | |
3500 | } | |
3501 | ||
3502 | /* Tracking */ | |
3503 | if (p->p_memstat_dirty & P_DIRTY_TRACK) { | |
3504 | snapshot_state |= kMemorystatusTracked; | |
3505 | } | |
3506 | if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) { | |
3507 | snapshot_state |= kMemorystatusSupportsIdleExit; | |
3508 | } | |
3509 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { | |
3510 | snapshot_state |= kMemorystatusDirty; | |
a39ff7e2 A |
3511 | } |
3512 | ||
cb323159 A |
3513 | return snapshot_state; |
3514 | } | |
a39ff7e2 | 3515 | |
cb323159 A |
3516 | static boolean_t |
3517 | kill_idle_exit_proc(void) | |
3518 | { | |
3519 | proc_t p, victim_p = PROC_NULL; | |
3520 | uint64_t current_time, footprint_of_killed_proc; | |
3521 | boolean_t killed = FALSE; | |
3522 | unsigned int i = 0; | |
3523 | os_reason_t jetsam_reason = OS_REASON_NULL; | |
a39ff7e2 | 3524 | |
cb323159 A |
3525 | /* Pick next idle exit victim. */ |
3526 | current_time = mach_absolute_time(); | |
a39ff7e2 | 3527 | |
cb323159 A |
3528 | jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_IDLE_EXIT); |
3529 | if (jetsam_reason == OS_REASON_NULL) { | |
3530 | printf("kill_idle_exit_proc: failed to allocate jetsam reason\n"); | |
3531 | } | |
d9a64523 | 3532 | |
cb323159 | 3533 | proc_list_lock(); |
a39ff7e2 | 3534 | |
cb323159 A |
3535 | p = memorystatus_get_first_proc_locked(&i, FALSE); |
3536 | while (p) { | |
3537 | /* No need to look beyond the idle band */ | |
3538 | if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) { | |
3539 | break; | |
a39ff7e2 | 3540 | } |
a39ff7e2 | 3541 | |
cb323159 A |
3542 | if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT | P_DIRTY_IS_DIRTY | P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) { |
3543 | if (current_time >= p->p_memstat_idledeadline) { | |
3544 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; | |
3545 | victim_p = proc_ref_locked(p); | |
3546 | break; | |
a39ff7e2 A |
3547 | } |
3548 | } | |
3549 | ||
cb323159 A |
3550 | p = memorystatus_get_next_proc_locked(&i, p, FALSE); |
3551 | } | |
a39ff7e2 | 3552 | |
cb323159 | 3553 | proc_list_unlock(); |
a39ff7e2 | 3554 | |
cb323159 A |
3555 | if (victim_p) { |
3556 | printf("memorystatus: killing_idle_process pid %d [%s] jetsam_reason->osr_code: %llu\n", victim_p->p_pid, (*victim_p->p_name ? victim_p->p_name : "unknown"), jetsam_reason->osr_code); | |
3557 | killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit, jetsam_reason, &footprint_of_killed_proc); | |
3558 | proc_rele(victim_p); | |
3559 | } else { | |
3560 | os_reason_free(jetsam_reason); | |
a39ff7e2 A |
3561 | } |
3562 | ||
cb323159 | 3563 | return killed; |
a39ff7e2 A |
3564 | } |
3565 | ||
cb323159 A |
3566 | static void |
3567 | memorystatus_thread_wake(void) | |
39236c6e | 3568 | { |
cb323159 A |
3569 | int thr_id = 0; |
3570 | int active_thr = atomic_load(&active_jetsam_threads); | |
316670eb | 3571 | |
cb323159 A |
3572 | /* Wakeup all the jetsam threads */ |
3573 | for (thr_id = 0; thr_id < active_thr; thr_id++) { | |
3574 | thread_wakeup((event_t)&jetsam_threads[thr_id].memorystatus_wakeup); | |
5ba3f43e | 3575 | } |
cb323159 | 3576 | } |
5ba3f43e | 3577 | |
cb323159 | 3578 | #if CONFIG_JETSAM |
5ba3f43e | 3579 | |
cb323159 A |
3580 | static void |
3581 | memorystatus_thread_pool_max() | |
3582 | { | |
3583 | /* Increase the jetsam thread pool to max_jetsam_threads */ | |
3584 | int max_threads = max_jetsam_threads; | |
3585 | printf("Expanding memorystatus pool to %d!\n", max_threads); | |
3586 | atomic_store(&active_jetsam_threads, max_threads); | |
3587 | } | |
3e170ce0 | 3588 | |
cb323159 A |
3589 | static void |
3590 | memorystatus_thread_pool_default() | |
3591 | { | |
3592 | /* Restore the jetsam thread pool to a single thread */ | |
3593 | printf("Reverting memorystatus pool back to 1\n"); | |
3594 | atomic_store(&active_jetsam_threads, 1); | |
3595 | } | |
5ba3f43e A |
3596 | |
3597 | #endif /* CONFIG_JETSAM */ | |
3598 | ||
cb323159 | 3599 | extern void vm_pressure_response(void); |
0a7de745 | 3600 | |
cb323159 A |
3601 | static int |
3602 | memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation) | |
3603 | { | |
3604 | struct jetsam_thread_state *jetsam_thread = jetsam_current_thread(); | |
0a7de745 | 3605 | |
cb323159 A |
3606 | assert(jetsam_thread != NULL); |
3607 | if (interval_ms) { | |
3608 | assert_wait_timeout(&jetsam_thread->memorystatus_wakeup, THREAD_UNINT, interval_ms, NSEC_PER_MSEC); | |
3609 | } else { | |
3610 | assert_wait(&jetsam_thread->memorystatus_wakeup, THREAD_UNINT); | |
3611 | } | |
0a7de745 | 3612 | |
cb323159 A |
3613 | return thread_block(continuation); |
3614 | } | |
316670eb | 3615 | |
cb323159 A |
3616 | static boolean_t |
3617 | memorystatus_avail_pages_below_pressure(void) | |
3618 | { | |
f427ee49 | 3619 | #if CONFIG_JETSAM |
cb323159 | 3620 | return memorystatus_available_pages <= memorystatus_available_pages_pressure; |
f427ee49 | 3621 | #else /* CONFIG_JETSAM */ |
cb323159 | 3622 | return FALSE; |
f427ee49 | 3623 | #endif /* CONFIG_JETSAM */ |
cb323159 | 3624 | } |
0a7de745 | 3625 | |
cb323159 A |
3626 | static boolean_t |
3627 | memorystatus_avail_pages_below_critical(void) | |
3628 | { | |
f427ee49 | 3629 | #if CONFIG_JETSAM |
cb323159 | 3630 | return memorystatus_available_pages <= memorystatus_available_pages_critical; |
f427ee49 | 3631 | #else /* CONFIG_JETSAM */ |
cb323159 | 3632 | return FALSE; |
f427ee49 | 3633 | #endif /* CONFIG_JETSAM */ |
cb323159 | 3634 | } |
316670eb | 3635 | |
cb323159 A |
3636 | static boolean_t |
3637 | memorystatus_post_snapshot(int32_t priority, uint32_t cause) | |
3638 | { | |
3639 | boolean_t is_idle_priority; | |
fe8ab488 | 3640 | |
cb323159 A |
3641 | if (jetsam_aging_policy == kJetsamAgingPolicyLegacy) { |
3642 | is_idle_priority = (priority == JETSAM_PRIORITY_IDLE); | |
3643 | } else { | |
3644 | is_idle_priority = (priority == JETSAM_PRIORITY_IDLE || priority == JETSAM_PRIORITY_IDLE_DEFERRED); | |
3645 | } | |
f427ee49 | 3646 | #if CONFIG_JETSAM |
cb323159 A |
3647 | #pragma unused(cause) |
3648 | /* | |
3649 | * Don't generate logs for steady-state idle-exit kills, | |
3650 | * unless it is overridden for debug or by the device | |
3651 | * tree. | |
3652 | */ | |
fe8ab488 | 3653 | |
cb323159 | 3654 | return !is_idle_priority || memorystatus_idle_snapshot; |
0a7de745 | 3655 | |
f427ee49 | 3656 | #else /* CONFIG_JETSAM */ |
cb323159 A |
3657 | /* |
3658 | * Don't generate logs for steady-state idle-exit kills, | |
3659 | * unless | |
3660 | * - it is overridden for debug or by the device | |
3661 | * tree. | |
3662 | * OR | |
3663 | * - the kill causes are important i.e. not kMemorystatusKilledIdleExit | |
3664 | */ | |
a39ff7e2 | 3665 | |
cb323159 A |
3666 | boolean_t snapshot_eligible_kill_cause = (is_reason_thrashing(cause) || is_reason_zone_map_exhaustion(cause)); |
3667 | return !is_idle_priority || memorystatus_idle_snapshot || snapshot_eligible_kill_cause; | |
f427ee49 | 3668 | #endif /* CONFIG_JETSAM */ |
cb323159 | 3669 | } |
39037602 | 3670 | |
cb323159 A |
3671 | static boolean_t |
3672 | memorystatus_action_needed(void) | |
3673 | { | |
f427ee49 | 3674 | #if CONFIG_JETSAM |
cb323159 A |
3675 | return is_reason_thrashing(kill_under_pressure_cause) || |
3676 | is_reason_zone_map_exhaustion(kill_under_pressure_cause) || | |
3677 | memorystatus_available_pages <= memorystatus_available_pages_pressure; | |
f427ee49 | 3678 | #else /* CONFIG_JETSAM */ |
cb323159 A |
3679 | return is_reason_thrashing(kill_under_pressure_cause) || |
3680 | is_reason_zone_map_exhaustion(kill_under_pressure_cause); | |
f427ee49 | 3681 | #endif /* CONFIG_JETSAM */ |
cb323159 | 3682 | } |
d9a64523 | 3683 | |
cb323159 A |
3684 | static boolean_t |
3685 | memorystatus_act_on_hiwat_processes(uint32_t *errors, uint32_t *hwm_kill, boolean_t *post_snapshot, __unused boolean_t *is_critical, uint64_t *memory_reclaimed) | |
3686 | { | |
3687 | boolean_t purged = FALSE, killed = FALSE; | |
0a7de745 | 3688 | |
cb323159 A |
3689 | *memory_reclaimed = 0; |
3690 | killed = memorystatus_kill_hiwat_proc(errors, &purged, memory_reclaimed); | |
a39ff7e2 | 3691 | |
cb323159 A |
3692 | if (killed) { |
3693 | *hwm_kill = *hwm_kill + 1; | |
3694 | *post_snapshot = TRUE; | |
3695 | return TRUE; | |
3696 | } else { | |
3697 | if (purged == FALSE) { | |
3698 | /* couldn't purge and couldn't kill */ | |
3699 | memorystatus_hwm_candidates = FALSE; | |
b0d623f7 | 3700 | } |
b0d623f7 | 3701 | } |
0a7de745 | 3702 | |
cb323159 A |
3703 | #if CONFIG_JETSAM |
3704 | /* No highwater processes to kill. Continue or stop for now? */ | |
3705 | if (!is_reason_thrashing(kill_under_pressure_cause) && | |
3706 | !is_reason_zone_map_exhaustion(kill_under_pressure_cause) && | |
3707 | (memorystatus_available_pages > memorystatus_available_pages_critical)) { | |
3708 | /* | |
3709 | * We are _not_ out of pressure but we are above the critical threshold and there's: | |
3710 | * - no compressor thrashing | |
3711 | * - enough zone memory | |
3712 | * - no more HWM processes left. | |
3713 | * For now, don't kill any other processes. | |
3714 | */ | |
0a7de745 | 3715 | |
cb323159 A |
3716 | if (*hwm_kill == 0) { |
3717 | memorystatus_thread_wasted_wakeup++; | |
3718 | } | |
39037602 | 3719 | |
cb323159 | 3720 | *is_critical = FALSE; |
0a7de745 | 3721 | |
cb323159 A |
3722 | return TRUE; |
3723 | } | |
3724 | #endif /* CONFIG_JETSAM */ | |
b0d623f7 | 3725 | |
cb323159 | 3726 | return FALSE; |
316670eb A |
3727 | } |
3728 | ||
/*
 * kJetsamHighRelaunchCandidatesThreshold: percentage of the candidates in
 * the idle and deferred bands that must be bad candidates before
 * aggressive jetsam is triggered.
 */
#define kJetsamHighRelaunchCandidatesThreshold  (100)

/*
 * kJetsamMinCandidatesThreshold: minimum number of candidates in the
 * idle/deferred bands required to trigger aggressive jetsam. It effectively
 * decides how much memory the system is willing to hold in the lower bands
 * without going aggressive, and should ideally be tuned to the memory
 * configuration of the device.
 */
#define kJetsamMinCandidatesThreshold           (5)
3743 | ||
39236c6e | 3744 | static boolean_t |
cb323159 | 3745 | memorystatus_aggressive_jetsam_needed_sysproc_aging(__unused int jld_eval_aggressive_count, __unused int *jld_idle_kills, __unused int jld_idle_kill_candidates, int *total_candidates, int *elevated_bucket_count) |
d1ecb069 | 3746 | { |
cb323159 | 3747 | boolean_t aggressive_jetsam_needed = false; |
3e170ce0 | 3748 | |
cb323159 A |
3749 | /* |
3750 | * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, we maintain the jetsam | |
3751 | * relaunch behavior for all daemons. Also, daemons and apps are aged in deferred bands on | |
3752 | * every dirty->clean transition. For this aging policy, the best way to determine if | |
3753 | * aggressive jetsam is needed, is to see if the kill candidates are mostly bad candidates. | |
3754 | * If yes, then we need to go to higher bands to reclaim memory. | |
3755 | */ | |
3756 | proc_list_lock(); | |
3757 | /* Get total candidate counts for idle and idle deferred bands */ | |
3758 | *total_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].count + memstat_bucket[system_procs_aging_band].count; | |
3759 | /* Get counts of bad kill candidates in idle and idle deferred bands */ | |
3760 | int bad_candidates = memstat_bucket[JETSAM_PRIORITY_IDLE].relaunch_high_count + memstat_bucket[system_procs_aging_band].relaunch_high_count; | |
3e170ce0 | 3761 | |
cb323159 | 3762 | *elevated_bucket_count = memstat_bucket[JETSAM_PRIORITY_ELEVATED_INACTIVE].count; |
490019cf | 3763 | |
cb323159 | 3764 | proc_list_unlock(); |
5ba3f43e | 3765 | |
cb323159 A |
3766 | /* Check if the number of bad candidates is greater than kJetsamHighRelaunchCandidatesThreshold % */ |
3767 | aggressive_jetsam_needed = (((bad_candidates * 100) / *total_candidates) >= kJetsamHighRelaunchCandidatesThreshold); | |
3768 | ||
3769 | /* | |
3770 | * Since the new aging policy bases the aggressive jetsam trigger on percentage of | |
3771 | * bad candidates, it is prone to being overly aggressive. In order to mitigate that, | |
3772 | * make sure the system is really under memory pressure before triggering aggressive | |
3773 | * jetsam. | |
3774 | */ | |
3775 | if (memorystatus_available_pages > memorystatus_sysproc_aging_aggr_pages) { | |
3776 | aggressive_jetsam_needed = false; | |
3777 | } | |
3e170ce0 | 3778 | |
3e170ce0 | 3779 | #if DEVELOPMENT || DEBUG |
cb323159 A |
3780 | printf("memorystatus: aggressive%d: [%s] Bad Candidate Threshold Check (total: %d, bad: %d, threshold: %d %%); Memory Pressure Check (available_pgs: %llu, threshold_pgs: %llu)\n", |
3781 | jld_eval_aggressive_count, aggressive_jetsam_needed ? "PASSED" : "FAILED", *total_candidates, bad_candidates, | |
f427ee49 | 3782 | kJetsamHighRelaunchCandidatesThreshold, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES, (uint64_t)memorystatus_sysproc_aging_aggr_pages); |
3e170ce0 | 3783 | #endif /* DEVELOPMENT || DEBUG */ |
cb323159 A |
3784 | return aggressive_jetsam_needed; |
3785 | } | |
39236c6e | 3786 | |
f427ee49 A |
3787 | /* |
3788 | * Gets memory back from various system caches. | |
3789 | * Called before jetsamming in the foreground band in the hope that we'll | |
3790 | * avoid a jetsam. | |
3791 | */ | |
3792 | static void | |
3793 | memorystatus_approaching_fg_band(boolean_t *corpse_list_purged) | |
3794 | { | |
3795 | assert(corpse_list_purged != NULL); | |
3796 | pmap_release_pages_fast(); | |
3797 | memorystatus_issue_fg_band_notify(); | |
3798 | if (total_corpses_count() > 0 && !*corpse_list_purged) { | |
3799 | task_purge_all_corpses(); | |
3800 | *corpse_list_purged = TRUE; | |
3801 | } | |
3802 | } | |
3803 | ||
cb323159 A |
3804 | static boolean_t |
3805 | memorystatus_aggressive_jetsam_needed_default(__unused int jld_eval_aggressive_count, int *jld_idle_kills, int jld_idle_kill_candidates, int *total_candidates, int *elevated_bucket_count) | |
3806 | { | |
3807 | boolean_t aggressive_jetsam_needed = false; | |
3808 | /* Jetsam Loop Detection - locals */ | |
3809 | memstat_bucket_t *bucket; | |
3810 | int jld_bucket_count = 0; | |
3e170ce0 | 3811 | |
cb323159 A |
3812 | proc_list_lock(); |
3813 | switch (jetsam_aging_policy) { | |
3814 | case kJetsamAgingPolicyLegacy: | |
3815 | bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
3816 | jld_bucket_count = bucket->count; | |
3817 | bucket = &memstat_bucket[JETSAM_PRIORITY_AGING_BAND1]; | |
3818 | jld_bucket_count += bucket->count; | |
3819 | break; | |
3820 | case kJetsamAgingPolicyAppsReclaimedFirst: | |
3821 | bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
3822 | jld_bucket_count = bucket->count; | |
3823 | bucket = &memstat_bucket[system_procs_aging_band]; | |
3824 | jld_bucket_count += bucket->count; | |
3825 | bucket = &memstat_bucket[applications_aging_band]; | |
3826 | jld_bucket_count += bucket->count; | |
3827 | break; | |
3828 | case kJetsamAgingPolicyNone: | |
3829 | default: | |
3830 | bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
3831 | jld_bucket_count = bucket->count; | |
3832 | break; | |
3833 | } | |
3e170ce0 | 3834 | |
cb323159 A |
3835 | bucket = &memstat_bucket[JETSAM_PRIORITY_ELEVATED_INACTIVE]; |
3836 | *elevated_bucket_count = bucket->count; | |
3837 | *total_candidates = jld_bucket_count; | |
3838 | proc_list_unlock(); | |
3e170ce0 | 3839 | |
cb323159 | 3840 | aggressive_jetsam_needed = (*jld_idle_kills > jld_idle_kill_candidates); |
0a7de745 | 3841 | |
3e170ce0 | 3842 | #if DEVELOPMENT || DEBUG |
cb323159 A |
3843 | if (aggressive_jetsam_needed) { |
3844 | printf("memorystatus: aggressive%d: idle candidates: %d, idle kills: %d\n", | |
3845 | jld_eval_aggressive_count, | |
3846 | jld_idle_kill_candidates, | |
3847 | *jld_idle_kills); | |
3848 | } | |
3e170ce0 | 3849 | #endif /* DEVELOPMENT || DEBUG */ |
cb323159 A |
3850 | return aggressive_jetsam_needed; |
3851 | } | |
0a7de745 | 3852 | |
cb323159 A |
3853 | static boolean_t |
3854 | memorystatus_act_aggressive(uint32_t cause, os_reason_t jetsam_reason, int *jld_idle_kills, boolean_t *corpse_list_purged, boolean_t *post_snapshot, uint64_t *memory_reclaimed) | |
3855 | { | |
3856 | boolean_t aggressive_jetsam_needed = false; | |
3857 | boolean_t killed; | |
3858 | uint32_t errors = 0; | |
3859 | uint64_t footprint_of_killed_proc = 0; | |
3860 | int elevated_bucket_count = 0; | |
3861 | int total_candidates = 0; | |
3862 | *memory_reclaimed = 0; | |
0a7de745 | 3863 | |
cb323159 A |
3864 | /* |
3865 | * The aggressive jetsam logic looks at the number of times it has been in the | |
3866 | * aggressive loop to determine the max priority band it should kill upto. The | |
3867 | * static variables below are used to track that property. | |
3868 | * | |
3869 | * To reset those values, the implementation checks if it has been | |
3870 | * memorystatus_jld_eval_period_msecs since the parameters were reset. | |
3871 | */ | |
3872 | static int jld_eval_aggressive_count = 0; | |
3873 | static int32_t jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT; | |
3874 | static uint64_t jld_timestamp_msecs = 0; | |
3875 | static int jld_idle_kill_candidates = 0; | |
39037602 | 3876 | |
cb323159 A |
3877 | if (memorystatus_jld_enabled == FALSE) { |
3878 | /* If aggressive jetsam is disabled, nothing to do here */ | |
3879 | return FALSE; | |
3880 | } | |
0a7de745 | 3881 | |
cb323159 A |
3882 | /* Get current timestamp (msecs only) */ |
3883 | struct timeval jld_now_tstamp = {0, 0}; | |
3884 | uint64_t jld_now_msecs = 0; | |
3885 | microuptime(&jld_now_tstamp); | |
3886 | jld_now_msecs = (jld_now_tstamp.tv_sec * 1000); | |
3e170ce0 | 3887 | |
cb323159 A |
3888 | /* |
3889 | * The aggressive jetsam logic looks at the number of candidates and their | |
3890 | * properties to decide if aggressive jetsam should be engaged. | |
3891 | */ | |
3892 | if (jetsam_aging_policy == kJetsamAgingPolicySysProcsReclaimedFirst) { | |
3e170ce0 | 3893 | /* |
cb323159 A |
3894 | * For the kJetsamAgingPolicySysProcsReclaimedFirst aging policy, the logic looks at the number of |
3895 | * candidates in the idle and deferred band and how many out of them are marked as high relaunch | |
3896 | * probability. | |
3897 | */ | |
3898 | aggressive_jetsam_needed = memorystatus_aggressive_jetsam_needed_sysproc_aging(jld_eval_aggressive_count, | |
3899 | jld_idle_kills, jld_idle_kill_candidates, &total_candidates, &elevated_bucket_count); | |
3900 | } else { | |
3901 | /* | |
3902 | * The other aging policies look at number of candidate processes over a specific time window and | |
3903 | * evaluate if the system is in a jetsam loop. If yes, aggressive jetsam is triggered. | |
3904 | */ | |
3905 | aggressive_jetsam_needed = memorystatus_aggressive_jetsam_needed_default(jld_eval_aggressive_count, | |
3906 | jld_idle_kills, jld_idle_kill_candidates, &total_candidates, &elevated_bucket_count); | |
3907 | } | |
490019cf | 3908 | |
cb323159 A |
3909 | /* |
3910 | * Check if its been really long since the aggressive jetsam evaluation | |
3911 | * parameters have been refreshed. This logic also resets the jld_eval_aggressive_count | |
3912 | * counter to make sure we reset the aggressive jetsam severity. | |
3913 | */ | |
3914 | boolean_t param_reval = false; | |
39037602 | 3915 | |
cb323159 A |
3916 | if ((total_candidates == 0) || |
3917 | (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) { | |
3918 | jld_timestamp_msecs = jld_now_msecs; | |
3919 | jld_idle_kill_candidates = total_candidates; | |
3920 | *jld_idle_kills = 0; | |
3921 | jld_eval_aggressive_count = 0; | |
3922 | jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT; | |
3923 | param_reval = true; | |
3924 | } | |
3e170ce0 | 3925 | |
cb323159 A |
3926 | /* |
3927 | * If the parameters have been updated, re-evaluate the aggressive_jetsam_needed condition for | |
3928 | * the non kJetsamAgingPolicySysProcsReclaimedFirst policy since its based on jld_idle_kill_candidates etc. | |
3929 | */ | |
3930 | if ((param_reval == true) && (jetsam_aging_policy != kJetsamAgingPolicySysProcsReclaimedFirst)) { | |
3931 | aggressive_jetsam_needed = (*jld_idle_kills > jld_idle_kill_candidates); | |
3932 | } | |
490019cf | 3933 | |
cb323159 A |
3934 | /* |
3935 | * It is also possible that the system is down to a very small number of processes in the candidate | |
3936 | * bands. In that case, the decisions made by the memorystatus_aggressive_jetsam_needed_* routines | |
3937 | * would not be useful. In that case, do not trigger aggressive jetsam. | |
3938 | */ | |
3939 | if (total_candidates < kJetsamMinCandidatesThreshold) { | |
490019cf | 3940 | #if DEVELOPMENT || DEBUG |
cb323159 | 3941 | printf("memorystatus: aggressive: [FAILED] Low Candidate Count (current: %d, threshold: %d)\n", total_candidates, kJetsamMinCandidatesThreshold); |
490019cf | 3942 | #endif /* DEVELOPMENT || DEBUG */ |
cb323159 A |
3943 | aggressive_jetsam_needed = false; |
3944 | } | |
490019cf | 3945 | |
cb323159 A |
3946 | if (aggressive_jetsam_needed == false) { |
3947 | /* Either the aging policy or the candidate count decided that aggressive jetsam is not needed. Nothing more to do here. */ | |
3948 | return FALSE; | |
3949 | } | |
0a7de745 | 3950 | |
cb323159 A |
3951 | /* Looks like aggressive jetsam is needed */ |
3952 | jld_eval_aggressive_count++; | |
3953 | ||
3954 | if (jld_eval_aggressive_count == memorystatus_jld_eval_aggressive_count) { | |
f427ee49 | 3955 | memorystatus_approaching_fg_band(corpse_list_purged); |
cb323159 A |
3956 | } else if (jld_eval_aggressive_count > memorystatus_jld_eval_aggressive_count) { |
3957 | /* | |
3958 | * Bump up the jetsam priority limit (eg: the bucket index) | |
3959 | * Enforce bucket index sanity. | |
3960 | */ | |
3961 | if ((memorystatus_jld_eval_aggressive_priority_band_max < 0) || | |
3962 | (memorystatus_jld_eval_aggressive_priority_band_max >= MEMSTAT_BUCKET_COUNT)) { | |
3963 | /* | |
3964 | * Do nothing. Stick with the default level. | |
3965 | */ | |
3966 | } else { | |
3967 | jld_priority_band_max = memorystatus_jld_eval_aggressive_priority_band_max; | |
3968 | } | |
3e170ce0 | 3969 | } |
0a7de745 | 3970 | |
cb323159 A |
3971 | /* Visit elevated processes first */ |
3972 | while (elevated_bucket_count) { | |
3973 | elevated_bucket_count--; | |
0a7de745 | 3974 | |
cb323159 A |
3975 | /* |
3976 | * memorystatus_kill_elevated_process() drops a reference, | |
3977 | * so take another one so we can continue to use this exit reason | |
3978 | * even after it returns. | |
3979 | */ | |
39037602 | 3980 | |
cb323159 A |
3981 | os_reason_ref(jetsam_reason); |
3982 | killed = memorystatus_kill_elevated_process( | |
3983 | cause, | |
3984 | jetsam_reason, | |
3985 | JETSAM_PRIORITY_ELEVATED_INACTIVE, | |
3986 | jld_eval_aggressive_count, | |
3987 | &errors, &footprint_of_killed_proc); | |
3988 | if (killed) { | |
3989 | *post_snapshot = TRUE; | |
3990 | *memory_reclaimed += footprint_of_killed_proc; | |
3991 | if (memorystatus_avail_pages_below_pressure()) { | |
3992 | /* | |
3993 | * Still under pressure. | |
3994 | * Find another pinned processes. | |
3995 | */ | |
3996 | continue; | |
3997 | } else { | |
3998 | return TRUE; | |
3999 | } | |
4000 | } else { | |
4001 | /* | |
4002 | * No pinned processes left to kill. | |
4003 | * Abandon elevated band. | |
4004 | */ | |
4005 | break; | |
4006 | } | |
3e170ce0 | 4007 | } |
0a7de745 | 4008 | |
cb323159 A |
4009 | /* |
4010 | * memorystatus_kill_processes_aggressive() allocates its own | |
4011 | * jetsam_reason so the kMemorystatusKilledProcThrashing cause | |
4012 | * is consistent throughout the aggressive march. | |
4013 | */ | |
4014 | killed = memorystatus_kill_processes_aggressive( | |
4015 | kMemorystatusKilledProcThrashing, | |
4016 | jld_eval_aggressive_count, | |
4017 | jld_priority_band_max, | |
4018 | &errors, &footprint_of_killed_proc); | |
3e170ce0 | 4019 | |
cb323159 A |
4020 | if (killed) { |
4021 | /* Always generate logs after aggressive kill */ | |
4022 | *post_snapshot = TRUE; | |
4023 | *memory_reclaimed += footprint_of_killed_proc; | |
4024 | *jld_idle_kills = 0; | |
0a7de745 | 4025 | return TRUE; |
3e170ce0 | 4026 | } |
cb323159 A |
4027 | |
4028 | return FALSE; | |
3e170ce0 A |
4029 | } |
4030 | ||
cb323159 A |
4031 | |
4032 | static void | |
4033 | memorystatus_thread(void *param __unused, wait_result_t wr __unused) | |
3e170ce0 | 4034 | { |
cb323159 A |
4035 | boolean_t post_snapshot = FALSE; |
4036 | uint32_t errors = 0; | |
4037 | uint32_t hwm_kill = 0; | |
4038 | boolean_t sort_flag = TRUE; | |
4039 | boolean_t corpse_list_purged = FALSE; | |
4040 | int jld_idle_kills = 0; | |
4041 | struct jetsam_thread_state *jetsam_thread = jetsam_current_thread(); | |
4042 | uint64_t total_memory_reclaimed = 0; | |
0a7de745 | 4043 | |
cb323159 A |
4044 | assert(jetsam_thread != NULL); |
4045 | if (jetsam_thread->inited == FALSE) { | |
4046 | /* | |
4047 | * It's the first time the thread has run, so just mark the thread as privileged and block. | |
4048 | * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>. | |
4049 | */ | |
4050 | ||
4051 | char name[32]; | |
4052 | thread_wire(host_priv_self(), current_thread(), TRUE); | |
4053 | snprintf(name, 32, "VM_memorystatus_%d", jetsam_thread->index + 1); | |
4054 | ||
4055 | /* Limit all but one thread to the lower jetsam bands, as that's where most of the victims are. */ | |
4056 | if (jetsam_thread->index == 0) { | |
4057 | if (vm_pageout_state.vm_restricted_to_single_processor == TRUE) { | |
4058 | thread_vm_bind_group_add(); | |
4059 | } | |
4060 | jetsam_thread->limit_to_low_bands = FALSE; | |
4061 | } else { | |
4062 | jetsam_thread->limit_to_low_bands = TRUE; | |
4063 | } | |
f427ee49 A |
4064 | #if CONFIG_THREAD_GROUPS |
4065 | thread_group_vm_add(); | |
4066 | #endif | |
cb323159 A |
4067 | thread_set_thread_name(current_thread(), name); |
4068 | jetsam_thread->inited = TRUE; | |
4069 | memorystatus_thread_block(0, memorystatus_thread); | |
39037602 A |
4070 | } |
4071 | ||
cb323159 | 4072 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START, |
f427ee49 | 4073 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, memorystatus_jld_enabled, memorystatus_jld_eval_period_msecs, memorystatus_jld_eval_aggressive_count, 0); |
0a7de745 | 4074 | |
cb323159 A |
4075 | /* |
4076 | * Jetsam aware version. | |
4077 | * | |
4078 | * The VM pressure notification thread is working it's way through clients in parallel. | |
4079 | * | |
4080 | * So, while the pressure notification thread is targeting processes in order of | |
4081 | * increasing jetsam priority, we can hopefully reduce / stop it's work by killing | |
4082 | * any processes that have exceeded their highwater mark. | |
4083 | * | |
4084 | * If we run out of HWM processes and our available pages drops below the critical threshold, then, | |
4085 | * we target the least recently used process in order of increasing jetsam priority (exception: the FG band). | |
4086 | */ | |
4087 | while (memorystatus_action_needed()) { | |
4088 | boolean_t killed; | |
4089 | int32_t priority; | |
4090 | uint32_t cause; | |
4091 | uint64_t memory_reclaimed = 0; | |
4092 | uint64_t jetsam_reason_code = JETSAM_REASON_INVALID; | |
4093 | os_reason_t jetsam_reason = OS_REASON_NULL; | |
3e170ce0 | 4094 | |
cb323159 A |
4095 | cause = kill_under_pressure_cause; |
4096 | switch (cause) { | |
4097 | case kMemorystatusKilledFCThrashing: | |
4098 | jetsam_reason_code = JETSAM_REASON_MEMORY_FCTHRASHING; | |
4099 | break; | |
4100 | case kMemorystatusKilledVMCompressorThrashing: | |
4101 | jetsam_reason_code = JETSAM_REASON_MEMORY_VMCOMPRESSOR_THRASHING; | |
4102 | break; | |
4103 | case kMemorystatusKilledVMCompressorSpaceShortage: | |
4104 | jetsam_reason_code = JETSAM_REASON_MEMORY_VMCOMPRESSOR_SPACE_SHORTAGE; | |
4105 | break; | |
4106 | case kMemorystatusKilledZoneMapExhaustion: | |
4107 | jetsam_reason_code = JETSAM_REASON_ZONE_MAP_EXHAUSTION; | |
4108 | break; | |
4109 | case kMemorystatusKilledVMPageShortage: | |
4110 | /* falls through */ | |
4111 | default: | |
4112 | jetsam_reason_code = JETSAM_REASON_MEMORY_VMPAGESHORTAGE; | |
4113 | cause = kMemorystatusKilledVMPageShortage; | |
4114 | break; | |
4115 | } | |
0a7de745 | 4116 | |
cb323159 A |
4117 | /* Highwater */ |
4118 | boolean_t is_critical = TRUE; | |
4119 | if (memorystatus_act_on_hiwat_processes(&errors, &hwm_kill, &post_snapshot, &is_critical, &memory_reclaimed)) { | |
4120 | total_memory_reclaimed += memory_reclaimed; | |
4121 | if (is_critical == FALSE) { | |
4122 | /* | |
4123 | * For now, don't kill any other processes. | |
4124 | */ | |
4125 | break; | |
4126 | } else { | |
4127 | goto done; | |
4128 | } | |
4129 | } | |
0a7de745 | 4130 | |
cb323159 A |
4131 | jetsam_reason = os_reason_create(OS_REASON_JETSAM, jetsam_reason_code); |
4132 | if (jetsam_reason == OS_REASON_NULL) { | |
4133 | printf("memorystatus_thread: failed to allocate jetsam reason\n"); | |
39236c6e | 4134 | } |
0a7de745 | 4135 | |
cb323159 A |
4136 | /* Only unlimited jetsam threads should act aggressive */ |
4137 | if (!jetsam_thread->limit_to_low_bands && | |
4138 | memorystatus_act_aggressive(cause, jetsam_reason, &jld_idle_kills, &corpse_list_purged, &post_snapshot, &memory_reclaimed)) { | |
4139 | total_memory_reclaimed += memory_reclaimed; | |
4140 | goto done; | |
d1ecb069 | 4141 | } |
3e170ce0 | 4142 | |
cb323159 A |
4143 | /* |
4144 | * memorystatus_kill_top_process() drops a reference, | |
4145 | * so take another one so we can continue to use this exit reason | |
4146 | * even after it returns | |
4147 | */ | |
4148 | os_reason_ref(jetsam_reason); | |
3e170ce0 | 4149 | |
cb323159 A |
4150 | /* LRU */ |
4151 | killed = memorystatus_kill_top_process(TRUE, sort_flag, cause, jetsam_reason, &priority, &errors, &memory_reclaimed); | |
4152 | sort_flag = FALSE; | |
4153 | ||
4154 | if (killed) { | |
4155 | total_memory_reclaimed += memory_reclaimed; | |
4156 | if (memorystatus_post_snapshot(priority, cause) == TRUE) { | |
4157 | post_snapshot = TRUE; | |
4158 | } | |
4159 | ||
4160 | /* Jetsam Loop Detection */ | |
4161 | if (memorystatus_jld_enabled == TRUE) { | |
4162 | if ((priority == JETSAM_PRIORITY_IDLE) || (priority == system_procs_aging_band) || (priority == applications_aging_band)) { | |
4163 | jld_idle_kills++; | |
4164 | } else { | |
4165 | /* | |
4166 | * We've reached into bands beyond idle deferred. | |
4167 | * We make no attempt to monitor them | |
4168 | */ | |
4169 | } | |
6d2010ae | 4170 | } |
316670eb | 4171 | |
cb323159 A |
4172 | /* |
4173 | * If we have jetsammed a process in or above JETSAM_PRIORITY_UI_SUPPORT | |
4174 | * then we attempt to relieve pressure by purging corpse memory and notifying | |
4175 | * anybody wanting to know this. | |
4176 | */ | |
4177 | if (priority >= JETSAM_PRIORITY_UI_SUPPORT) { | |
f427ee49 | 4178 | memorystatus_approaching_fg_band(&corpse_list_purged); |
0a7de745 | 4179 | } |
cb323159 | 4180 | goto done; |
39236c6e | 4181 | } |
316670eb | 4182 | |
cb323159 A |
4183 | if (memorystatus_avail_pages_below_critical()) { |
4184 | /* | |
4185 | * Still under pressure and unable to kill a process - purge corpse memory | |
f427ee49 | 4186 | * and get everything back from the pmap. |
cb323159 | 4187 | */ |
f427ee49 | 4188 | pmap_release_pages_fast(); |
cb323159 A |
4189 | if (total_corpses_count() > 0) { |
4190 | task_purge_all_corpses(); | |
4191 | corpse_list_purged = TRUE; | |
a39ff7e2 | 4192 | } |
0a7de745 | 4193 | |
cb323159 | 4194 | if (!jetsam_thread->limit_to_low_bands && memorystatus_avail_pages_below_critical()) { |
a39ff7e2 | 4195 | /* |
cb323159 | 4196 | * Still under pressure and unable to kill a process - panic |
a39ff7e2 | 4197 | */ |
f427ee49 | 4198 | panic("memorystatus_jetsam_thread: no victim! available pages:%llu\n", (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES); |
a39ff7e2 | 4199 | } |
cb323159 | 4200 | } |
0a7de745 | 4201 | |
cb323159 | 4202 | done: |
a39ff7e2 | 4203 | |
cb323159 A |
4204 | /* |
4205 | * We do not want to over-kill when thrashing has been detected. | |
4206 | * To avoid that, we reset the flag here and notify the | |
4207 | * compressor. | |
4208 | */ | |
4209 | if (is_reason_thrashing(kill_under_pressure_cause)) { | |
4210 | kill_under_pressure_cause = 0; | |
4211 | #if CONFIG_JETSAM | |
4212 | vm_thrashing_jetsam_done(); | |
4213 | #endif /* CONFIG_JETSAM */ | |
4214 | } else if (is_reason_zone_map_exhaustion(kill_under_pressure_cause)) { | |
4215 | kill_under_pressure_cause = 0; | |
6d2010ae | 4216 | } |
cb323159 A |
4217 | |
4218 | os_reason_free(jetsam_reason); | |
6d2010ae | 4219 | } |
0a7de745 | 4220 | |
cb323159 | 4221 | kill_under_pressure_cause = 0; |
0a7de745 | 4222 | |
cb323159 A |
4223 | if (errors) { |
4224 | memorystatus_clear_errors(); | |
4225 | } | |
39037602 | 4226 | |
cb323159 | 4227 | if (post_snapshot) { |
39037602 | 4228 | proc_list_lock(); |
cb323159 A |
4229 | size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + |
4230 | sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count); | |
4231 | uint64_t timestamp_now = mach_absolute_time(); | |
4232 | memorystatus_jetsam_snapshot->notification_time = timestamp_now; | |
4233 | memorystatus_jetsam_snapshot->js_gencount++; | |
4234 | if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 || | |
4235 | timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) { | |
4236 | proc_list_unlock(); | |
4237 | int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); | |
4238 | if (!ret) { | |
4239 | proc_list_lock(); | |
4240 | memorystatus_jetsam_snapshot_last_timestamp = timestamp_now; | |
4241 | proc_list_unlock(); | |
4242 | } | |
4243 | } else { | |
4244 | proc_list_unlock(); | |
4245 | } | |
39037602 | 4246 | } |
0a7de745 | 4247 | |
cb323159 | 4248 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END, |
f427ee49 | 4249 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, total_memory_reclaimed, 0, 0, 0); |
39037602 | 4250 | |
cb323159 | 4251 | memorystatus_thread_block(0, memorystatus_thread); |
39037602 A |
4252 | } |
4253 | ||
4254 | /* | |
cb323159 A |
4255 | * Returns TRUE: |
4256 | * when an idle-exitable proc was killed | |
4257 | * Returns FALSE: | |
4258 | * when there are no more idle-exitable procs found | |
4259 | * when the attempt to kill an idle-exitable proc failed | |
39037602 | 4260 | */ |
cb323159 A |
4261 | boolean_t |
4262 | memorystatus_idle_exit_from_VM(void) | |
39037602 | 4263 | { |
cb323159 A |
4264 | /* |
4265 | * This routine should no longer be needed since we are | |
4266 | * now using jetsam bands on all platforms and so will deal | |
4267 | * with IDLE processes within the memorystatus thread itself. | |
4268 | * | |
4269 | * But we still use it because we observed that macos systems | |
4270 | * started heavy compression/swapping with a bunch of | |
4271 | * idle-exitable processes alive and doing nothing. We decided | |
4272 | * to rather kill those processes than start swapping earlier. | |
4273 | */ | |
d9a64523 | 4274 | |
cb323159 A |
4275 | return kill_idle_exit_proc(); |
4276 | } | |
39037602 | 4277 | |
cb323159 A |
4278 | /* |
4279 | * Callback invoked when allowable physical memory footprint exceeded | |
4280 | * (dirty pages + IOKit mappings) | |
4281 | * | |
4282 | * This is invoked for both advisory, non-fatal per-task high watermarks, | |
4283 | * as well as the fatal task memory limits. | |
4284 | */ | |
4285 | void | |
4286 | memorystatus_on_ledger_footprint_exceeded(boolean_t warning, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal) | |
4287 | { | |
4288 | os_reason_t jetsam_reason = OS_REASON_NULL; | |
39037602 | 4289 | |
cb323159 | 4290 | proc_t p = current_proc(); |
39037602 | 4291 | |
cb323159 A |
4292 | #if VM_PRESSURE_EVENTS |
4293 | if (warning == TRUE) { | |
39037602 | 4294 | /* |
cb323159 A |
4295 | * This is a warning path which implies that the current process is close, but has |
4296 | * not yet exceeded its per-process memory limit. | |
4297 | */ | |
f427ee49 | 4298 | if (memorystatus_warn_process(p, memlimit_is_active, memlimit_is_fatal, FALSE /* not exceeded */) != TRUE) { |
cb323159 A |
4299 | /* Print warning, since it's possible that task has not registered for pressure notifications */ |
4300 | os_log(OS_LOG_DEFAULT, "memorystatus_on_ledger_footprint_exceeded: failed to warn the current task (%d exiting, or no handler registered?).\n", p->p_pid); | |
39037602 | 4301 | } |
cb323159 A |
4302 | return; |
4303 | } | |
4304 | #endif /* VM_PRESSURE_EVENTS */ | |
39037602 | 4305 | |
cb323159 | 4306 | if (memlimit_is_fatal) { |
39037602 | 4307 | /* |
cb323159 A |
4308 | * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task |
4309 | * has violated either the system-wide per-task memory limit OR its own task limit. | |
39037602 | 4310 | */ |
cb323159 A |
4311 | jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_PERPROCESSLIMIT); |
4312 | if (jetsam_reason == NULL) { | |
4313 | printf("task_exceeded footprint: failed to allocate jetsam reason\n"); | |
4314 | } else if (corpse_for_fatal_memkill != 0 && proc_send_synchronous_EXC_RESOURCE(p) == FALSE) { | |
4315 | /* Set OS_REASON_FLAG_GENERATE_CRASH_REPORT to generate corpse */ | |
4316 | jetsam_reason->osr_flags |= OS_REASON_FLAG_GENERATE_CRASH_REPORT; | |
4317 | } | |
39037602 | 4318 | |
cb323159 A |
4319 | if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit, jetsam_reason) != TRUE) { |
4320 | printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n"); | |
4321 | } | |
4322 | } else { | |
4323 | /* | |
4324 | * HWM offender exists. Done without locks or synchronization. | |
4325 | * See comment near its declaration for more details. | |
4326 | */ | |
4327 | memorystatus_hwm_candidates = TRUE; | |
39037602 | 4328 | |
cb323159 A |
4329 | #if VM_PRESSURE_EVENTS |
4330 | /* | |
4331 | * The current process is not in the warning path. | |
4332 | * This path implies the current process has exceeded a non-fatal (soft) memory limit. | |
4333 | * Failure to send note is ignored here. | |
4334 | */ | |
f427ee49 | 4335 | (void)memorystatus_warn_process(p, memlimit_is_active, memlimit_is_fatal, TRUE /* exceeded */); |
39037602 | 4336 | |
cb323159 | 4337 | #endif /* VM_PRESSURE_EVENTS */ |
316670eb | 4338 | } |
316670eb | 4339 | } |
2d21ac55 | 4340 | |
cb323159 A |
4341 | void |
4342 | memorystatus_log_exception(const int max_footprint_mb, boolean_t memlimit_is_active, boolean_t memlimit_is_fatal) | |
0a7de745 | 4343 | { |
cb323159 A |
4344 | proc_t p = current_proc(); |
4345 | ||
39037602 | 4346 | /* |
cb323159 A |
4347 | * The limit violation is logged here, but only once per process per limit. |
4348 | * Soft memory limit is a non-fatal high-water-mark | |
4349 | * Hard memory limit is a fatal custom-task-limit or system-wide per-task memory limit. | |
39037602 | 4350 | */ |
0a7de745 | 4351 | |
cb323159 A |
4352 | os_log_with_startup_serial(OS_LOG_DEFAULT, "EXC_RESOURCE -> %s[%d] exceeded mem limit: %s%s %d MB (%s)\n", |
4353 | ((p && *p->p_name) ? p->p_name : "unknown"), (p ? p->p_pid : -1), (memlimit_is_active ? "Active" : "Inactive"), | |
4354 | (memlimit_is_fatal ? "Hard" : "Soft"), max_footprint_mb, | |
4355 | (memlimit_is_fatal ? "fatal" : "non-fatal")); | |
4356 | ||
4357 | return; | |
39236c6e | 4358 | } |
2d21ac55 | 4359 | |
39037602 | 4360 | |
cb323159 A |
4361 | /* |
4362 | * Description: | |
4363 | * Evaluates process state to determine which limit | |
4364 | * should be applied (active vs. inactive limit). | |
4365 | * | |
4366 | * Processes that have the 'elevated inactive jetsam band' attribute | |
4367 | * are first evaluated based on their current priority band. | |
4368 | * presently elevated ==> active | |
4369 | * | |
4370 | * Processes that opt into dirty tracking are evaluated | |
4371 | * based on clean vs dirty state. | |
4372 | * dirty ==> active | |
4373 | * clean ==> inactive | |
4374 | * | |
4375 | * Process that do not opt into dirty tracking are | |
4376 | * evalulated based on priority level. | |
4377 | * Foreground or above ==> active | |
4378 | * Below Foreground ==> inactive | |
4379 | * | |
4380 | * Return: TRUE if active | |
4381 | * False if inactive | |
4382 | */ | |
2d21ac55 | 4383 | |
cb323159 A |
4384 | static boolean_t |
4385 | proc_jetsam_state_is_active_locked(proc_t p) | |
0a7de745 | 4386 | { |
cb323159 A |
4387 | if ((p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND) && |
4388 | (p->p_memstat_effectivepriority == JETSAM_PRIORITY_ELEVATED_INACTIVE)) { | |
4389 | /* | |
4390 | * process has the 'elevated inactive jetsam band' attribute | |
4391 | * and process is present in the elevated band | |
4392 | * implies active state | |
4393 | */ | |
4394 | return TRUE; | |
4395 | } else if (p->p_memstat_dirty & P_DIRTY_TRACK) { | |
4396 | /* | |
4397 | * process has opted into dirty tracking | |
4398 | * active state is based on dirty vs. clean | |
4399 | */ | |
4400 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { | |
4401 | /* | |
4402 | * process is dirty | |
4403 | * implies active state | |
4404 | */ | |
4405 | return TRUE; | |
4406 | } else { | |
4407 | /* | |
4408 | * process is clean | |
4409 | * implies inactive state | |
4410 | */ | |
4411 | return FALSE; | |
d9a64523 | 4412 | } |
cb323159 A |
4413 | } else if (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND) { |
4414 | /* | |
4415 | * process is Foreground or higher | |
4416 | * implies active state | |
4417 | */ | |
4418 | return TRUE; | |
4419 | } else { | |
4420 | /* | |
4421 | * process found below Foreground | |
4422 | * implies inactive state | |
4423 | */ | |
4424 | return FALSE; | |
d9a64523 A |
4425 | } |
4426 | } | |
4427 | ||
cb323159 A |
4428 | static boolean_t |
4429 | memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason) | |
0a7de745 | 4430 | { |
cb323159 | 4431 | boolean_t res; |
39037602 | 4432 | |
cb323159 A |
4433 | uint32_t errors = 0; |
4434 | uint64_t memory_reclaimed = 0; | |
b0d623f7 | 4435 | |
cb323159 A |
4436 | if (victim_pid == -1) { |
4437 | /* No pid, so kill first process */ | |
4438 | res = memorystatus_kill_top_process(TRUE, TRUE, cause, jetsam_reason, NULL, &errors, &memory_reclaimed); | |
fe8ab488 | 4439 | } else { |
cb323159 A |
4440 | res = memorystatus_kill_specific_process(victim_pid, cause, jetsam_reason); |
4441 | } | |
39037602 | 4442 | |
cb323159 A |
4443 | if (errors) { |
4444 | memorystatus_clear_errors(); | |
fe8ab488 | 4445 | } |
fe8ab488 | 4446 | |
cb323159 A |
4447 | if (res == TRUE) { |
4448 | /* Fire off snapshot notification */ | |
4449 | proc_list_lock(); | |
4450 | size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + | |
4451 | sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count; | |
4452 | uint64_t timestamp_now = mach_absolute_time(); | |
4453 | memorystatus_jetsam_snapshot->notification_time = timestamp_now; | |
4454 | if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 || | |
4455 | timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) { | |
4456 | proc_list_unlock(); | |
4457 | int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); | |
4458 | if (!ret) { | |
4459 | proc_list_lock(); | |
4460 | memorystatus_jetsam_snapshot_last_timestamp = timestamp_now; | |
4461 | proc_list_unlock(); | |
4462 | } | |
4463 | } else { | |
4464 | proc_list_unlock(); | |
4465 | } | |
39037602 A |
4466 | } |
4467 | ||
cb323159 | 4468 | return res; |
39236c6e A |
4469 | } |
4470 | ||
cb323159 A |
4471 | /* |
4472 | * Jetsam a specific process. | |
4473 | */ | |
4474 | static boolean_t | |
4475 | memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause, os_reason_t jetsam_reason) | |
0a7de745 | 4476 | { |
cb323159 A |
4477 | boolean_t killed; |
4478 | proc_t p; | |
4479 | uint64_t killtime = 0; | |
4480 | uint64_t footprint_of_killed_proc; | |
4481 | clock_sec_t tv_sec; | |
4482 | clock_usec_t tv_usec; | |
4483 | uint32_t tv_msec; | |
4484 | ||
4485 | /* TODO - add a victim queue and push this into the main jetsam thread */ | |
5ba3f43e | 4486 | |
cb323159 A |
4487 | p = proc_find(victim_pid); |
4488 | if (!p) { | |
4489 | os_reason_free(jetsam_reason); | |
4490 | return FALSE; | |
5ba3f43e | 4491 | } |
5ba3f43e | 4492 | |
cb323159 | 4493 | proc_list_lock(); |
6d2010ae | 4494 | |
cb323159 A |
4495 | if (memorystatus_jetsam_snapshot_count == 0) { |
4496 | memorystatus_init_jetsam_snapshot_locked(NULL, 0); | |
4497 | } | |
3e170ce0 | 4498 | |
cb323159 A |
4499 | killtime = mach_absolute_time(); |
4500 | absolutetime_to_microtime(killtime, &tv_sec, &tv_usec); | |
4501 | tv_msec = tv_usec / 1000; | |
3e170ce0 | 4502 | |
cb323159 | 4503 | memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime); |
d9a64523 | 4504 | |
cb323159 | 4505 | proc_list_unlock(); |
d9a64523 | 4506 | |
cb323159 | 4507 | killed = memorystatus_do_kill(p, cause, jetsam_reason, &footprint_of_killed_proc); |
d9a64523 | 4508 | |
cb323159 A |
4509 | os_log_with_startup_serial(OS_LOG_DEFAULT, "%lu.%03d memorystatus: killing_specific_process pid %d [%s] (%s %d) %lluKB - memorystatus_available_pages: %llu\n", |
4510 | (unsigned long)tv_sec, tv_msec, victim_pid, ((p && *p->p_name) ? p->p_name : "unknown"), | |
4511 | memorystatus_kill_cause_name[cause], (p ? p->p_memstat_effectivepriority: -1), | |
f427ee49 | 4512 | footprint_of_killed_proc >> 10, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES); |
cb323159 A |
4513 | |
4514 | proc_rele(p); | |
4515 | ||
4516 | return killed; | |
6d2010ae A |
4517 | } |
4518 | ||
cb323159 A |
4519 | |
4520 | /* | |
4521 | * Toggle the P_MEMSTAT_TERMINATED state. | |
4522 | * Takes the proc_list_lock. | |
4523 | */ | |
4524 | void | |
4525 | proc_memstat_terminated(proc_t p, boolean_t set) | |
d9a64523 | 4526 | { |
cb323159 A |
4527 | #if DEVELOPMENT || DEBUG |
4528 | if (p) { | |
4529 | proc_list_lock(); | |
4530 | if (set == TRUE) { | |
4531 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; | |
4532 | } else { | |
4533 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
4534 | } | |
4535 | proc_list_unlock(); | |
4536 | } | |
4537 | #else | |
4538 | #pragma unused(p, set) | |
d9a64523 | 4539 | /* |
cb323159 | 4540 | * do nothing |
d9a64523 | 4541 | */ |
cb323159 A |
4542 | #endif /* DEVELOPMENT || DEBUG */ |
4543 | return; | |
4544 | } | |
d9a64523 | 4545 | |
d9a64523 | 4546 | |
cb323159 A |
#if CONFIG_JETSAM
/*
 * This is invoked when cpulimits have been exceeded while in fatal mode.
 * The jetsam_flags do not apply as those are for memory related kills,
 * so 0 is passed. This routine is used so that the offending (current)
 * process is killed with a non-zero exit status.
 */
void
jetsam_on_ledger_cpulimit_exceeded(void)
{
	const int jetsam_flags = 0; /* make it obvious */
	proc_t p = current_proc();
	const char *proc_name = (*p->p_name ? p->p_name : "(unknown)");

	printf("task_exceeded_cpulimit: killing pid %d [%s]\n",
	    p->p_pid, proc_name);

	os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_CPULIMIT);
	if (jetsam_reason == OS_REASON_NULL) {
		/* Not fatal: the kill is still attempted without a reason */
		printf("task_exceeded_cpulimit: unable to allocate memory for jetsam reason\n");
	}

	int retval = jetsam_do_kill(p, jetsam_flags, jetsam_reason);
	if (retval != 0) {
		printf("task_exceeded_cpulimit: failed to kill current task (exiting?).\n");
	}
}

#endif /* CONFIG_JETSAM */
d9a64523 | 4578 | |
cb323159 A |
4579 | static void |
4580 | memorystatus_get_task_memory_region_count(task_t task, uint64_t *count) | |
4581 | { | |
4582 | assert(task); | |
4583 | assert(count); | |
d9a64523 | 4584 | |
cb323159 A |
4585 | *count = get_task_memory_region_count(task); |
4586 | } | |
d9a64523 | 4587 | |
d9a64523 | 4588 | |
cb323159 A |
4589 | #define MEMORYSTATUS_VM_MAP_FORK_ALLOWED 0x100000000 |
4590 | #define MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED 0x200000000 | |
4591 | ||
4592 | #if DEVELOPMENT || DEBUG | |
d9a64523 | 4593 | |
3e170ce0 | 4594 | /* |
cb323159 A |
4595 | * Sysctl only used to test memorystatus_allowed_vm_map_fork() path. |
4596 | * set a new pidwatch value | |
4597 | * or | |
4598 | * get the current pidwatch value | |
d9a64523 | 4599 | * |
cb323159 A |
4600 | * The pidwatch_val starts out with a PID to watch for in the map_fork path. |
4601 | * Its value is: | |
4602 | * - OR'd with MEMORYSTATUS_VM_MAP_FORK_ALLOWED if we allow the map_fork. | |
4603 | * - OR'd with MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED if we disallow the map_fork. | |
4604 | * - set to -1ull if the map_fork() is aborted for other reasons. | |
3e170ce0 | 4605 | */ |
3e170ce0 | 4606 | |
cb323159 | 4607 | uint64_t memorystatus_vm_map_fork_pidwatch_val = 0; |
3e170ce0 | 4608 | |
cb323159 A |
4609 | static int sysctl_memorystatus_vm_map_fork_pidwatch SYSCTL_HANDLER_ARGS { |
4610 | #pragma unused(oidp, arg1, arg2) | |
3e170ce0 | 4611 | |
cb323159 A |
4612 | uint64_t new_value = 0; |
4613 | uint64_t old_value = 0; | |
4614 | int error = 0; | |
3e170ce0 | 4615 | |
cb323159 A |
4616 | /* |
4617 | * The pid is held in the low 32 bits. | |
4618 | * The 'allowed' flags are in the upper 32 bits. | |
4619 | */ | |
4620 | old_value = memorystatus_vm_map_fork_pidwatch_val; | |
3e170ce0 | 4621 | |
cb323159 A |
4622 | error = sysctl_io_number(req, old_value, sizeof(old_value), &new_value, NULL); |
4623 | ||
4624 | if (error || !req->newptr) { | |
4625 | /* | |
4626 | * No new value passed in. | |
4627 | */ | |
4628 | return error; | |
3e170ce0 A |
4629 | } |
4630 | ||
cb323159 A |
4631 | /* |
4632 | * A new pid was passed in via req->newptr. | |
4633 | * Ignore any attempt to set the higher order bits. | |
4634 | */ | |
4635 | memorystatus_vm_map_fork_pidwatch_val = new_value & 0xFFFFFFFF; | |
4636 | printf("memorystatus: pidwatch old_value = 0x%llx, new_value = 0x%llx \n", old_value, new_value); | |
3e170ce0 | 4637 | |
cb323159 A |
4638 | return error; |
4639 | } | |
3e170ce0 | 4640 | |
cb323159 A |
4641 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_map_fork_pidwatch, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, |
4642 | 0, 0, sysctl_memorystatus_vm_map_fork_pidwatch, "Q", "get/set pid watched for in vm_map_fork"); | |
3e170ce0 | 4643 | |
3e170ce0 | 4644 | |
cb323159 A |
4645 | /* |
4646 | * Record if a watched process fails to qualify for a vm_map_fork(). | |
4647 | */ | |
4648 | void | |
4649 | memorystatus_abort_vm_map_fork(task_t task) | |
4650 | { | |
4651 | if (memorystatus_vm_map_fork_pidwatch_val != 0) { | |
4652 | proc_t p = get_bsdtask_info(task); | |
4653 | if (p != NULL && memorystatus_vm_map_fork_pidwatch_val == (uint64_t)p->p_pid) { | |
4654 | memorystatus_vm_map_fork_pidwatch_val = -1ull; | |
3e170ce0 | 4655 | } |
cb323159 A |
4656 | } |
4657 | } | |
3e170ce0 | 4658 | |
cb323159 A |
4659 | static void |
4660 | set_vm_map_fork_pidwatch(task_t task, uint64_t x) | |
4661 | { | |
4662 | if (memorystatus_vm_map_fork_pidwatch_val != 0) { | |
4663 | proc_t p = get_bsdtask_info(task); | |
4664 | if (p && (memorystatus_vm_map_fork_pidwatch_val == (uint64_t)p->p_pid)) { | |
4665 | memorystatus_vm_map_fork_pidwatch_val |= x; | |
4666 | } | |
4667 | } | |
4668 | } | |
d9a64523 | 4669 | |
cb323159 | 4670 | #else /* DEVELOPMENT || DEBUG */ |
3e170ce0 | 4671 | |
39037602 | 4672 | |
cb323159 A |
4673 | static void |
4674 | set_vm_map_fork_pidwatch(task_t task, uint64_t x) | |
4675 | { | |
4676 | #pragma unused(task) | |
4677 | #pragma unused(x) | |
4678 | } | |
3e170ce0 | 4679 | |
cb323159 | 4680 | #endif /* DEVELOPMENT || DEBUG */ |
3e170ce0 | 4681 | |
cb323159 A |
4682 | /* |
4683 | * Called during EXC_RESOURCE handling when a process exceeds a soft | |
4684 | * memory limit. This is the corpse fork path and here we decide if | |
4685 | * vm_map_fork will be allowed when creating the corpse. | |
4686 | * The task being considered is suspended. | |
4687 | * | |
4688 | * By default, a vm_map_fork is allowed to proceed. | |
4689 | * | |
4690 | * A few simple policy assumptions: | |
cb323159 | 4691 | * If the device has a zero system-wide task limit, |
f427ee49 A |
4692 | * then the vm_map_fork is allowed. macOS always has a zero |
4693 | * system wide task limit (unless overriden by a boot-arg). | |
cb323159 A |
4694 | * |
4695 | * And if a process's memory footprint calculates less | |
94ff46dc | 4696 | * than or equal to quarter of the system-wide task limit, |
cb323159 A |
4697 | * then the vm_map_fork is allowed. This calculation |
4698 | * is based on the assumption that a process can | |
4699 | * munch memory up to the system-wide task limit. | |
4700 | */ | |
94ff46dc | 4701 | extern boolean_t corpse_threshold_system_limit; |
cb323159 A |
4702 | boolean_t |
4703 | memorystatus_allowed_vm_map_fork(task_t task) | |
4704 | { | |
4705 | boolean_t is_allowed = TRUE; /* default */ | |
d9a64523 | 4706 | |
cb323159 A |
4707 | uint64_t footprint_in_bytes; |
4708 | uint64_t max_allowed_bytes; | |
3e170ce0 | 4709 | |
cb323159 A |
4710 | if (max_task_footprint_mb == 0) { |
4711 | set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_ALLOWED); | |
4712 | return is_allowed; | |
4713 | } | |
3e170ce0 | 4714 | |
cb323159 | 4715 | footprint_in_bytes = get_task_phys_footprint(task); |
3e170ce0 | 4716 | |
cb323159 | 4717 | /* |
94ff46dc | 4718 | * Maximum is 1/4 of the system-wide task limit by default. |
cb323159 A |
4719 | */ |
4720 | max_allowed_bytes = ((uint64_t)max_task_footprint_mb * 1024 * 1024) >> 2; | |
3e170ce0 | 4721 | |
94ff46dc A |
4722 | #if DEBUG || DEVELOPMENT |
4723 | if (corpse_threshold_system_limit) { | |
4724 | max_allowed_bytes = (uint64_t)max_task_footprint_mb * (1UL << 20); | |
4725 | } | |
4726 | #endif /* DEBUG || DEVELOPMENT */ | |
4727 | ||
cb323159 A |
4728 | if (footprint_in_bytes > max_allowed_bytes) { |
4729 | printf("memorystatus disallowed vm_map_fork %lld %lld\n", footprint_in_bytes, max_allowed_bytes); | |
4730 | set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_NOT_ALLOWED); | |
4731 | return !is_allowed; | |
4732 | } | |
3e170ce0 | 4733 | |
cb323159 A |
4734 | set_vm_map_fork_pidwatch(task, MEMORYSTATUS_VM_MAP_FORK_ALLOWED); |
4735 | return is_allowed; | |
4736 | } | |
d9a64523 | 4737 | |
cb323159 A |
4738 | void |
4739 | memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages) | |
4740 | { | |
4741 | assert(task); | |
4742 | assert(footprint); | |
d9a64523 | 4743 | |
cb323159 | 4744 | uint64_t pages; |
d9a64523 | 4745 | |
cb323159 A |
4746 | pages = (get_task_phys_footprint(task) / PAGE_SIZE_64); |
4747 | assert(((uint32_t)pages) == pages); | |
4748 | *footprint = (uint32_t)pages; | |
d9a64523 | 4749 | |
cb323159 A |
4750 | if (max_footprint_lifetime) { |
4751 | pages = (get_task_phys_footprint_lifetime_max(task) / PAGE_SIZE_64); | |
4752 | assert(((uint32_t)pages) == pages); | |
4753 | *max_footprint_lifetime = (uint32_t)pages; | |
4754 | } | |
4755 | if (purgeable_pages) { | |
4756 | pages = (get_task_purgeable_size(task) / PAGE_SIZE_64); | |
4757 | assert(((uint32_t)pages) == pages); | |
4758 | *purgeable_pages = (uint32_t)pages; | |
4759 | } | |
4760 | } | |
d9a64523 | 4761 | |
cb323159 A |
4762 | static void |
4763 | memorystatus_get_task_phys_footprint_page_counts(task_t task, | |
4764 | uint64_t *internal_pages, uint64_t *internal_compressed_pages, | |
4765 | uint64_t *purgeable_nonvolatile_pages, uint64_t *purgeable_nonvolatile_compressed_pages, | |
4766 | uint64_t *alternate_accounting_pages, uint64_t *alternate_accounting_compressed_pages, | |
f427ee49 | 4767 | uint64_t *iokit_mapped_pages, uint64_t *page_table_pages, uint64_t *frozen_to_swap_pages) |
cb323159 A |
4768 | { |
4769 | assert(task); | |
d9a64523 | 4770 | |
cb323159 A |
4771 | if (internal_pages) { |
4772 | *internal_pages = (get_task_internal(task) / PAGE_SIZE_64); | |
4773 | } | |
d9a64523 | 4774 | |
cb323159 A |
4775 | if (internal_compressed_pages) { |
4776 | *internal_compressed_pages = (get_task_internal_compressed(task) / PAGE_SIZE_64); | |
4777 | } | |
d9a64523 | 4778 | |
cb323159 A |
4779 | if (purgeable_nonvolatile_pages) { |
4780 | *purgeable_nonvolatile_pages = (get_task_purgeable_nonvolatile(task) / PAGE_SIZE_64); | |
4781 | } | |
d9a64523 | 4782 | |
cb323159 A |
4783 | if (purgeable_nonvolatile_compressed_pages) { |
4784 | *purgeable_nonvolatile_compressed_pages = (get_task_purgeable_nonvolatile_compressed(task) / PAGE_SIZE_64); | |
4785 | } | |
d9a64523 | 4786 | |
cb323159 A |
4787 | if (alternate_accounting_pages) { |
4788 | *alternate_accounting_pages = (get_task_alternate_accounting(task) / PAGE_SIZE_64); | |
4789 | } | |
d9a64523 | 4790 | |
cb323159 A |
4791 | if (alternate_accounting_compressed_pages) { |
4792 | *alternate_accounting_compressed_pages = (get_task_alternate_accounting_compressed(task) / PAGE_SIZE_64); | |
3e170ce0 A |
4793 | } |
4794 | ||
cb323159 A |
4795 | if (iokit_mapped_pages) { |
4796 | *iokit_mapped_pages = (get_task_iokit_mapped(task) / PAGE_SIZE_64); | |
4797 | } | |
3e170ce0 | 4798 | |
cb323159 A |
4799 | if (page_table_pages) { |
4800 | *page_table_pages = (get_task_page_table(task) / PAGE_SIZE_64); | |
4801 | } | |
f427ee49 A |
4802 | |
4803 | #if CONFIG_FREEZE | |
4804 | if (frozen_to_swap_pages) { | |
4805 | *frozen_to_swap_pages = (get_task_frozen_to_swap(task) / PAGE_SIZE_64); | |
4806 | } | |
4807 | #else /* CONFIG_FREEZE */ | |
4808 | #pragma unused(frozen_to_swap_pages) | |
4809 | #endif /* CONFIG_FREEZE */ | |
4810 | } | |
4811 | ||
#if CONFIG_FREEZE
/*
 * Appends a copy of 'src_entry' to 'dst_snapshot'.
 *
 *	dst_snapshot      - destination snapshot; its header is (re)initialized
 *	                    when it currently holds no entries.
 *	dst_snapshot_size - capacity of the destination, in entries.
 *	src_entry         - entry to copy.
 *
 * Returns true on success. Fails (returns false) if the destination
 * snapshot is full.
 * Caller must hold the proc list lock.
 */
static bool
memorystatus_jetsam_snapshot_copy_entry_locked(memorystatus_jetsam_snapshot_t *dst_snapshot, unsigned int dst_snapshot_size, const memorystatus_jetsam_snapshot_entry_t *src_entry)
{
	memorystatus_jetsam_snapshot_entry_t *dst_entry;

	LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED);
	assert(dst_snapshot);

	if (dst_snapshot->entry_count == dst_snapshot_size) {
		/* Destination snapshot is full. Can not be updated until it is consumed. */
		return false;
	}

	if (dst_snapshot->entry_count == 0) {
		/* First entry of a fresh snapshot: set up its header */
		memorystatus_init_jetsam_snapshot_header(dst_snapshot);
	}

	/* Claim the next free slot, then fill it */
	dst_entry = &dst_snapshot->entries[dst_snapshot->entry_count];
	dst_snapshot->entry_count++;
	memcpy(dst_entry, src_entry, sizeof(*dst_entry));

	return true;
}
#endif /* CONFIG_FREEZE */
4836 | ||
4837 | static bool | |
4838 | memorystatus_init_jetsam_snapshot_entry_with_kill_locked(memorystatus_jetsam_snapshot_t *snapshot, proc_t p, uint32_t kill_cause, uint64_t killtime, memorystatus_jetsam_snapshot_entry_t **entry) | |
4839 | { | |
c3c9b80d | 4840 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
f427ee49 A |
4841 | memorystatus_jetsam_snapshot_entry_t *snapshot_list = snapshot->entries; |
4842 | size_t i = snapshot->entry_count; | |
4843 | ||
4844 | if (memorystatus_init_jetsam_snapshot_entry_locked(p, &snapshot_list[i], (snapshot->js_gencount)) == TRUE) { | |
4845 | *entry = &snapshot_list[i]; | |
4846 | (*entry)->killed = kill_cause; | |
4847 | (*entry)->jse_killtime = killtime; | |
4848 | ||
4849 | snapshot->entry_count = i + 1; | |
4850 | return true; | |
4851 | } | |
4852 | return false; | |
3e170ce0 A |
4853 | } |
4854 | ||
cb323159 A |
4855 | /* |
4856 | * This routine only acts on the global jetsam event snapshot. | |
4857 | * Updating the process's entry can race when the memorystatus_thread | |
4858 | * has chosen to kill a process that is racing to exit on another core. | |
4859 | */ | |
4860 | static void | |
4861 | memorystatus_update_jetsam_snapshot_entry_locked(proc_t p, uint32_t kill_cause, uint64_t killtime) | |
6d2010ae | 4862 | { |
cb323159 A |
4863 | memorystatus_jetsam_snapshot_entry_t *entry = NULL; |
4864 | memorystatus_jetsam_snapshot_t *snapshot = NULL; | |
4865 | memorystatus_jetsam_snapshot_entry_t *snapshot_list = NULL; | |
39236c6e | 4866 | |
cb323159 | 4867 | unsigned int i; |
f427ee49 A |
4868 | #if CONFIG_FREEZE |
4869 | bool copied_to_freezer_snapshot = false; | |
4870 | #endif /* CONFIG_FREEZE */ | |
cb323159 | 4871 | |
c3c9b80d | 4872 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
d9a64523 | 4873 | |
cb323159 | 4874 | if (memorystatus_jetsam_snapshot_count == 0) { |
d9a64523 | 4875 | /* |
cb323159 A |
4876 | * No active snapshot. |
4877 | * Nothing to do. | |
d9a64523 | 4878 | */ |
f427ee49 | 4879 | goto exit; |
d9a64523 A |
4880 | } |
4881 | ||
cb323159 A |
4882 | /* |
4883 | * Sanity check as this routine should only be called | |
4884 | * from a jetsam kill path. | |
4885 | */ | |
4886 | assert(kill_cause != 0 && killtime != 0); | |
6d2010ae | 4887 | |
cb323159 A |
4888 | snapshot = memorystatus_jetsam_snapshot; |
4889 | snapshot_list = memorystatus_jetsam_snapshot->entries; | |
6d2010ae | 4890 | |
cb323159 A |
4891 | for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) { |
4892 | if (snapshot_list[i].pid == p->p_pid) { | |
4893 | entry = &snapshot_list[i]; | |
6d2010ae | 4894 | |
cb323159 A |
4895 | if (entry->killed || entry->jse_killtime) { |
4896 | /* | |
4897 | * We apparently raced on the exit path | |
4898 | * for this process, as it's snapshot entry | |
4899 | * has already recorded a kill. | |
4900 | */ | |
4901 | assert(entry->killed && entry->jse_killtime); | |
4902 | break; | |
d9a64523 A |
4903 | } |
4904 | ||
4905 | /* | |
cb323159 | 4906 | * Update the entry we just found in the snapshot. |
d9a64523 | 4907 | */ |
d9a64523 | 4908 | |
cb323159 A |
4909 | entry->killed = kill_cause; |
4910 | entry->jse_killtime = killtime; | |
4911 | entry->jse_gencount = snapshot->js_gencount; | |
4912 | entry->jse_idle_delta = p->p_memstat_idle_delta; | |
4913 | #if CONFIG_FREEZE | |
4914 | entry->jse_thaw_count = p->p_memstat_thaw_count; | |
2a1bd2d3 | 4915 | entry->jse_freeze_skip_reason = p->p_memstat_freeze_skip_reason; |
cb323159 A |
4916 | #else /* CONFIG_FREEZE */ |
4917 | entry->jse_thaw_count = 0; | |
2a1bd2d3 | 4918 | entry->jse_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone; |
cb323159 | 4919 | #endif /* CONFIG_FREEZE */ |
3e170ce0 | 4920 | |
d9a64523 | 4921 | /* |
cb323159 A |
4922 | * If a process has moved between bands since snapshot was |
4923 | * initialized, then likely these fields changed too. | |
d9a64523 | 4924 | */ |
cb323159 A |
4925 | if (entry->priority != p->p_memstat_effectivepriority) { |
4926 | strlcpy(entry->name, p->p_name, sizeof(entry->name)); | |
4927 | entry->priority = p->p_memstat_effectivepriority; | |
4928 | entry->state = memorystatus_build_state(p); | |
4929 | entry->user_data = p->p_memstat_userdata; | |
4930 | entry->fds = p->p_fd->fd_nfiles; | |
d9a64523 | 4931 | } |
3e170ce0 | 4932 | |
3e170ce0 | 4933 | /* |
cb323159 | 4934 | * Always update the page counts on a kill. |
3e170ce0 | 4935 | */ |
d9a64523 | 4936 | |
cb323159 A |
4937 | uint32_t pages = 0; |
4938 | uint32_t max_pages_lifetime = 0; | |
4939 | uint32_t purgeable_pages = 0; | |
0a7de745 | 4940 | |
cb323159 A |
4941 | memorystatus_get_task_page_counts(p->task, &pages, &max_pages_lifetime, &purgeable_pages); |
4942 | entry->pages = (uint64_t)pages; | |
4943 | entry->max_pages_lifetime = (uint64_t)max_pages_lifetime; | |
4944 | entry->purgeable_pages = (uint64_t)purgeable_pages; | |
d9a64523 | 4945 | |
cb323159 A |
4946 | uint64_t internal_pages = 0; |
4947 | uint64_t internal_compressed_pages = 0; | |
4948 | uint64_t purgeable_nonvolatile_pages = 0; | |
4949 | uint64_t purgeable_nonvolatile_compressed_pages = 0; | |
4950 | uint64_t alternate_accounting_pages = 0; | |
4951 | uint64_t alternate_accounting_compressed_pages = 0; | |
4952 | uint64_t iokit_mapped_pages = 0; | |
4953 | uint64_t page_table_pages = 0; | |
f427ee49 | 4954 | uint64_t frozen_to_swap_pages = 0; |
0a7de745 | 4955 | |
cb323159 A |
4956 | memorystatus_get_task_phys_footprint_page_counts(p->task, &internal_pages, &internal_compressed_pages, |
4957 | &purgeable_nonvolatile_pages, &purgeable_nonvolatile_compressed_pages, | |
4958 | &alternate_accounting_pages, &alternate_accounting_compressed_pages, | |
f427ee49 | 4959 | &iokit_mapped_pages, &page_table_pages, &frozen_to_swap_pages); |
d9a64523 | 4960 | |
cb323159 A |
4961 | entry->jse_internal_pages = internal_pages; |
4962 | entry->jse_internal_compressed_pages = internal_compressed_pages; | |
4963 | entry->jse_purgeable_nonvolatile_pages = purgeable_nonvolatile_pages; | |
4964 | entry->jse_purgeable_nonvolatile_compressed_pages = purgeable_nonvolatile_compressed_pages; | |
4965 | entry->jse_alternate_accounting_pages = alternate_accounting_pages; | |
4966 | entry->jse_alternate_accounting_compressed_pages = alternate_accounting_compressed_pages; | |
4967 | entry->jse_iokit_mapped_pages = iokit_mapped_pages; | |
4968 | entry->jse_page_table_pages = page_table_pages; | |
f427ee49 | 4969 | entry->jse_frozen_to_swap_pages = frozen_to_swap_pages; |
d9a64523 | 4970 | |
cb323159 A |
4971 | uint64_t region_count = 0; |
4972 | memorystatus_get_task_memory_region_count(p->task, ®ion_count); | |
4973 | entry->jse_memory_region_count = region_count; | |
d9a64523 | 4974 | |
cb323159 A |
4975 | goto exit; |
4976 | } | |
4977 | } | |
d9a64523 | 4978 | |
cb323159 A |
4979 | if (entry == NULL) { |
4980 | /* | |
4981 | * The entry was not found in the snapshot, so the process must have | |
4982 | * launched after the snapshot was initialized. | |
4983 | * Let's try to append the new entry. | |
4984 | */ | |
4985 | if (memorystatus_jetsam_snapshot_count < memorystatus_jetsam_snapshot_max) { | |
d9a64523 | 4986 | /* |
cb323159 A |
4987 | * A populated snapshot buffer exists |
4988 | * and there is room to init a new entry. | |
d9a64523 | 4989 | */ |
cb323159 | 4990 | assert(memorystatus_jetsam_snapshot_count == snapshot->entry_count); |
6d2010ae | 4991 | |
f427ee49 A |
4992 | if (memorystatus_init_jetsam_snapshot_entry_with_kill_locked(snapshot, p, kill_cause, killtime, &entry)) { |
4993 | memorystatus_jetsam_snapshot_count++; | |
d9a64523 | 4994 | |
cb323159 | 4995 | if (memorystatus_jetsam_snapshot_count >= memorystatus_jetsam_snapshot_max) { |
d9a64523 | 4996 | /* |
cb323159 A |
4997 | * We just used the last slot in the snapshot buffer. |
4998 | * We only want to log it once... so we do it here | |
4999 | * when we notice we've hit the max. | |
d9a64523 | 5000 | */ |
cb323159 A |
5001 | printf("memorystatus: WARNING snapshot buffer is full, count %d\n", |
5002 | memorystatus_jetsam_snapshot_count); | |
d9a64523 | 5003 | } |
d9a64523 | 5004 | } |
cb323159 A |
5005 | } |
5006 | } | |
d9a64523 | 5007 | |
cb323159 | 5008 | exit: |
f427ee49 A |
5009 | if (entry) { |
5010 | #if CONFIG_FREEZE | |
5011 | if (memorystatus_jetsam_use_freezer_snapshot && isApp(p)) { | |
5012 | /* This is an app kill. Record it in the freezer snapshot so dasd can incorporate this in its recommendations. */ | |
5013 | copied_to_freezer_snapshot = memorystatus_jetsam_snapshot_copy_entry_locked(memorystatus_jetsam_snapshot_freezer, memorystatus_jetsam_snapshot_freezer_max, entry); | |
5014 | if (copied_to_freezer_snapshot && memorystatus_jetsam_snapshot_freezer->entry_count == memorystatus_jetsam_snapshot_freezer_max) { | |
5015 | /* | |
5016 | * We just used the last slot in the freezer snapshot buffer. | |
5017 | * We only want to log it once... so we do it here | |
5018 | * when we notice we've hit the max. | |
5019 | */ | |
5020 | os_log_error(OS_LOG_DEFAULT, "memorystatus: WARNING freezer snapshot buffer is full, count %zu", | |
5021 | memorystatus_jetsam_snapshot_freezer->entry_count); | |
5022 | } | |
5023 | } | |
5024 | #endif /* CONFIG_FREEZE */ | |
5025 | } else { | |
cb323159 A |
5026 | /* |
5027 | * If we reach here, the snapshot buffer could not be updated. | |
5028 | * Most likely, the buffer is full, in which case we would have | |
5029 | * logged a warning in the previous call. | |
5030 | * | |
5031 | * For now, we will stop appending snapshot entries. | |
5032 | * When the buffer is consumed, the snapshot state will reset. | |
5033 | */ | |
d9a64523 | 5034 | |
cb323159 A |
5035 | MEMORYSTATUS_DEBUG(4, "memorystatus_update_jetsam_snapshot_entry_locked: failed to update pid %d, priority %d, count %d\n", |
5036 | p->p_pid, p->p_memstat_effectivepriority, memorystatus_jetsam_snapshot_count); | |
f427ee49 A |
5037 | |
5038 | #if CONFIG_FREEZE | |
5039 | /* We still attempt to record this in the freezer snapshot */ | |
5040 | if (memorystatus_jetsam_use_freezer_snapshot && isApp(p)) { | |
5041 | snapshot = memorystatus_jetsam_snapshot_freezer; | |
5042 | if (snapshot->entry_count < memorystatus_jetsam_snapshot_freezer_max) { | |
5043 | copied_to_freezer_snapshot = memorystatus_init_jetsam_snapshot_entry_with_kill_locked(snapshot, p, kill_cause, killtime, &entry); | |
5044 | if (copied_to_freezer_snapshot && memorystatus_jetsam_snapshot_freezer->entry_count == memorystatus_jetsam_snapshot_freezer_max) { | |
5045 | /* | |
5046 | * We just used the last slot in the freezer snapshot buffer. | |
5047 | * We only want to log it once... so we do it here | |
5048 | * when we notice we've hit the max. | |
5049 | */ | |
5050 | os_log_error(OS_LOG_DEFAULT, "memorystatus: WARNING freezer snapshot buffer is full, count %zu", | |
5051 | memorystatus_jetsam_snapshot_freezer->entry_count); | |
5052 | } | |
5053 | } | |
5054 | } | |
5055 | #endif /* CONFIG_FREEZE */ | |
cb323159 | 5056 | } |
d9a64523 | 5057 | |
cb323159 A |
5058 | return; |
5059 | } | |
d9a64523 | 5060 | |
cb323159 A |
5061 | #if CONFIG_JETSAM |
5062 | void | |
5063 | memorystatus_pages_update(unsigned int pages_avail) | |
5064 | { | |
5065 | memorystatus_available_pages = pages_avail; | |
d9a64523 | 5066 | |
cb323159 A |
5067 | #if VM_PRESSURE_EVENTS |
5068 | /* | |
5069 | * Since memorystatus_available_pages changes, we should | |
5070 | * re-evaluate the pressure levels on the system and | |
5071 | * check if we need to wake the pressure thread. | |
5072 | * We also update memorystatus_level in that routine. | |
5073 | */ | |
5074 | vm_pressure_response(); | |
d9a64523 | 5075 | |
cb323159 A |
5076 | if (memorystatus_available_pages <= memorystatus_available_pages_pressure) { |
5077 | if (memorystatus_hwm_candidates || (memorystatus_available_pages <= memorystatus_available_pages_critical)) { | |
5078 | memorystatus_thread_wake(); | |
316670eb | 5079 | } |
d9a64523 | 5080 | } |
cb323159 A |
5081 | #if CONFIG_FREEZE |
5082 | /* | |
5083 | * We can't grab the freezer_mutex here even though that synchronization would be correct to inspect | |
5084 | * the # of frozen processes and wakeup the freezer thread. Reason being that we come here into this | |
5085 | * code with (possibly) the page-queue locks held and preemption disabled. So trying to grab a mutex here | |
5086 | * will result in the "mutex with preemption disabled" panic. | |
5087 | */ | |
d9a64523 | 5088 | |
cb323159 | 5089 | if (memorystatus_freeze_thread_should_run() == TRUE) { |
d9a64523 | 5090 | /* |
cb323159 A |
5091 | * The freezer thread is usually woken up by some user-space call i.e. pid_hibernate(any process). |
5092 | * That trigger isn't invoked often enough and so we are enabling this explicit wakeup here. | |
d9a64523 | 5093 | */ |
cb323159 A |
5094 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { |
5095 | thread_wakeup((event_t)&memorystatus_freeze_wakeup); | |
5096 | } | |
5097 | } | |
5098 | #endif /* CONFIG_FREEZE */ | |
d9a64523 | 5099 | |
cb323159 | 5100 | #else /* VM_PRESSURE_EVENTS */ |
d9a64523 | 5101 | |
cb323159 | 5102 | boolean_t critical, delta; |
d9a64523 | 5103 | |
cb323159 A |
5104 | if (!memorystatus_delta) { |
5105 | return; | |
6d2010ae | 5106 | } |
0a7de745 | 5107 | |
cb323159 A |
5108 | critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE; |
5109 | delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta)) | |
5110 | || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE; | |
0a7de745 | 5111 | |
cb323159 A |
5112 | if (critical || delta) { |
5113 | unsigned int total_pages; | |
5114 | ||
5115 | total_pages = (unsigned int) atop_64(max_mem); | |
5116 | #if CONFIG_SECLUDED_MEMORY | |
5117 | total_pages -= vm_page_secluded_count; | |
5118 | #endif /* CONFIG_SECLUDED_MEMORY */ | |
5119 | memorystatus_level = memorystatus_available_pages * 100 / total_pages; | |
5120 | memorystatus_thread_wake(); | |
5121 | } | |
5122 | #endif /* VM_PRESSURE_EVENTS */ | |
6d2010ae | 5123 | } |
cb323159 | 5124 | #endif /* CONFIG_JETSAM */ |
6d2010ae | 5125 | |
cb323159 A |
5126 | static boolean_t |
5127 | memorystatus_init_jetsam_snapshot_entry_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry, uint64_t gencount) | |
6d2010ae | 5128 | { |
cb323159 A |
5129 | clock_sec_t tv_sec; |
5130 | clock_usec_t tv_usec; | |
5131 | uint32_t pages = 0; | |
5132 | uint32_t max_pages_lifetime = 0; | |
5133 | uint32_t purgeable_pages = 0; | |
5134 | uint64_t internal_pages = 0; | |
5135 | uint64_t internal_compressed_pages = 0; | |
5136 | uint64_t purgeable_nonvolatile_pages = 0; | |
5137 | uint64_t purgeable_nonvolatile_compressed_pages = 0; | |
5138 | uint64_t alternate_accounting_pages = 0; | |
5139 | uint64_t alternate_accounting_compressed_pages = 0; | |
5140 | uint64_t iokit_mapped_pages = 0; | |
5141 | uint64_t page_table_pages = 0; | |
f427ee49 | 5142 | uint64_t frozen_to_swap_pages = 0; |
cb323159 A |
5143 | uint64_t region_count = 0; |
5144 | uint64_t cids[COALITION_NUM_TYPES]; | |
0a7de745 | 5145 | |
cb323159 | 5146 | memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t)); |
6d2010ae | 5147 | |
cb323159 A |
5148 | entry->pid = p->p_pid; |
5149 | strlcpy(&entry->name[0], p->p_name, sizeof(entry->name)); | |
5150 | entry->priority = p->p_memstat_effectivepriority; | |
0a7de745 | 5151 | |
cb323159 A |
5152 | memorystatus_get_task_page_counts(p->task, &pages, &max_pages_lifetime, &purgeable_pages); |
5153 | entry->pages = (uint64_t)pages; | |
5154 | entry->max_pages_lifetime = (uint64_t)max_pages_lifetime; | |
5155 | entry->purgeable_pages = (uint64_t)purgeable_pages; | |
0a7de745 | 5156 | |
cb323159 A |
5157 | memorystatus_get_task_phys_footprint_page_counts(p->task, &internal_pages, &internal_compressed_pages, |
5158 | &purgeable_nonvolatile_pages, &purgeable_nonvolatile_compressed_pages, | |
5159 | &alternate_accounting_pages, &alternate_accounting_compressed_pages, | |
f427ee49 | 5160 | &iokit_mapped_pages, &page_table_pages, &frozen_to_swap_pages); |
0a7de745 | 5161 | |
cb323159 A |
5162 | entry->jse_internal_pages = internal_pages; |
5163 | entry->jse_internal_compressed_pages = internal_compressed_pages; | |
5164 | entry->jse_purgeable_nonvolatile_pages = purgeable_nonvolatile_pages; | |
5165 | entry->jse_purgeable_nonvolatile_compressed_pages = purgeable_nonvolatile_compressed_pages; | |
5166 | entry->jse_alternate_accounting_pages = alternate_accounting_pages; | |
5167 | entry->jse_alternate_accounting_compressed_pages = alternate_accounting_compressed_pages; | |
5168 | entry->jse_iokit_mapped_pages = iokit_mapped_pages; | |
5169 | entry->jse_page_table_pages = page_table_pages; | |
f427ee49 | 5170 | entry->jse_frozen_to_swap_pages = frozen_to_swap_pages; |
6d2010ae | 5171 | |
cb323159 A |
5172 | memorystatus_get_task_memory_region_count(p->task, ®ion_count); |
5173 | entry->jse_memory_region_count = region_count; | |
3e170ce0 | 5174 | |
cb323159 A |
5175 | entry->state = memorystatus_build_state(p); |
5176 | entry->user_data = p->p_memstat_userdata; | |
5177 | memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid)); | |
5178 | entry->fds = p->p_fd->fd_nfiles; | |
0a7de745 | 5179 | |
cb323159 A |
5180 | absolutetime_to_microtime(get_task_cpu_time(p->task), &tv_sec, &tv_usec); |
5181 | entry->cpu_time.tv_sec = (int64_t)tv_sec; | |
5182 | entry->cpu_time.tv_usec = (int64_t)tv_usec; | |
6d2010ae | 5183 | |
cb323159 A |
5184 | assert(p->p_stats != NULL); |
5185 | entry->jse_starttime = p->p_stats->ps_start; /* abstime process started */ | |
5186 | entry->jse_killtime = 0; /* abstime jetsam chose to kill process */ | |
5187 | entry->killed = 0; /* the jetsam kill cause */ | |
5188 | entry->jse_gencount = gencount; /* indicates a pass through jetsam thread, when process was targeted to be killed */ | |
3e170ce0 | 5189 | |
cb323159 | 5190 | entry->jse_idle_delta = p->p_memstat_idle_delta; /* Most recent timespan spent in idle-band */ |
3e170ce0 | 5191 | |
cb323159 | 5192 | #if CONFIG_FREEZE |
2a1bd2d3 | 5193 | entry->jse_freeze_skip_reason = p->p_memstat_freeze_skip_reason; |
cb323159 A |
5194 | entry->jse_thaw_count = p->p_memstat_thaw_count; |
5195 | #else /* CONFIG_FREEZE */ | |
5196 | entry->jse_thaw_count = 0; | |
2a1bd2d3 | 5197 | entry->jse_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone; |
cb323159 A |
5198 | #endif /* CONFIG_FREEZE */ |
5199 | ||
5200 | proc_coalitionids(p, cids); | |
5201 | entry->jse_coalition_jetsam_id = cids[COALITION_TYPE_JETSAM]; | |
5202 | ||
5203 | return TRUE; | |
5204 | } | |
5205 | ||
5206 | static void | |
5207 | memorystatus_init_snapshot_vmstats(memorystatus_jetsam_snapshot_t *snapshot) | |
5208 | { | |
5209 | kern_return_t kr = KERN_SUCCESS; | |
5210 | mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; | |
5211 | vm_statistics64_data_t vm_stat; | |
5212 | ||
5213 | if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count)) != KERN_SUCCESS) { | |
5214 | printf("memorystatus_init_jetsam_snapshot_stats: host_statistics64 failed with %d\n", kr); | |
5215 | memset(&snapshot->stats, 0, sizeof(snapshot->stats)); | |
3e170ce0 | 5216 | } else { |
cb323159 A |
5217 | snapshot->stats.free_pages = vm_stat.free_count; |
5218 | snapshot->stats.active_pages = vm_stat.active_count; | |
5219 | snapshot->stats.inactive_pages = vm_stat.inactive_count; | |
5220 | snapshot->stats.throttled_pages = vm_stat.throttled_count; | |
5221 | snapshot->stats.purgeable_pages = vm_stat.purgeable_count; | |
5222 | snapshot->stats.wired_pages = vm_stat.wire_count; | |
3e170ce0 | 5223 | |
cb323159 A |
5224 | snapshot->stats.speculative_pages = vm_stat.speculative_count; |
5225 | snapshot->stats.filebacked_pages = vm_stat.external_page_count; | |
5226 | snapshot->stats.anonymous_pages = vm_stat.internal_page_count; | |
5227 | snapshot->stats.compressions = vm_stat.compressions; | |
5228 | snapshot->stats.decompressions = vm_stat.decompressions; | |
5229 | snapshot->stats.compressor_pages = vm_stat.compressor_page_count; | |
5230 | snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor; | |
6d2010ae | 5231 | } |
0a7de745 | 5232 | |
cb323159 A |
5233 | get_zone_map_size(&snapshot->stats.zone_map_size, &snapshot->stats.zone_map_capacity); |
5234 | ||
5235 | bzero(snapshot->stats.largest_zone_name, sizeof(snapshot->stats.largest_zone_name)); | |
5236 | get_largest_zone_info(snapshot->stats.largest_zone_name, sizeof(snapshot->stats.largest_zone_name), | |
5237 | &snapshot->stats.largest_zone_size); | |
6d2010ae A |
5238 | } |
5239 | ||
d9a64523 | 5240 | /* |
cb323159 A |
5241 | * Collect vm statistics at boot. |
5242 | * Called only once (see kern_exec.c) | |
5243 | * Data can be consumed at any time. | |
d9a64523 | 5244 | */ |
cb323159 A |
5245 | void |
5246 | memorystatus_init_at_boot_snapshot() | |
5247 | { | |
5248 | memorystatus_init_snapshot_vmstats(&memorystatus_at_boot_snapshot); | |
5249 | memorystatus_at_boot_snapshot.entry_count = 0; | |
5250 | memorystatus_at_boot_snapshot.notification_time = 0; /* updated when consumed */ | |
5251 | memorystatus_at_boot_snapshot.snapshot_time = mach_absolute_time(); | |
5252 | } | |
5253 | ||
f427ee49 A |
5254 | static void |
5255 | memorystatus_init_jetsam_snapshot_header(memorystatus_jetsam_snapshot_t *snapshot) | |
5256 | { | |
5257 | memorystatus_init_snapshot_vmstats(snapshot); | |
5258 | snapshot->snapshot_time = mach_absolute_time(); | |
5259 | snapshot->notification_time = 0; | |
5260 | snapshot->js_gencount = 0; | |
5261 | } | |
5262 | ||
6d2010ae | 5263 | static void |
cb323159 | 5264 | memorystatus_init_jetsam_snapshot_locked(memorystatus_jetsam_snapshot_t *od_snapshot, uint32_t ods_list_count ) |
6d2010ae | 5265 | { |
cb323159 A |
5266 | proc_t p, next_p; |
5267 | unsigned int b = 0, i = 0; | |
d9a64523 | 5268 | |
cb323159 A |
5269 | memorystatus_jetsam_snapshot_t *snapshot = NULL; |
5270 | memorystatus_jetsam_snapshot_entry_t *snapshot_list = NULL; | |
5271 | unsigned int snapshot_max = 0; | |
5272 | ||
c3c9b80d | 5273 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
d9a64523 | 5274 | |
cb323159 | 5275 | if (od_snapshot) { |
d9a64523 | 5276 | /* |
cb323159 | 5277 | * This is an on_demand snapshot |
d9a64523 | 5278 | */ |
cb323159 A |
5279 | snapshot = od_snapshot; |
5280 | snapshot_list = od_snapshot->entries; | |
5281 | snapshot_max = ods_list_count; | |
5282 | } else { | |
5283 | /* | |
5284 | * This is a jetsam event snapshot | |
5285 | */ | |
5286 | snapshot = memorystatus_jetsam_snapshot; | |
5287 | snapshot_list = memorystatus_jetsam_snapshot->entries; | |
5288 | snapshot_max = memorystatus_jetsam_snapshot_max; | |
d9a64523 A |
5289 | } |
5290 | ||
f427ee49 | 5291 | memorystatus_init_jetsam_snapshot_header(snapshot); |
cb323159 A |
5292 | |
5293 | next_p = memorystatus_get_first_proc_locked(&b, TRUE); | |
d9a64523 | 5294 | while (next_p) { |
d9a64523 | 5295 | p = next_p; |
cb323159 | 5296 | next_p = memorystatus_get_next_proc_locked(&b, p, TRUE); |
d9a64523 | 5297 | |
cb323159 | 5298 | if (FALSE == memorystatus_init_jetsam_snapshot_entry_locked(p, &snapshot_list[i], snapshot->js_gencount)) { |
d9a64523 | 5299 | continue; |
6d2010ae | 5300 | } |
316670eb | 5301 | |
cb323159 A |
5302 | MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", |
5303 | p->p_pid, | |
5304 | p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7], | |
5305 | p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]); | |
d9a64523 | 5306 | |
cb323159 | 5307 | if (++i == snapshot_max) { |
d9a64523 A |
5308 | break; |
5309 | } | |
5310 | } | |
5311 | ||
cb323159 A |
5312 | snapshot->entry_count = i; |
5313 | ||
5314 | if (!od_snapshot) { | |
5315 | /* update the system buffer count */ | |
5316 | memorystatus_jetsam_snapshot_count = i; | |
5317 | } | |
6d2010ae A |
5318 | } |
5319 | ||
cb323159 | 5320 | #if DEVELOPMENT || DEBUG |
d9a64523 | 5321 | |
cb323159 A |
5322 | #if CONFIG_JETSAM |
5323 | static int | |
f427ee49 | 5324 | memorystatus_cmd_set_panic_bits(user_addr_t buffer, size_t buffer_size) |
6d2010ae | 5325 | { |
cb323159 A |
5326 | int ret; |
5327 | memorystatus_jetsam_panic_options_t debug; | |
d9a64523 | 5328 | |
cb323159 A |
5329 | if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) { |
5330 | return EINVAL; | |
5331 | } | |
6d2010ae | 5332 | |
cb323159 A |
5333 | ret = copyin(buffer, &debug, buffer_size); |
5334 | if (ret) { | |
5335 | return ret; | |
d9a64523 | 5336 | } |
6d2010ae | 5337 | |
cb323159 A |
5338 | /* Panic bits match kMemorystatusKilled* enum */ |
5339 | memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask); | |
5340 | ||
5341 | /* Copyout new value */ | |
5342 | debug.data = memorystatus_jetsam_panic_debug; | |
5343 | ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t)); | |
5344 | ||
5345 | return ret; | |
5346 | } | |
5347 | #endif /* CONFIG_JETSAM */ | |
6d2010ae | 5348 | |
f427ee49 A |
5349 | /* |
5350 | * Verify that the given bucket has been sorted correctly. | |
5351 | * | |
5352 | * Walks through the bucket and verifies that all pids in the | |
5353 | * expected_order buffer are in that bucket and in the same | |
5354 | * relative order. | |
5355 | * | |
5356 | * The proc_list_lock must be held by the caller. | |
5357 | */ | |
5358 | static int | |
5359 | memorystatus_verify_sort_order(unsigned int bucket_index, pid_t *expected_order, size_t num_pids) | |
5360 | { | |
c3c9b80d | 5361 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
f427ee49 A |
5362 | |
5363 | int error = 0; | |
5364 | proc_t p = NULL; | |
5365 | size_t i = 0; | |
5366 | ||
5367 | /* | |
5368 | * NB: We allow other procs to be mixed in within the expected ones. | |
5369 | * We just need the expected procs to be in the right order relative to each other. | |
5370 | */ | |
5371 | p = memorystatus_get_first_proc_locked(&bucket_index, FALSE); | |
5372 | while (p) { | |
5373 | if (p->p_pid == expected_order[i]) { | |
5374 | i++; | |
5375 | } | |
5376 | if (i == num_pids) { | |
5377 | break; | |
5378 | } | |
5379 | p = memorystatus_get_next_proc_locked(&bucket_index, p, FALSE); | |
5380 | } | |
5381 | if (i != num_pids) { | |
5382 | char buffer[128]; | |
5383 | size_t len = sizeof(buffer); | |
5384 | size_t buffer_idx = 0; | |
5385 | os_log_error(OS_LOG_DEFAULT, "memorystatus_verify_sort_order: Processes in bucket %d were not sorted properly\n", bucket_index); | |
5386 | for (i = 0; i < num_pids; i++) { | |
5387 | int num_written = snprintf(buffer + buffer_idx, len - buffer_idx, "%d,", expected_order[i]); | |
5388 | if (num_written <= 0) { | |
5389 | break; | |
5390 | } | |
5391 | if (buffer_idx + (unsigned int) num_written >= len) { | |
5392 | break; | |
5393 | } | |
5394 | buffer_idx += num_written; | |
5395 | } | |
5396 | os_log_error(OS_LOG_DEFAULT, "memorystatus_verify_sort_order: Expected order [%s]", buffer); | |
5397 | memset(buffer, 0, len); | |
5398 | buffer_idx = 0; | |
5399 | p = memorystatus_get_first_proc_locked(&bucket_index, FALSE); | |
5400 | i = 0; | |
5401 | os_log_error(OS_LOG_DEFAULT, "memorystatus_verify_sort_order: Actual order:"); | |
5402 | while (p) { | |
5403 | int num_written; | |
5404 | if (buffer_idx == 0) { | |
5405 | num_written = snprintf(buffer + buffer_idx, len - buffer_idx, "%zu: %d,", i, p->p_pid); | |
5406 | } else { | |
5407 | num_written = snprintf(buffer + buffer_idx, len - buffer_idx, "%d,", p->p_pid); | |
5408 | } | |
5409 | if (num_written <= 0) { | |
5410 | break; | |
5411 | } | |
5412 | buffer_idx += (unsigned int) num_written; | |
5413 | assert(buffer_idx <= len); | |
5414 | if (i % 10 == 0) { | |
5415 | os_log_error(OS_LOG_DEFAULT, "memorystatus_verify_sort_order: %s", buffer); | |
5416 | buffer_idx = 0; | |
5417 | } | |
5418 | p = memorystatus_get_next_proc_locked(&bucket_index, p, FALSE); | |
5419 | i++; | |
5420 | } | |
5421 | if (buffer_idx != 0) { | |
5422 | os_log_error(OS_LOG_DEFAULT, "memorystatus_verify_sort_order: %s", buffer); | |
5423 | } | |
5424 | error = EINVAL; | |
5425 | } | |
5426 | return error; | |
5427 | } | |
5428 | ||
cb323159 A |
5429 | /* |
5430 | * Triggers a sort_order on a specified jetsam priority band. | |
5431 | * This is for testing only, used to force a path through the sort | |
5432 | * function. | |
5433 | */ | |
5434 | static int | |
f427ee49 A |
5435 | memorystatus_cmd_test_jetsam_sort(int priority, |
5436 | int sort_order, | |
5437 | user_addr_t expected_order_user, | |
5438 | size_t expected_order_user_len) | |
cb323159 A |
5439 | { |
5440 | int error = 0; | |
cb323159 | 5441 | unsigned int bucket_index = 0; |
f427ee49 A |
5442 | static size_t kMaxPids = 8; |
5443 | pid_t expected_order[kMaxPids]; | |
5444 | size_t copy_size = sizeof(expected_order); | |
5445 | size_t num_pids; | |
5446 | ||
5447 | if (expected_order_user_len < copy_size) { | |
5448 | copy_size = expected_order_user_len; | |
5449 | } | |
5450 | num_pids = copy_size / sizeof(pid_t); | |
5451 | ||
5452 | error = copyin(expected_order_user, expected_order, copy_size); | |
5453 | if (error != 0) { | |
5454 | return error; | |
5455 | } | |
d9a64523 | 5456 | |
cb323159 A |
5457 | if (priority == -1) { |
5458 | /* Use as shorthand for default priority */ | |
5459 | bucket_index = JETSAM_PRIORITY_DEFAULT; | |
5460 | } else { | |
5461 | bucket_index = (unsigned int)priority; | |
d9a64523 A |
5462 | } |
5463 | ||
f427ee49 A |
5464 | /* |
5465 | * Acquire lock before sorting so we can check the sort order | |
5466 | * while still holding the lock. | |
5467 | */ | |
5468 | proc_list_lock(); | |
5469 | ||
5470 | memorystatus_sort_bucket_locked(bucket_index, sort_order); | |
5471 | ||
5472 | if (expected_order_user != CAST_USER_ADDR_T(NULL) && expected_order_user_len > 0) { | |
5473 | error = memorystatus_verify_sort_order(bucket_index, expected_order, num_pids); | |
5474 | } | |
5475 | ||
5476 | proc_list_unlock(); | |
d9a64523 | 5477 | |
cb323159 A |
5478 | return error; |
5479 | } | |
d9a64523 | 5480 | |
cb323159 | 5481 | #endif /* DEVELOPMENT || DEBUG */ |
d9a64523 | 5482 | |
cb323159 A |
5483 | /* |
5484 | * Prepare the process to be killed (set state, update snapshot) and kill it. | |
5485 | */ | |
5486 | static uint64_t memorystatus_purge_before_jetsam_success = 0; | |
d9a64523 | 5487 | |
cb323159 A |
5488 | static boolean_t |
5489 | memorystatus_kill_proc(proc_t p, uint32_t cause, os_reason_t jetsam_reason, boolean_t *killed, uint64_t *footprint_of_killed_proc) | |
5490 | { | |
5491 | pid_t aPid = 0; | |
5492 | uint32_t aPid_ep = 0; | |
d9a64523 | 5493 | |
cb323159 A |
5494 | uint64_t killtime = 0; |
5495 | clock_sec_t tv_sec; | |
5496 | clock_usec_t tv_usec; | |
5497 | uint32_t tv_msec; | |
5498 | boolean_t retval = FALSE; | |
d9a64523 | 5499 | |
cb323159 A |
5500 | aPid = p->p_pid; |
5501 | aPid_ep = p->p_memstat_effectivepriority; | |
d9a64523 | 5502 | |
cb323159 | 5503 | if (cause != kMemorystatusKilledVnodes && cause != kMemorystatusKilledZoneMapExhaustion) { |
d9a64523 | 5504 | /* |
cb323159 | 5505 | * Genuine memory pressure and not other (vnode/zone) resource exhaustion. |
0a7de745 | 5506 | */ |
cb323159 A |
5507 | boolean_t success = FALSE; |
5508 | uint64_t num_pages_purged; | |
5509 | uint64_t num_pages_reclaimed = 0; | |
5510 | uint64_t num_pages_unsecluded = 0; | |
d9a64523 | 5511 | |
cb323159 A |
5512 | networking_memstatus_callout(p, cause); |
5513 | num_pages_purged = vm_purgeable_purge_task_owned(p->task); | |
5514 | num_pages_reclaimed += num_pages_purged; | |
5515 | #if CONFIG_SECLUDED_MEMORY | |
5516 | if (cause == kMemorystatusKilledVMPageShortage && | |
5517 | vm_page_secluded_count > 0 && | |
5518 | task_can_use_secluded_mem(p->task, FALSE)) { | |
d9a64523 | 5519 | /* |
cb323159 A |
5520 | * We're about to kill a process that has access |
5521 | * to the secluded pool. Drain that pool into the | |
5522 | * free or active queues to make these pages re-appear | |
5523 | * as "available", which might make us no longer need | |
5524 | * to kill that process. | |
5525 | * Since the secluded pool does not get refilled while | |
5526 | * a process has access to it, it should remain | |
5527 | * drained. | |
d9a64523 | 5528 | */ |
cb323159 A |
5529 | num_pages_unsecluded = vm_page_secluded_drain(); |
5530 | num_pages_reclaimed += num_pages_unsecluded; | |
d9a64523 | 5531 | } |
cb323159 | 5532 | #endif /* CONFIG_SECLUDED_MEMORY */ |
d9a64523 | 5533 | |
cb323159 A |
5534 | if (num_pages_reclaimed) { |
5535 | /* | |
5536 | * We actually reclaimed something and so let's | |
5537 | * check if we need to continue with the kill. | |
5538 | */ | |
5539 | if (cause == kMemorystatusKilledHiwat) { | |
5540 | uint64_t footprint_in_bytes = get_task_phys_footprint(p->task); | |
5541 | uint64_t memlimit_in_bytes = (((uint64_t)p->p_memstat_memlimit) * 1024ULL * 1024ULL); /* convert MB to bytes */ | |
5542 | success = (footprint_in_bytes <= memlimit_in_bytes); | |
d9a64523 | 5543 | } else { |
cb323159 A |
5544 | success = (memorystatus_avail_pages_below_pressure() == FALSE); |
5545 | #if CONFIG_SECLUDED_MEMORY | |
5546 | if (!success && num_pages_unsecluded) { | |
d9a64523 | 5547 | /* |
cb323159 A |
5548 | * We just drained the secluded pool |
5549 | * because we're about to kill a | |
5550 | * process that has access to it. | |
5551 | * This is an important process and | |
5552 | * we'd rather not kill it unless | |
5553 | * absolutely necessary, so declare | |
5554 | * success even if draining the pool | |
5555 | * did not quite get us out of the | |
5556 | * "pressure" level but still got | |
5557 | * us out of the "critical" level. | |
d9a64523 | 5558 | */ |
cb323159 A |
5559 | success = (memorystatus_avail_pages_below_critical() == FALSE); |
5560 | } | |
5561 | #endif /* CONFIG_SECLUDED_MEMORY */ | |
5562 | } | |
d9a64523 | 5563 | |
cb323159 A |
5564 | if (success) { |
5565 | memorystatus_purge_before_jetsam_success++; | |
d9a64523 | 5566 | |
cb323159 A |
5567 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: reclaimed %llu pages (%llu purged, %llu unsecluded) from pid %d [%s] and avoided %s\n", |
5568 | num_pages_reclaimed, num_pages_purged, num_pages_unsecluded, aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_kill_cause_name[cause]); | |
d9a64523 | 5569 | |
cb323159 | 5570 | *killed = FALSE; |
d9a64523 | 5571 | |
cb323159 | 5572 | return TRUE; |
d9a64523 A |
5573 | } |
5574 | } | |
5575 | } | |
5576 | ||
cb323159 A |
5577 | #if CONFIG_JETSAM && (DEVELOPMENT || DEBUG) |
5578 | MEMORYSTATUS_DEBUG(1, "jetsam: killing pid %d [%s] - %lld Mb > 1 (%d Mb)\n", | |
5579 | aPid, (*p->p_name ? p->p_name : "unknown"), | |
5580 | (footprint_in_bytes / (1024ULL * 1024ULL)), /* converted bytes to MB */ | |
5581 | p->p_memstat_memlimit); | |
5582 | #endif /* CONFIG_JETSAM && (DEVELOPMENT || DEBUG) */ | |
6d2010ae | 5583 | |
cb323159 A |
5584 | killtime = mach_absolute_time(); |
5585 | absolutetime_to_microtime(killtime, &tv_sec, &tv_usec); | |
5586 | tv_msec = tv_usec / 1000; | |
3e170ce0 | 5587 | |
cb323159 A |
5588 | proc_list_lock(); |
5589 | memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime); | |
5590 | proc_list_unlock(); | |
d9a64523 | 5591 | |
cb323159 | 5592 | char kill_reason_string[128]; |
d9a64523 | 5593 | |
cb323159 A |
5594 | if (cause == kMemorystatusKilledHiwat) { |
5595 | strlcpy(kill_reason_string, "killing_highwater_process", 128); | |
5596 | } else { | |
5597 | if (aPid_ep == JETSAM_PRIORITY_IDLE) { | |
5598 | strlcpy(kill_reason_string, "killing_idle_process", 128); | |
5599 | } else { | |
5600 | strlcpy(kill_reason_string, "killing_top_process", 128); | |
316670eb A |
5601 | } |
5602 | } | |
d9a64523 A |
5603 | |
5604 | /* | |
cb323159 A |
5605 | * memorystatus_do_kill drops a reference, so take another one so we can |
5606 | * continue to use this exit reason even after memorystatus_do_kill() | |
5607 | * returns | |
d9a64523 | 5608 | */ |
cb323159 | 5609 | os_reason_ref(jetsam_reason); |
6d2010ae | 5610 | |
cb323159 A |
5611 | retval = memorystatus_do_kill(p, cause, jetsam_reason, footprint_of_killed_proc); |
5612 | *killed = retval; | |
d9a64523 | 5613 | |
cb323159 A |
5614 | os_log_with_startup_serial(OS_LOG_DEFAULT, "%lu.%03d memorystatus: %s pid %d [%s] (%s %d) %lluKB - memorystatus_available_pages: %llu", |
5615 | (unsigned long)tv_sec, tv_msec, kill_reason_string, | |
5616 | aPid, ((p && *p->p_name) ? p->p_name : "unknown"), | |
5617 | memorystatus_kill_cause_name[cause], aPid_ep, | |
f427ee49 | 5618 | (*footprint_of_killed_proc) >> 10, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES); |
cb323159 A |
5619 | |
5620 | return retval; | |
316670eb A |
5621 | } |
5622 | ||
cb323159 A |
5623 | /* |
5624 | * Jetsam the first process in the queue. | |
5625 | */ | |
d9a64523 | 5626 | static boolean_t |
cb323159 A |
5627 | memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, os_reason_t jetsam_reason, |
5628 | int32_t *priority, uint32_t *errors, uint64_t *memory_reclaimed) | |
d9a64523 | 5629 | { |
cb323159 A |
5630 | pid_t aPid; |
5631 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
5632 | boolean_t new_snapshot = FALSE, force_new_snapshot = FALSE, killed = FALSE, freed_mem = FALSE; | |
5633 | unsigned int i = 0; | |
5634 | uint32_t aPid_ep; | |
5635 | int32_t local_max_kill_prio = JETSAM_PRIORITY_IDLE; | |
5636 | uint64_t footprint_of_killed_proc = 0; | |
d9a64523 | 5637 | |
cb323159 A |
5638 | #ifndef CONFIG_FREEZE |
5639 | #pragma unused(any) | |
5640 | #endif | |
d9a64523 | 5641 | |
cb323159 | 5642 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START, |
f427ee49 | 5643 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, 0, 0, 0, 0); |
d9a64523 | 5644 | |
d9a64523 | 5645 | |
cb323159 A |
5646 | #if CONFIG_JETSAM |
5647 | if (sort_flag == TRUE) { | |
5648 | (void)memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_DEFAULT); | |
d9a64523 A |
5649 | } |
5650 | ||
cb323159 | 5651 | local_max_kill_prio = max_kill_priority; |
d9a64523 | 5652 | |
cb323159 | 5653 | force_new_snapshot = FALSE; |
d9a64523 | 5654 | |
cb323159 | 5655 | #else /* CONFIG_JETSAM */ |
d190cdc3 | 5656 | |
cb323159 A |
5657 | if (sort_flag == TRUE) { |
5658 | (void)memorystatus_sort_bucket(JETSAM_PRIORITY_IDLE, JETSAM_SORT_DEFAULT); | |
d190cdc3 A |
5659 | } |
5660 | ||
cb323159 A |
5661 | /* |
5662 | * On macos, we currently only have 2 reasons to be here: | |
5663 | * | |
5664 | * kMemorystatusKilledZoneMapExhaustion | |
5665 | * AND | |
5666 | * kMemorystatusKilledVMCompressorSpaceShortage | |
5667 | * | |
5668 | * If we are here because of kMemorystatusKilledZoneMapExhaustion, we will consider | |
5669 | * any and all processes as eligible kill candidates since we need to avoid a panic. | |
5670 | * | |
5671 | * Since this function can be called async. it is harder to toggle the max_kill_priority | |
5672 | * value before and after a call. And so we use this local variable to set the upper band | |
5673 | * on the eligible kill bands. | |
5674 | */ | |
5675 | if (cause == kMemorystatusKilledZoneMapExhaustion) { | |
5676 | local_max_kill_prio = JETSAM_PRIORITY_MAX; | |
5677 | } else { | |
5678 | local_max_kill_prio = max_kill_priority; | |
d190cdc3 A |
5679 | } |
5680 | ||
cb323159 A |
5681 | /* |
5682 | * And, because we are here under extreme circumstances, we force a snapshot even for | |
5683 | * IDLE kills. | |
5684 | */ | |
5685 | force_new_snapshot = TRUE; | |
fe8ab488 | 5686 | |
cb323159 | 5687 | #endif /* CONFIG_JETSAM */ |
fe8ab488 | 5688 | |
cb323159 A |
5689 | if (cause != kMemorystatusKilledZoneMapExhaustion && |
5690 | jetsam_current_thread() != NULL && | |
5691 | jetsam_current_thread()->limit_to_low_bands && | |
5692 | local_max_kill_prio > JETSAM_PRIORITY_BACKGROUND) { | |
5693 | local_max_kill_prio = JETSAM_PRIORITY_BACKGROUND; | |
316670eb | 5694 | } |
0a7de745 | 5695 | |
cb323159 | 5696 | proc_list_lock(); |
316670eb | 5697 | |
cb323159 A |
5698 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
5699 | while (next_p && (next_p->p_memstat_effectivepriority <= local_max_kill_prio)) { | |
5700 | p = next_p; | |
5701 | next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
316670eb | 5702 | |
39236c6e | 5703 | |
cb323159 A |
5704 | aPid = p->p_pid; |
5705 | aPid_ep = p->p_memstat_effectivepriority; | |
3e170ce0 | 5706 | |
cb323159 A |
5707 | if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { |
5708 | continue; /* with lock held */ | |
5709 | } | |
3e170ce0 | 5710 | |
cb323159 | 5711 | if (cause == kMemorystatusKilledVnodes) { |
3e170ce0 | 5712 | /* |
cb323159 A |
5713 | * If the system runs out of vnodes, we systematically jetsam |
5714 | * processes in hopes of stumbling onto a vnode gain that helps | |
5715 | * the system recover. The process that happens to trigger | |
5716 | * this path has no known relationship to the vnode shortage. | |
5717 | * Deadlock avoidance: attempt to safeguard the caller. | |
3e170ce0 | 5718 | */ |
39037602 | 5719 | |
cb323159 A |
5720 | if (p == current_proc()) { |
5721 | /* do not jetsam the current process */ | |
5722 | continue; | |
5723 | } | |
5724 | } | |
5725 | ||
5726 | #if CONFIG_FREEZE | |
5727 | boolean_t skip; | |
5728 | boolean_t reclaim_proc = !(p->p_memstat_state & P_MEMSTAT_LOCKED); | |
5729 | if (any || reclaim_proc) { | |
5730 | skip = FALSE; | |
5731 | } else { | |
5732 | skip = TRUE; | |
5733 | } | |
5734 | ||
5735 | if (skip) { | |
5736 | continue; | |
5737 | } else | |
5738 | #endif | |
5739 | { | |
5740 | if (proc_ref_locked(p) == p) { | |
5ba3f43e | 5741 | /* |
cb323159 A |
5742 | * Mark as terminated so that if exit1() indicates success, but the process (for example) |
5743 | * is blocked in task_exception_notify(), it'll be skipped if encountered again - see | |
5744 | * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the | |
5745 | * acquisition of the proc lock. | |
5ba3f43e | 5746 | */ |
cb323159 | 5747 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; |
5ba3f43e A |
5748 | } else { |
5749 | /* | |
cb323159 A |
5750 | * We need to restart the search again because |
5751 | * proc_ref_locked _can_ drop the proc_list lock | |
5752 | * and we could have lost our stored next_p via | |
5753 | * an exit() on another core. | |
5ba3f43e | 5754 | */ |
cb323159 A |
5755 | i = 0; |
5756 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
5757 | continue; | |
5ba3f43e | 5758 | } |
39037602 | 5759 | |
cb323159 A |
5760 | /* |
5761 | * Capture a snapshot if none exists and: | |
5762 | * - we are forcing a new snapshot creation, either because: | |
5763 | * - on a particular platform we need these snapshots every time, OR | |
5764 | * - a boot-arg/embedded device tree property has been set. | |
5765 | * - priority was not requested (this is something other than an ambient kill) | |
5766 | * - the priority was requested *and* the targeted process is not at idle priority | |
5767 | */ | |
5768 | if ((memorystatus_jetsam_snapshot_count == 0) && | |
5769 | (force_new_snapshot || memorystatus_idle_snapshot || ((!priority) || (priority && (aPid_ep != JETSAM_PRIORITY_IDLE))))) { | |
5770 | memorystatus_init_jetsam_snapshot_locked(NULL, 0); | |
5771 | new_snapshot = TRUE; | |
5772 | } | |
813fb2f6 | 5773 | |
cb323159 | 5774 | proc_list_unlock(); |
813fb2f6 | 5775 | |
cb323159 A |
5776 | freed_mem = memorystatus_kill_proc(p, cause, jetsam_reason, &killed, &footprint_of_killed_proc); /* purged and/or killed 'p' */ |
5777 | /* Success? */ | |
5778 | if (freed_mem) { | |
5779 | if (killed) { | |
5780 | *memory_reclaimed = footprint_of_killed_proc; | |
5781 | if (priority) { | |
5782 | *priority = aPid_ep; | |
813fb2f6 | 5783 | } |
cb323159 A |
5784 | } else { |
5785 | /* purged */ | |
5786 | proc_list_lock(); | |
5787 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
5788 | proc_list_unlock(); | |
3e170ce0 | 5789 | } |
cb323159 A |
5790 | proc_rele(p); |
5791 | goto exit; | |
3e170ce0 | 5792 | } |
3e170ce0 | 5793 | |
cb323159 A |
5794 | /* |
5795 | * Failure - first unwind the state, | |
5796 | * then fall through to restart the search. | |
5797 | */ | |
5798 | proc_list_lock(); | |
5799 | proc_rele_locked(p); | |
5800 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
5801 | p->p_memstat_state |= P_MEMSTAT_ERROR; | |
5802 | *errors += 1; | |
5803 | ||
5804 | i = 0; | |
5805 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
813fb2f6 | 5806 | } |
6d2010ae | 5807 | } |
3e170ce0 | 5808 | |
cb323159 | 5809 | proc_list_unlock(); |
3e170ce0 | 5810 | |
cb323159 A |
5811 | exit: |
5812 | os_reason_free(jetsam_reason); | |
3e170ce0 | 5813 | |
cb323159 A |
5814 | if (!killed) { |
5815 | *memory_reclaimed = 0; | |
6d2010ae | 5816 | |
cb323159 A |
5817 | /* Clear snapshot if freshly captured and no target was found */ |
5818 | if (new_snapshot) { | |
5819 | proc_list_lock(); | |
5820 | memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
5821 | proc_list_unlock(); | |
fe8ab488 A |
5822 | } |
5823 | } | |
3e170ce0 | 5824 | |
cb323159 | 5825 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END, |
f427ee49 | 5826 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, killed ? aPid : 0, killed, *memory_reclaimed, 0); |
cb323159 A |
5827 | |
5828 | return killed; | |
fe8ab488 A |
5829 | } |
5830 | ||
cb323159 A |
5831 | /* |
5832 | * Jetsam aggressively | |
5833 | */ | |
5834 | static boolean_t | |
5835 | memorystatus_kill_processes_aggressive(uint32_t cause, int aggr_count, | |
5836 | int32_t priority_max, uint32_t *errors, uint64_t *memory_reclaimed) | |
0a7de745 | 5837 | { |
cb323159 A |
5838 | pid_t aPid; |
5839 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
5840 | boolean_t new_snapshot = FALSE, killed = FALSE; | |
5841 | int kill_count = 0; | |
5842 | unsigned int i = 0; | |
5843 | int32_t aPid_ep = 0; | |
5844 | unsigned int memorystatus_level_snapshot = 0; | |
5845 | uint64_t killtime = 0; | |
5846 | clock_sec_t tv_sec; | |
5847 | clock_usec_t tv_usec; | |
5848 | uint32_t tv_msec; | |
5849 | os_reason_t jetsam_reason = OS_REASON_NULL; | |
5850 | uint64_t footprint_of_killed_proc = 0; | |
0a7de745 | 5851 | |
cb323159 | 5852 | *memory_reclaimed = 0; |
0a7de745 | 5853 | |
cb323159 | 5854 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START, |
f427ee49 | 5855 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, priority_max, 0, 0, 0); |
d9a64523 | 5856 | |
cb323159 | 5857 | if (priority_max >= JETSAM_PRIORITY_FOREGROUND) { |
d9a64523 | 5858 | /* |
cb323159 A |
5859 | * Check if aggressive jetsam has been asked to kill upto or beyond the |
5860 | * JETSAM_PRIORITY_FOREGROUND bucket. If yes, sort the FG band based on | |
5861 | * coalition footprint. | |
d9a64523 | 5862 | */ |
cb323159 | 5863 | memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_DEFAULT); |
d9a64523 A |
5864 | } |
5865 | ||
cb323159 A |
5866 | jetsam_reason = os_reason_create(OS_REASON_JETSAM, cause); |
5867 | if (jetsam_reason == OS_REASON_NULL) { | |
5868 | printf("memorystatus_kill_processes_aggressive: failed to allocate exit reason\n"); | |
39037602 | 5869 | } |
39236c6e | 5870 | |
cb323159 | 5871 | proc_list_lock(); |
39236c6e | 5872 | |
cb323159 A |
5873 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
5874 | while (next_p) { | |
5875 | if (((next_p->p_listflag & P_LIST_EXITED) != 0) || | |
5876 | ((unsigned int)(next_p->p_memstat_effectivepriority) != i)) { | |
5877 | /* | |
5878 | * We have raced with next_p running on another core. | |
5879 | * It may be exiting or it may have moved to a different | |
5880 | * jetsam priority band. This means we have lost our | |
5881 | * place in line while traversing the jetsam list. We | |
5882 | * attempt to recover by rewinding to the beginning of the band | |
5883 | * we were already traversing. By doing this, we do not guarantee | |
5884 | * that no process escapes this aggressive march, but we can make | |
5885 | * skipping an entire range of processes less likely. (PR-21069019) | |
5886 | */ | |
39236c6e | 5887 | |
cb323159 A |
5888 | MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: rewinding band %d, %s(%d) moved or exiting.\n", |
5889 | aggr_count, i, (*next_p->p_name ? next_p->p_name : "unknown"), next_p->p_pid); | |
39236c6e | 5890 | |
cb323159 A |
5891 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
5892 | continue; | |
39037602 A |
5893 | } |
5894 | ||
cb323159 A |
5895 | p = next_p; |
5896 | next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
39037602 | 5897 | |
cb323159 A |
5898 | if (p->p_memstat_effectivepriority > priority_max) { |
5899 | /* | |
5900 | * Bail out of this killing spree if we have | |
5901 | * reached beyond the priority_max jetsam band. | |
5902 | * That is, we kill up to and through the | |
5903 | * priority_max jetsam band. | |
5904 | */ | |
39037602 | 5905 | proc_list_unlock(); |
cb323159 | 5906 | goto exit; |
39037602 | 5907 | } |
39037602 | 5908 | |
cb323159 A |
5909 | aPid = p->p_pid; |
5910 | aPid_ep = p->p_memstat_effectivepriority; | |
39037602 | 5911 | |
cb323159 A |
5912 | if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { |
5913 | continue; | |
5914 | } | |
39037602 | 5915 | |
39037602 | 5916 | /* |
cb323159 | 5917 | * Capture a snapshot if none exists. |
39037602 | 5918 | */ |
cb323159 A |
5919 | if (memorystatus_jetsam_snapshot_count == 0) { |
5920 | memorystatus_init_jetsam_snapshot_locked(NULL, 0); | |
5921 | new_snapshot = TRUE; | |
39037602 A |
5922 | } |
5923 | ||
39037602 | 5924 | /* |
cb323159 A |
5925 | * Mark as terminated so that if exit1() indicates success, but the process (for example) |
5926 | * is blocked in task_exception_notify(), it'll be skipped if encountered again - see | |
5927 | * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the | |
5928 | * acquisition of the proc lock. | |
39037602 | 5929 | */ |
cb323159 | 5930 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; |
39037602 | 5931 | |
cb323159 A |
5932 | killtime = mach_absolute_time(); |
5933 | absolutetime_to_microtime(killtime, &tv_sec, &tv_usec); | |
5934 | tv_msec = tv_usec / 1000; | |
39037602 | 5935 | |
cb323159 A |
5936 | /* Shift queue, update stats */ |
5937 | memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime); | |
39037602 | 5938 | |
cb323159 A |
5939 | /* |
5940 | * In order to kill the target process, we will drop the proc_list_lock. | |
5941 | * To guaranteee that p and next_p don't disappear out from under the lock, | |
5942 | * we must take a ref on both. | |
5943 | * If we cannot get a reference, then it's likely we've raced with | |
5944 | * that process exiting on another core. | |
5945 | */ | |
5946 | if (proc_ref_locked(p) == p) { | |
5947 | if (next_p) { | |
5948 | while (next_p && (proc_ref_locked(next_p) != next_p)) { | |
5949 | proc_t temp_p; | |
39037602 | 5950 | |
cb323159 A |
5951 | /* |
5952 | * We must have raced with next_p exiting on another core. | |
5953 | * Recover by getting the next eligible process in the band. | |
5954 | */ | |
39037602 | 5955 | |
cb323159 A |
5956 | MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: skipping %d [%s] (exiting?)\n", |
5957 | aggr_count, next_p->p_pid, (*next_p->p_name ? next_p->p_name : "(unknown)")); | |
39037602 | 5958 | |
cb323159 A |
5959 | temp_p = next_p; |
5960 | next_p = memorystatus_get_next_proc_locked(&i, temp_p, TRUE); | |
5961 | } | |
5962 | } | |
5963 | proc_list_unlock(); | |
39037602 | 5964 | |
cb323159 A |
5965 | printf("%lu.%03d memorystatus: %s%d pid %d [%s] (%s %d) - memorystatus_available_pages: %llu\n", |
5966 | (unsigned long)tv_sec, tv_msec, | |
5967 | ((aPid_ep == JETSAM_PRIORITY_IDLE) ? "killing_idle_process_aggressive" : "killing_top_process_aggressive"), | |
5968 | aggr_count, aPid, (*p->p_name ? p->p_name : "unknown"), | |
f427ee49 | 5969 | memorystatus_kill_cause_name[cause], aPid_ep, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES); |
39037602 | 5970 | |
cb323159 | 5971 | memorystatus_level_snapshot = memorystatus_level; |
39037602 | 5972 | |
cb323159 A |
5973 | /* |
5974 | * memorystatus_do_kill() drops a reference, so take another one so we can | |
5975 | * continue to use this exit reason even after memorystatus_do_kill() | |
5976 | * returns. | |
5977 | */ | |
5978 | os_reason_ref(jetsam_reason); | |
5979 | killed = memorystatus_do_kill(p, cause, jetsam_reason, &footprint_of_killed_proc); | |
39037602 | 5980 | |
cb323159 A |
5981 | /* Success? */ |
5982 | if (killed) { | |
5983 | *memory_reclaimed += footprint_of_killed_proc; | |
5984 | proc_rele(p); | |
5985 | kill_count++; | |
5986 | p = NULL; | |
5987 | killed = FALSE; | |
39037602 | 5988 | |
39037602 | 5989 | /* |
cb323159 | 5990 | * Continue the killing spree. |
39037602 | 5991 | */ |
cb323159 A |
5992 | proc_list_lock(); |
5993 | if (next_p) { | |
5994 | proc_rele_locked(next_p); | |
5995 | } | |
5996 | ||
5997 | if (aPid_ep == JETSAM_PRIORITY_FOREGROUND && memorystatus_aggressive_jetsam_lenient == TRUE) { | |
5998 | if (memorystatus_level > memorystatus_level_snapshot && ((memorystatus_level - memorystatus_level_snapshot) >= AGGRESSIVE_JETSAM_LENIENT_MODE_THRESHOLD)) { | |
5999 | #if DEVELOPMENT || DEBUG | |
6000 | printf("Disabling Lenient mode after one-time deployment.\n"); | |
6001 | #endif /* DEVELOPMENT || DEBUG */ | |
6002 | memorystatus_aggressive_jetsam_lenient = FALSE; | |
6003 | break; | |
39037602 A |
6004 | } |
6005 | } | |
cb323159 | 6006 | |
39037602 A |
6007 | continue; |
6008 | } | |
cb323159 | 6009 | |
39037602 | 6010 | /* |
cb323159 A |
6011 | * Failure - first unwind the state, |
6012 | * then fall through to restart the search. | |
39037602 | 6013 | */ |
cb323159 A |
6014 | proc_list_lock(); |
6015 | proc_rele_locked(p); | |
6016 | if (next_p) { | |
6017 | proc_rele_locked(next_p); | |
39037602 | 6018 | } |
cb323159 A |
6019 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; |
6020 | p->p_memstat_state |= P_MEMSTAT_ERROR; | |
6021 | *errors += 1; | |
6022 | p = NULL; | |
39037602 A |
6023 | } |
6024 | ||
6025 | /* | |
cb323159 A |
6026 | * Failure - restart the search at the beginning of |
6027 | * the band we were already traversing. | |
6028 | * | |
6029 | * We might have raced with "p" exiting on another core, resulting in no | |
6030 | * ref on "p". Or, we may have failed to kill "p". | |
6031 | * | |
6032 | * Either way, we fall thru to here, leaving the proc in the | |
6033 | * P_MEMSTAT_TERMINATED or P_MEMSTAT_ERROR state. | |
6034 | * | |
6035 | * And, we hold the the proc_list_lock at this point. | |
0a7de745 | 6036 | */ |
39037602 | 6037 | |
cb323159 A |
6038 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
6039 | } | |
39236c6e | 6040 | |
cb323159 | 6041 | proc_list_unlock(); |
39236c6e | 6042 | |
cb323159 A |
6043 | exit: |
6044 | os_reason_free(jetsam_reason); | |
39037602 | 6045 | |
cb323159 A |
6046 | /* Clear snapshot if freshly captured and no target was found */ |
6047 | if (new_snapshot && (kill_count == 0)) { | |
6048 | proc_list_lock(); | |
6049 | memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
6050 | proc_list_unlock(); | |
0a7de745 | 6051 | } |
39037602 | 6052 | |
cb323159 | 6053 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END, |
f427ee49 | 6054 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, 0, kill_count, *memory_reclaimed, 0); |
39236c6e | 6055 | |
cb323159 A |
6056 | if (kill_count > 0) { |
6057 | return TRUE; | |
6058 | } else { | |
6059 | return FALSE; | |
6060 | } | |
39236c6e A |
6061 | } |
6062 | ||
cb323159 A |
6063 | static boolean_t |
6064 | memorystatus_kill_hiwat_proc(uint32_t *errors, boolean_t *purged, uint64_t *memory_reclaimed) | |
6065 | { | |
6066 | pid_t aPid = 0; | |
6067 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
6068 | boolean_t new_snapshot = FALSE, killed = FALSE, freed_mem = FALSE; | |
6069 | unsigned int i = 0; | |
6070 | uint32_t aPid_ep; | |
6071 | os_reason_t jetsam_reason = OS_REASON_NULL; | |
6072 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START, | |
f427ee49 | 6073 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, 0, 0, 0, 0); |
39037602 | 6074 | |
cb323159 A |
6075 | jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_HIGHWATER); |
6076 | if (jetsam_reason == OS_REASON_NULL) { | |
6077 | printf("memorystatus_kill_hiwat_proc: failed to allocate exit reason\n"); | |
6078 | } | |
39236c6e | 6079 | |
cb323159 | 6080 | proc_list_lock(); |
0a7de745 | 6081 | |
cb323159 A |
6082 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
6083 | while (next_p) { | |
6084 | uint64_t footprint_in_bytes = 0; | |
6085 | uint64_t memlimit_in_bytes = 0; | |
6086 | boolean_t skip = 0; | |
fe8ab488 | 6087 | |
cb323159 A |
6088 | p = next_p; |
6089 | next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
fe8ab488 | 6090 | |
cb323159 A |
6091 | aPid = p->p_pid; |
6092 | aPid_ep = p->p_memstat_effectivepriority; | |
fe8ab488 | 6093 | |
cb323159 A |
6094 | if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { |
6095 | continue; | |
fe8ab488 | 6096 | } |
3e170ce0 | 6097 | |
cb323159 A |
6098 | /* skip if no limit set */ |
6099 | if (p->p_memstat_memlimit <= 0) { | |
6100 | continue; | |
3e170ce0 | 6101 | } |
fe8ab488 | 6102 | |
cb323159 A |
6103 | footprint_in_bytes = get_task_phys_footprint(p->task); |
6104 | memlimit_in_bytes = (((uint64_t)p->p_memstat_memlimit) * 1024ULL * 1024ULL); /* convert MB to bytes */ | |
6105 | skip = (footprint_in_bytes <= memlimit_in_bytes); | |
6106 | ||
6107 | #if CONFIG_FREEZE | |
6108 | if (!skip) { | |
6109 | if (p->p_memstat_state & P_MEMSTAT_LOCKED) { | |
6110 | skip = TRUE; | |
6111 | } else { | |
6112 | skip = FALSE; | |
6113 | } | |
6114 | } | |
6115 | #endif | |
6116 | ||
6117 | if (skip) { | |
6118 | continue; | |
6119 | } else { | |
6120 | if (memorystatus_jetsam_snapshot_count == 0) { | |
6121 | memorystatus_init_jetsam_snapshot_locked(NULL, 0); | |
6122 | new_snapshot = TRUE; | |
6123 | } | |
6124 | ||
6125 | if (proc_ref_locked(p) == p) { | |
6126 | /* | |
6127 | * Mark as terminated so that if exit1() indicates success, but the process (for example) | |
6128 | * is blocked in task_exception_notify(), it'll be skipped if encountered again - see | |
6129 | * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the | |
6130 | * acquisition of the proc lock. | |
6131 | */ | |
6132 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; | |
6133 | ||
6134 | proc_list_unlock(); | |
6135 | } else { | |
6136 | /* | |
6137 | * We need to restart the search again because | |
6138 | * proc_ref_locked _can_ drop the proc_list lock | |
6139 | * and we could have lost our stored next_p via | |
6140 | * an exit() on another core. | |
6141 | */ | |
6142 | i = 0; | |
6143 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
6144 | continue; | |
6145 | } | |
39037602 | 6146 | |
cb323159 A |
6147 | footprint_in_bytes = 0; |
6148 | freed_mem = memorystatus_kill_proc(p, kMemorystatusKilledHiwat, jetsam_reason, &killed, &footprint_in_bytes); /* purged and/or killed 'p' */ | |
6149 | ||
6150 | /* Success? */ | |
6151 | if (freed_mem) { | |
6152 | if (killed == FALSE) { | |
6153 | /* purged 'p'..don't reset HWM candidate count */ | |
6154 | *purged = TRUE; | |
39037602 | 6155 | |
cb323159 A |
6156 | proc_list_lock(); |
6157 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
6158 | proc_list_unlock(); | |
6159 | } else { | |
6160 | *memory_reclaimed = footprint_in_bytes; | |
5ba3f43e | 6161 | } |
cb323159 A |
6162 | proc_rele(p); |
6163 | goto exit; | |
6164 | } | |
6165 | /* | |
6166 | * Failure - first unwind the state, | |
6167 | * then fall through to restart the search. | |
6168 | */ | |
6169 | proc_list_lock(); | |
6170 | proc_rele_locked(p); | |
6171 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
6172 | p->p_memstat_state |= P_MEMSTAT_ERROR; | |
6173 | *errors += 1; | |
5ba3f43e | 6174 | |
cb323159 A |
6175 | i = 0; |
6176 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
39037602 A |
6177 | } |
6178 | } | |
6179 | ||
cb323159 | 6180 | proc_list_unlock(); |
39236c6e | 6181 | |
cb323159 A |
6182 | exit: |
6183 | os_reason_free(jetsam_reason); | |
fe8ab488 | 6184 | |
cb323159 A |
6185 | if (!killed) { |
6186 | *memory_reclaimed = 0; | |
fe8ab488 | 6187 | |
cb323159 A |
6188 | /* Clear snapshot if freshly captured and no target was found */ |
6189 | if (new_snapshot) { | |
6190 | proc_list_lock(); | |
6191 | memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
6192 | proc_list_unlock(); | |
fe8ab488 | 6193 | } |
cb323159 A |
6194 | } |
6195 | ||
6196 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END, | |
f427ee49 | 6197 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, killed ? aPid : 0, killed, *memory_reclaimed, 0); |
fe8ab488 | 6198 | |
cb323159 A |
6199 | return killed; |
6200 | } | |
fe8ab488 | 6201 | |
cb323159 A |
6202 | /* |
6203 | * Jetsam a process pinned in the elevated band. | |
6204 | * | |
6205 | * Return: true -- a pinned process was jetsammed | |
6206 | * false -- no pinned process was jetsammed | |
6207 | */ | |
6208 | boolean_t | |
6209 | memorystatus_kill_elevated_process(uint32_t cause, os_reason_t jetsam_reason, unsigned int band, int aggr_count, uint32_t *errors, uint64_t *memory_reclaimed) | |
6210 | { | |
6211 | pid_t aPid = 0; | |
6212 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
6213 | boolean_t new_snapshot = FALSE, killed = FALSE; | |
6214 | int kill_count = 0; | |
6215 | uint32_t aPid_ep; | |
6216 | uint64_t killtime = 0; | |
6217 | clock_sec_t tv_sec; | |
6218 | clock_usec_t tv_usec; | |
6219 | uint32_t tv_msec; | |
6220 | uint64_t footprint_of_killed_proc = 0; | |
39236c6e | 6221 | |
39236c6e | 6222 | |
cb323159 | 6223 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START, |
f427ee49 | 6224 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, 0, 0, 0, 0); |
39236c6e | 6225 | |
cb323159 A |
6226 | #if CONFIG_FREEZE |
6227 | boolean_t consider_frozen_only = FALSE; | |
39037602 | 6228 | |
cb323159 A |
6229 | if (band == (unsigned int) memorystatus_freeze_jetsam_band) { |
6230 | consider_frozen_only = TRUE; | |
6231 | } | |
6232 | #endif /* CONFIG_FREEZE */ | |
39037602 | 6233 | |
cb323159 | 6234 | proc_list_lock(); |
39037602 | 6235 | |
cb323159 A |
6236 | next_p = memorystatus_get_first_proc_locked(&band, FALSE); |
6237 | while (next_p) { | |
6238 | p = next_p; | |
6239 | next_p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
0a7de745 | 6240 | |
cb323159 A |
6241 | aPid = p->p_pid; |
6242 | aPid_ep = p->p_memstat_effectivepriority; | |
0a7de745 | 6243 | |
cb323159 A |
6244 | /* |
6245 | * Only pick a process pinned in this elevated band | |
6246 | */ | |
6247 | if (!(p->p_memstat_state & P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND)) { | |
39236c6e A |
6248 | continue; |
6249 | } | |
39236c6e | 6250 | |
cb323159 A |
6251 | if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { |
6252 | continue; | |
6d2010ae | 6253 | } |
39236c6e | 6254 | |
cb323159 A |
6255 | #if CONFIG_FREEZE |
6256 | if (consider_frozen_only && !(p->p_memstat_state & P_MEMSTAT_FROZEN)) { | |
39236c6e A |
6257 | continue; |
6258 | } | |
6259 | ||
cb323159 A |
6260 | if (p->p_memstat_state & P_MEMSTAT_LOCKED) { |
6261 | continue; | |
3e170ce0 | 6262 | } |
cb323159 | 6263 | #endif /* CONFIG_FREEZE */ |
39236c6e | 6264 | |
cb323159 A |
6265 | #if DEVELOPMENT || DEBUG |
6266 | MEMORYSTATUS_DEBUG(1, "jetsam: elevated%d process pid %d [%s] - memorystatus_available_pages: %d\n", | |
6267 | aggr_count, | |
6268 | aPid, (*p->p_name ? p->p_name : "unknown"), | |
f427ee49 | 6269 | MEMORYSTATUS_LOG_AVAILABLE_PAGES); |
cb323159 | 6270 | #endif /* DEVELOPMENT || DEBUG */ |
39236c6e | 6271 | |
cb323159 A |
6272 | if (memorystatus_jetsam_snapshot_count == 0) { |
6273 | memorystatus_init_jetsam_snapshot_locked(NULL, 0); | |
6274 | new_snapshot = TRUE; | |
3e170ce0 A |
6275 | } |
6276 | ||
cb323159 A |
6277 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; |
6278 | ||
6279 | killtime = mach_absolute_time(); | |
6280 | absolutetime_to_microtime(killtime, &tv_sec, &tv_usec); | |
6281 | tv_msec = tv_usec / 1000; | |
39236c6e | 6282 | |
cb323159 | 6283 | memorystatus_update_jetsam_snapshot_entry_locked(p, cause, killtime); |
39236c6e | 6284 | |
cb323159 A |
6285 | if (proc_ref_locked(p) == p) { |
6286 | proc_list_unlock(); | |
39236c6e | 6287 | |
fe8ab488 | 6288 | /* |
cb323159 A |
6289 | * memorystatus_do_kill drops a reference, so take another one so we can |
6290 | * continue to use this exit reason even after memorystatus_do_kill() | |
6291 | * returns | |
fe8ab488 | 6292 | */ |
cb323159 A |
6293 | os_reason_ref(jetsam_reason); |
6294 | killed = memorystatus_do_kill(p, cause, jetsam_reason, &footprint_of_killed_proc); | |
0a7de745 | 6295 | |
cb323159 A |
6296 | os_log_with_startup_serial(OS_LOG_DEFAULT, "%lu.%03d memorystatus: killing_top_process_elevated%d pid %d [%s] (%s %d) %lluKB - memorystatus_available_pages: %llu\n", |
6297 | (unsigned long)tv_sec, tv_msec, | |
6298 | aggr_count, | |
6299 | aPid, ((p && *p->p_name) ? p->p_name : "unknown"), | |
6300 | memorystatus_kill_cause_name[cause], aPid_ep, | |
f427ee49 | 6301 | footprint_of_killed_proc >> 10, (uint64_t)MEMORYSTATUS_LOG_AVAILABLE_PAGES); |
fe8ab488 | 6302 | |
cb323159 A |
6303 | /* Success? */ |
6304 | if (killed) { | |
6305 | *memory_reclaimed = footprint_of_killed_proc; | |
6306 | proc_rele(p); | |
6307 | kill_count++; | |
6308 | goto exit; | |
fe8ab488 | 6309 | } |
0a7de745 | 6310 | |
cb323159 A |
6311 | /* |
6312 | * Failure - first unwind the state, | |
6313 | * then fall through to restart the search. | |
6314 | */ | |
6315 | proc_list_lock(); | |
6316 | proc_rele_locked(p); | |
6317 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
6318 | p->p_memstat_state |= P_MEMSTAT_ERROR; | |
6319 | *errors += 1; | |
39236c6e | 6320 | } |
6d2010ae | 6321 | |
cb323159 A |
6322 | /* |
6323 | * Failure - restart the search. | |
6324 | * | |
6325 | * We might have raced with "p" exiting on another core, resulting in no | |
6326 | * ref on "p". Or, we may have failed to kill "p". | |
6327 | * | |
6328 | * Either way, we fall thru to here, leaving the proc in the | |
6329 | * P_MEMSTAT_TERMINATED state or P_MEMSTAT_ERROR state. | |
6330 | * | |
6331 | * And, we hold the the proc_list_lock at this point. | |
6332 | */ | |
39236c6e | 6333 | |
cb323159 | 6334 | next_p = memorystatus_get_first_proc_locked(&band, FALSE); |
0a7de745 | 6335 | } |
39236c6e | 6336 | |
cb323159 | 6337 | proc_list_unlock(); |
39236c6e | 6338 | |
cb323159 A |
6339 | exit: |
6340 | os_reason_free(jetsam_reason); | |
39236c6e | 6341 | |
cb323159 A |
6342 | if (kill_count == 0) { |
6343 | *memory_reclaimed = 0; | |
6344 | ||
6345 | /* Clear snapshot if freshly captured and no target was found */ | |
6346 | if (new_snapshot) { | |
6347 | proc_list_lock(); | |
6348 | memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
6349 | proc_list_unlock(); | |
6350 | } | |
39236c6e | 6351 | } |
316670eb | 6352 | |
cb323159 | 6353 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END, |
f427ee49 | 6354 | MEMORYSTATUS_LOG_AVAILABLE_PAGES, killed ? aPid : 0, kill_count, *memory_reclaimed, 0); |
cb323159 A |
6355 | |
6356 | return killed; | |
39236c6e | 6357 | } |
6d2010ae | 6358 | |
cb323159 A |
6359 | static boolean_t |
6360 | memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) | |
b0d623f7 | 6361 | { |
cb323159 A |
6362 | /* |
6363 | * TODO: allow a general async path | |
6364 | * | |
6365 | * NOTE: If a new async kill cause is added, make sure to update memorystatus_thread() to | |
6366 | * add the appropriate exit reason code mapping. | |
6367 | */ | |
6368 | if ((victim_pid != -1) || | |
6369 | (cause != kMemorystatusKilledVMPageShortage && | |
6370 | cause != kMemorystatusKilledVMCompressorThrashing && | |
6371 | cause != kMemorystatusKilledVMCompressorSpaceShortage && | |
6372 | cause != kMemorystatusKilledFCThrashing && | |
6373 | cause != kMemorystatusKilledZoneMapExhaustion)) { | |
6374 | return FALSE; | |
0a7de745 | 6375 | } |
5ba3f43e | 6376 | |
cb323159 A |
6377 | kill_under_pressure_cause = cause; |
6378 | memorystatus_thread_wake(); | |
6379 | return TRUE; | |
39236c6e A |
6380 | } |
6381 | ||
cb323159 A |
6382 | boolean_t |
6383 | memorystatus_kill_on_VM_compressor_space_shortage(boolean_t async) | |
39236c6e | 6384 | { |
cb323159 A |
6385 | if (async) { |
6386 | return memorystatus_kill_process_async(-1, kMemorystatusKilledVMCompressorSpaceShortage); | |
6387 | } else { | |
6388 | os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_VMCOMPRESSOR_SPACE_SHORTAGE); | |
6389 | if (jetsam_reason == OS_REASON_NULL) { | |
6390 | printf("memorystatus_kill_on_VM_compressor_space_shortage -- sync: failed to allocate jetsam reason\n"); | |
6391 | } | |
39236c6e | 6392 | |
cb323159 | 6393 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMCompressorSpaceShortage, jetsam_reason); |
39236c6e | 6394 | } |
cb323159 | 6395 | } |
39236c6e | 6396 | |
cb323159 A |
6397 | #if CONFIG_JETSAM |
6398 | boolean_t | |
6399 | memorystatus_kill_on_VM_compressor_thrashing(boolean_t async) | |
6400 | { | |
6401 | if (async) { | |
6402 | return memorystatus_kill_process_async(-1, kMemorystatusKilledVMCompressorThrashing); | |
6403 | } else { | |
6404 | os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_VMCOMPRESSOR_THRASHING); | |
6405 | if (jetsam_reason == OS_REASON_NULL) { | |
6406 | printf("memorystatus_kill_on_VM_compressor_thrashing -- sync: failed to allocate jetsam reason\n"); | |
6407 | } | |
39236c6e | 6408 | |
cb323159 | 6409 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMCompressorThrashing, jetsam_reason); |
b0d623f7 | 6410 | } |
cb323159 | 6411 | } |
b0d623f7 | 6412 | |
cb323159 A |
6413 | boolean_t |
6414 | memorystatus_kill_on_VM_page_shortage(boolean_t async) | |
6415 | { | |
6416 | if (async) { | |
6417 | return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage); | |
6418 | } else { | |
6419 | os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_VMPAGESHORTAGE); | |
6420 | if (jetsam_reason == OS_REASON_NULL) { | |
6421 | printf("memorystatus_kill_on_VM_page_shortage -- sync: failed to allocate jetsam reason\n"); | |
0a7de745 | 6422 | } |
39236c6e | 6423 | |
cb323159 | 6424 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage, jetsam_reason); |
39236c6e | 6425 | } |
cb323159 | 6426 | } |
39236c6e | 6427 | |
cb323159 A |
6428 | boolean_t |
6429 | memorystatus_kill_on_FC_thrashing(boolean_t async) | |
6430 | { | |
6431 | if (async) { | |
6432 | return memorystatus_kill_process_async(-1, kMemorystatusKilledFCThrashing); | |
6433 | } else { | |
6434 | os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_FCTHRASHING); | |
6435 | if (jetsam_reason == OS_REASON_NULL) { | |
6436 | printf("memorystatus_kill_on_FC_thrashing -- sync: failed to allocate jetsam reason\n"); | |
39236c6e | 6437 | } |
cb323159 A |
6438 | |
6439 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledFCThrashing, jetsam_reason); | |
39236c6e | 6440 | } |
cb323159 | 6441 | } |
0a7de745 | 6442 | |
cb323159 A |
6443 | boolean_t |
6444 | memorystatus_kill_on_vnode_limit(void) | |
6445 | { | |
6446 | os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_VNODE); | |
6447 | if (jetsam_reason == OS_REASON_NULL) { | |
6448 | printf("memorystatus_kill_on_vnode_limit: failed to allocate jetsam reason\n"); | |
39236c6e A |
6449 | } |
6450 | ||
cb323159 | 6451 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes, jetsam_reason); |
b0d623f7 A |
6452 | } |
6453 | ||
cb323159 | 6454 | #endif /* CONFIG_JETSAM */ |
39236c6e | 6455 | |
cb323159 A |
6456 | boolean_t |
6457 | memorystatus_kill_on_zone_map_exhaustion(pid_t pid) | |
6458 | { | |
6459 | boolean_t res = FALSE; | |
6460 | if (pid == -1) { | |
6461 | res = memorystatus_kill_process_async(-1, kMemorystatusKilledZoneMapExhaustion); | |
6462 | } else { | |
6463 | os_reason_t jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_ZONE_MAP_EXHAUSTION); | |
6464 | if (jetsam_reason == OS_REASON_NULL) { | |
6465 | printf("memorystatus_kill_on_zone_map_exhaustion: failed to allocate jetsam reason\n"); | |
6466 | } | |
39236c6e | 6467 | |
cb323159 A |
6468 | res = memorystatus_kill_process_sync(pid, kMemorystatusKilledZoneMapExhaustion, jetsam_reason); |
6469 | } | |
6470 | return res; | |
6471 | } | |
39236c6e | 6472 | |
/*
 * Hook for the end of a pageout scan.
 * Intentionally empty: there is nothing to do here.
 */
void
memorystatus_on_pageout_scan_end(void)
{
	return;
}
39236c6e A |
6478 | |
6479 | /* Return both allocated and actual size, since there's a race between allocation and list compilation */ | |
b0d623f7 | 6480 | static int |
0a7de745 | 6481 | memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only) |
b0d623f7 | 6482 | { |
0a7de745 | 6483 | uint32_t list_count, i = 0; |
39236c6e A |
6484 | memorystatus_priority_entry_t *list_entry; |
6485 | proc_t p; | |
6486 | ||
0a7de745 | 6487 | list_count = memorystatus_list_count; |
39236c6e A |
6488 | *list_size = sizeof(memorystatus_priority_entry_t) * list_count; |
6489 | ||
6490 | /* Just a size check? */ | |
6491 | if (size_only) { | |
6492 | return 0; | |
6493 | } | |
0a7de745 | 6494 | |
39236c6e A |
6495 | /* Otherwise, validate the size of the buffer */ |
6496 | if (*buffer_size < *list_size) { | |
6497 | return EINVAL; | |
6498 | } | |
6499 | ||
f427ee49 | 6500 | *list_ptr = kheap_alloc(KHEAP_TEMP, *list_size, Z_WAITOK | Z_ZERO); |
a39ff7e2 | 6501 | if (!*list_ptr) { |
316670eb A |
6502 | return ENOMEM; |
6503 | } | |
6504 | ||
39236c6e A |
6505 | *buffer_size = *list_size; |
6506 | *list_size = 0; | |
6507 | ||
6508 | list_entry = *list_ptr; | |
6509 | ||
6510 | proc_list_lock(); | |
6511 | ||
6512 | p = memorystatus_get_first_proc_locked(&i, TRUE); | |
6513 | while (p && (*list_size < *buffer_size)) { | |
6514 | list_entry->pid = p->p_pid; | |
6515 | list_entry->priority = p->p_memstat_effectivepriority; | |
6516 | list_entry->user_data = p->p_memstat_userdata; | |
3e170ce0 | 6517 | |
3e170ce0 | 6518 | if (p->p_memstat_memlimit <= 0) { |
0a7de745 A |
6519 | task_get_phys_footprint_limit(p->task, &list_entry->limit); |
6520 | } else { | |
6521 | list_entry->limit = p->p_memstat_memlimit; | |
6522 | } | |
39037602 | 6523 | |
39236c6e A |
6524 | list_entry->state = memorystatus_build_state(p); |
6525 | list_entry++; | |
6526 | ||
6527 | *list_size += sizeof(memorystatus_priority_entry_t); | |
0a7de745 | 6528 | |
39236c6e | 6529 | p = memorystatus_get_next_proc_locked(&i, p, TRUE); |
316670eb | 6530 | } |
0a7de745 | 6531 | |
39236c6e | 6532 | proc_list_unlock(); |
0a7de745 | 6533 | |
39236c6e | 6534 | MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size); |
0a7de745 | 6535 | |
39236c6e A |
6536 | return 0; |
6537 | } | |
b0d623f7 | 6538 | |
39236c6e | 6539 | static int |
0a7de745 A |
6540 | memorystatus_get_priority_pid(pid_t pid, user_addr_t buffer, size_t buffer_size) |
6541 | { | |
6542 | int error = 0; | |
6543 | memorystatus_priority_entry_t mp_entry; | |
cb323159 | 6544 | kern_return_t ret; |
5ba3f43e | 6545 | |
0a7de745 A |
6546 | /* Validate inputs */ |
6547 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_priority_entry_t))) { | |
6548 | return EINVAL; | |
6549 | } | |
5ba3f43e A |
6550 | |
6551 | proc_t p = proc_find(pid); | |
0a7de745 A |
6552 | if (!p) { |
6553 | return ESRCH; | |
6554 | } | |
5ba3f43e | 6555 | |
0a7de745 | 6556 | memset(&mp_entry, 0, sizeof(memorystatus_priority_entry_t)); |
5ba3f43e | 6557 | |
0a7de745 A |
6558 | mp_entry.pid = p->p_pid; |
6559 | mp_entry.priority = p->p_memstat_effectivepriority; | |
6560 | mp_entry.user_data = p->p_memstat_userdata; | |
6561 | if (p->p_memstat_memlimit <= 0) { | |
cb323159 A |
6562 | ret = task_get_phys_footprint_limit(p->task, &mp_entry.limit); |
6563 | if (ret != KERN_SUCCESS) { | |
6564 | proc_rele(p); | |
6565 | return EINVAL; | |
6566 | } | |
0a7de745 A |
6567 | } else { |
6568 | mp_entry.limit = p->p_memstat_memlimit; | |
6569 | } | |
6570 | mp_entry.state = memorystatus_build_state(p); | |
5ba3f43e | 6571 | |
0a7de745 | 6572 | proc_rele(p); |
5ba3f43e | 6573 | |
0a7de745 | 6574 | error = copyout(&mp_entry, buffer, buffer_size); |
5ba3f43e | 6575 | |
0a7de745 | 6576 | return error; |
5ba3f43e A |
6577 | } |
6578 | ||
6579 | static int | |
0a7de745 A |
6580 | memorystatus_cmd_get_priority_list(pid_t pid, user_addr_t buffer, size_t buffer_size, int32_t *retval) |
6581 | { | |
5ba3f43e | 6582 | int error = 0; |
39236c6e | 6583 | boolean_t size_only; |
39236c6e | 6584 | size_t list_size; |
5ba3f43e A |
6585 | |
6586 | /* | |
6587 | * When a non-zero pid is provided, the 'list' has only one entry. | |
6588 | */ | |
0a7de745 | 6589 | |
39236c6e | 6590 | size_only = ((buffer == USER_ADDR_NULL) ? TRUE: FALSE); |
39236c6e | 6591 | |
5ba3f43e A |
6592 | if (pid != 0) { |
6593 | list_size = sizeof(memorystatus_priority_entry_t) * 1; | |
6594 | if (!size_only) { | |
6595 | error = memorystatus_get_priority_pid(pid, buffer, buffer_size); | |
6596 | } | |
6597 | } else { | |
6598 | memorystatus_priority_entry_t *list = NULL; | |
6599 | error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only); | |
6600 | ||
6601 | if (error == 0) { | |
6602 | if (!size_only) { | |
6603 | error = copyout(list, buffer, list_size); | |
6604 | } | |
6605 | } | |
6606 | ||
6607 | if (list) { | |
f427ee49 | 6608 | kheap_free(KHEAP_TEMP, list, buffer_size); |
5ba3f43e | 6609 | } |
39236c6e | 6610 | } |
5ba3f43e | 6611 | |
39236c6e | 6612 | if (error == 0) { |
f427ee49 A |
6613 | assert(list_size <= INT32_MAX); |
6614 | *retval = (int32_t) list_size; | |
39236c6e | 6615 | } |
39236c6e | 6616 | |
0a7de745 | 6617 | return error; |
316670eb | 6618 | } |
b0d623f7 | 6619 | |
0a7de745 | 6620 | static void |
39236c6e A |
6621 | memorystatus_clear_errors(void) |
6622 | { | |
6623 | proc_t p; | |
6624 | unsigned int i = 0; | |
6625 | ||
6626 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
0a7de745 | 6627 | |
39236c6e | 6628 | proc_list_lock(); |
0a7de745 | 6629 | |
39236c6e A |
6630 | p = memorystatus_get_first_proc_locked(&i, TRUE); |
6631 | while (p) { | |
6632 | if (p->p_memstat_state & P_MEMSTAT_ERROR) { | |
6633 | p->p_memstat_state &= ~P_MEMSTAT_ERROR; | |
6634 | } | |
6635 | p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
6636 | } | |
0a7de745 | 6637 | |
39236c6e A |
6638 | proc_list_unlock(); |
6639 | ||
6640 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
6641 | } | |
b0d623f7 | 6642 | |
5ba3f43e | 6643 | #if CONFIG_JETSAM |
316670eb | 6644 | static void |
0a7de745 A |
6645 | memorystatus_update_levels_locked(boolean_t critical_only) |
6646 | { | |
39236c6e | 6647 | memorystatus_available_pages_critical = memorystatus_available_pages_critical_base; |
fe8ab488 A |
6648 | |
6649 | /* | |
6650 | * If there's an entry in the first bucket, we have idle processes. | |
6651 | */ | |
39037602 | 6652 | |
fe8ab488 A |
6653 | memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; |
6654 | if (first_bucket->count) { | |
6655 | memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset; | |
6656 | ||
0a7de745 A |
6657 | if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure) { |
6658 | /* | |
fe8ab488 A |
6659 | * The critical threshold must never exceed the pressure threshold |
6660 | */ | |
6661 | memorystatus_available_pages_critical = memorystatus_available_pages_pressure; | |
39236c6e A |
6662 | } |
6663 | } | |
fe8ab488 | 6664 | |
39037602 A |
6665 | if (memorystatus_jetsam_policy & kPolicyMoreFree) { |
6666 | memorystatus_available_pages_critical += memorystatus_policy_more_free_offset_pages; | |
6667 | } | |
6668 | ||
39236c6e A |
6669 | if (critical_only) { |
6670 | return; | |
6671 | } | |
0a7de745 | 6672 | |
316670eb | 6673 | #if VM_PRESSURE_EVENTS |
f427ee49 | 6674 | memorystatus_available_pages_pressure = (int32_t)(pressure_threshold_percentage * (atop_64(max_mem) / 100)); |
39236c6e A |
6675 | #endif |
6676 | } | |
6677 | ||
d9a64523 A |
6678 | void |
6679 | memorystatus_fast_jetsam_override(boolean_t enable_override) | |
6680 | { | |
6681 | /* If fast jetsam is not enabled, simply return */ | |
0a7de745 | 6682 | if (!fast_jetsam_enabled) { |
d9a64523 | 6683 | return; |
0a7de745 | 6684 | } |
d9a64523 A |
6685 | |
6686 | if (enable_override) { | |
0a7de745 | 6687 | if ((memorystatus_jetsam_policy & kPolicyMoreFree) == kPolicyMoreFree) { |
d9a64523 | 6688 | return; |
0a7de745 | 6689 | } |
d9a64523 A |
6690 | proc_list_lock(); |
6691 | memorystatus_jetsam_policy |= kPolicyMoreFree; | |
6692 | memorystatus_thread_pool_max(); | |
6693 | memorystatus_update_levels_locked(TRUE); | |
6694 | proc_list_unlock(); | |
6695 | } else { | |
0a7de745 | 6696 | if ((memorystatus_jetsam_policy & kPolicyMoreFree) == 0) { |
d9a64523 | 6697 | return; |
0a7de745 | 6698 | } |
d9a64523 A |
6699 | proc_list_lock(); |
6700 | memorystatus_jetsam_policy &= ~kPolicyMoreFree; | |
6701 | memorystatus_thread_pool_default(); | |
6702 | memorystatus_update_levels_locked(TRUE); | |
6703 | proc_list_unlock(); | |
6704 | } | |
6705 | } | |
6706 | ||
5ba3f43e | 6707 | |
39037602 A |
6708 | static int |
6709 | sysctl_kern_memorystatus_policy_more_free SYSCTL_HANDLER_ARGS | |
6710 | { | |
6711 | #pragma unused(arg1, arg2, oidp) | |
6712 | int error = 0, more_free = 0; | |
6713 | ||
6714 | /* | |
6715 | * TODO: Enable this privilege check? | |
6716 | * | |
6717 | * error = priv_check_cred(kauth_cred_get(), PRIV_VM_JETSAM, 0); | |
6718 | * if (error) | |
6719 | * return (error); | |
6720 | */ | |
6721 | ||
6722 | error = sysctl_handle_int(oidp, &more_free, 0, req); | |
0a7de745 A |
6723 | if (error || !req->newptr) { |
6724 | return error; | |
6725 | } | |
39037602 | 6726 | |
39037602 | 6727 | if (more_free) { |
d9a64523 | 6728 | memorystatus_fast_jetsam_override(true); |
39037602 | 6729 | } else { |
d9a64523 | 6730 | memorystatus_fast_jetsam_override(false); |
39037602 A |
6731 | } |
6732 | ||
39037602 A |
6733 | return 0; |
6734 | } | |
0a7de745 | 6735 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_policy_more_free, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, |
39037602 A |
6736 | 0, 0, &sysctl_kern_memorystatus_policy_more_free, "I", ""); |
6737 | ||
5ba3f43e A |
6738 | #endif /* CONFIG_JETSAM */ |
6739 | ||
3e170ce0 A |
6740 | /* |
6741 | * Get the at_boot snapshot | |
6742 | */ | |
39236c6e | 6743 | static int |
0a7de745 A |
6744 | memorystatus_get_at_boot_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) |
6745 | { | |
39236c6e | 6746 | size_t input_size = *snapshot_size; |
3e170ce0 A |
6747 | |
6748 | /* | |
6749 | * The at_boot snapshot has no entry list. | |
6750 | */ | |
6751 | *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t); | |
6752 | ||
6753 | if (size_only) { | |
6754 | return 0; | |
6755 | } | |
6756 | ||
6757 | /* | |
6758 | * Validate the size of the snapshot buffer | |
6759 | */ | |
6760 | if (input_size < *snapshot_size) { | |
6761 | return EINVAL; | |
6762 | } | |
6763 | ||
6764 | /* | |
6765 | * Update the notification_time only | |
6766 | */ | |
6767 | memorystatus_at_boot_snapshot.notification_time = mach_absolute_time(); | |
6768 | *snapshot = &memorystatus_at_boot_snapshot; | |
6769 | ||
6770 | MEMORYSTATUS_DEBUG(7, "memorystatus_get_at_boot_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%d)\n", | |
0a7de745 | 6771 | (long)input_size, (long)*snapshot_size, 0); |
3e170ce0 A |
6772 | return 0; |
6773 | } | |
6774 | ||
d9a64523 A |
6775 | /* |
6776 | * Get the previous fully populated snapshot | |
6777 | */ | |
6778 | static int | |
0a7de745 A |
6779 | memorystatus_get_jetsam_snapshot_copy(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) |
6780 | { | |
d9a64523 A |
6781 | size_t input_size = *snapshot_size; |
6782 | ||
6783 | if (memorystatus_jetsam_snapshot_copy_count > 0) { | |
6784 | *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_copy_count)); | |
6785 | } else { | |
6786 | *snapshot_size = 0; | |
6787 | } | |
6788 | ||
6789 | if (size_only) { | |
6790 | return 0; | |
6791 | } | |
6792 | ||
6793 | if (input_size < *snapshot_size) { | |
6794 | return EINVAL; | |
6795 | } | |
6796 | ||
6797 | *snapshot = memorystatus_jetsam_snapshot_copy; | |
6798 | ||
6799 | MEMORYSTATUS_DEBUG(7, "memorystatus_get_jetsam_snapshot_copy: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n", | |
0a7de745 | 6800 | (long)input_size, (long)*snapshot_size, (long)memorystatus_jetsam_snapshot_copy_count); |
d9a64523 A |
6801 | |
6802 | return 0; | |
6803 | } | |
6804 | ||
#if CONFIG_FREEZE
/*
 * Get the freezer snapshot.
 *
 * snapshot       out: set to memorystatus_jetsam_snapshot_freezer.
 * snapshot_size  in:  capacity offered by the caller, in bytes.
 *                out: header plus one entry per recorded process, or 0
 *                     when the freezer snapshot holds no entries.
 * size_only      when set, only *snapshot_size is produced.
 *
 * Returns 0, or EINVAL when the caller's capacity is too small.
 */
static int
memorystatus_get_jetsam_snapshot_freezer(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only)
{
	const size_t caller_capacity = *snapshot_size;

	*snapshot_size = 0;
	if (memorystatus_jetsam_snapshot_freezer->entry_count > 0) {
		*snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
		    (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_freezer->entry_count));
	}
	/* The computed size must stay within the freezer snapshot buffer. */
	assert(*snapshot_size <= memorystatus_jetsam_snapshot_freezer_size);

	if (size_only) {
		return 0;
	}

	if (caller_capacity < *snapshot_size) {
		return EINVAL;
	}

	*snapshot = memorystatus_jetsam_snapshot_freezer;

	MEMORYSTATUS_DEBUG(7, "memorystatus_get_jetsam_snapshot_freezer: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n",
	    (long)caller_capacity, (long)*snapshot_size, (long)memorystatus_jetsam_snapshot_freezer->entry_count);

	return 0;
}
#endif /* CONFIG_FREEZE */
6834 | ||
3e170ce0 | 6835 | static int |
0a7de745 A |
6836 | memorystatus_get_on_demand_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) |
6837 | { | |
3e170ce0 A |
6838 | size_t input_size = *snapshot_size; |
6839 | uint32_t ods_list_count = memorystatus_list_count; | |
0a7de745 | 6840 | memorystatus_jetsam_snapshot_t *ods = NULL; /* The on_demand snapshot buffer */ |
3e170ce0 A |
6841 | |
6842 | *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (ods_list_count)); | |
6843 | ||
6844 | if (size_only) { | |
6845 | return 0; | |
6846 | } | |
6847 | ||
6848 | /* | |
6849 | * Validate the size of the snapshot buffer. | |
6850 | * This is inherently racey. May want to revisit | |
6851 | * this error condition and trim the output when | |
6852 | * it doesn't fit. | |
6853 | */ | |
6854 | if (input_size < *snapshot_size) { | |
6855 | return EINVAL; | |
6856 | } | |
6857 | ||
6858 | /* | |
6859 | * Allocate and initialize a snapshot buffer. | |
6860 | */ | |
f427ee49 | 6861 | ods = kalloc(*snapshot_size); |
3e170ce0 | 6862 | if (!ods) { |
0a7de745 | 6863 | return ENOMEM; |
3e170ce0 A |
6864 | } |
6865 | ||
6866 | memset(ods, 0, *snapshot_size); | |
6867 | ||
6868 | proc_list_lock(); | |
6869 | memorystatus_init_jetsam_snapshot_locked(ods, ods_list_count); | |
6870 | proc_list_unlock(); | |
6871 | ||
6872 | /* | |
6873 | * Return the kernel allocated, on_demand buffer. | |
6874 | * The caller of this routine will copy the data out | |
6875 | * to user space and then free the kernel allocated | |
6876 | * buffer. | |
6877 | */ | |
6878 | *snapshot = ods; | |
6879 | ||
6880 | MEMORYSTATUS_DEBUG(7, "memorystatus_get_on_demand_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n", | |
0a7de745 A |
6881 | (long)input_size, (long)*snapshot_size, (long)ods_list_count); |
6882 | ||
3e170ce0 A |
6883 | return 0; |
6884 | } | |
6885 | ||
6886 | static int | |
0a7de745 A |
6887 | memorystatus_get_jetsam_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) |
6888 | { | |
3e170ce0 A |
6889 | size_t input_size = *snapshot_size; |
6890 | ||
39236c6e A |
6891 | if (memorystatus_jetsam_snapshot_count > 0) { |
6892 | *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count)); | |
6893 | } else { | |
6894 | *snapshot_size = 0; | |
6895 | } | |
6896 | ||
6897 | if (size_only) { | |
6898 | return 0; | |
316670eb | 6899 | } |
39236c6e A |
6900 | |
6901 | if (input_size < *snapshot_size) { | |
6902 | return EINVAL; | |
6903 | } | |
6904 | ||
6905 | *snapshot = memorystatus_jetsam_snapshot; | |
3e170ce0 A |
6906 | |
6907 | MEMORYSTATUS_DEBUG(7, "memorystatus_get_jetsam_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n", | |
0a7de745 | 6908 | (long)input_size, (long)*snapshot_size, (long)memorystatus_jetsam_snapshot_count); |
3e170ce0 | 6909 | |
39236c6e | 6910 | return 0; |
316670eb A |
6911 | } |
6912 | ||
fe8ab488 | 6913 | |
316670eb | 6914 | static int |
0a7de745 A |
6915 | memorystatus_cmd_get_jetsam_snapshot(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval) |
6916 | { | |
39236c6e A |
6917 | int error = EINVAL; |
6918 | boolean_t size_only; | |
3e170ce0 A |
6919 | boolean_t is_default_snapshot = FALSE; |
6920 | boolean_t is_on_demand_snapshot = FALSE; | |
6921 | boolean_t is_at_boot_snapshot = FALSE; | |
f427ee49 A |
6922 | #if CONFIG_FREEZE |
6923 | bool is_freezer_snapshot = false; | |
6924 | #endif /* CONFIG_FREEZE */ | |
39236c6e | 6925 | memorystatus_jetsam_snapshot_t *snapshot; |
3e170ce0 | 6926 | |
39236c6e | 6927 | size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE); |
3e170ce0 A |
6928 | |
6929 | if (flags == 0) { | |
6930 | /* Default */ | |
6931 | is_default_snapshot = TRUE; | |
6932 | error = memorystatus_get_jetsam_snapshot(&snapshot, &buffer_size, size_only); | |
6933 | } else { | |
f427ee49 | 6934 | if (flags & ~(MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT | MEMORYSTATUS_SNAPSHOT_COPY | MEMORYSTATUS_FLAGS_SNAPSHOT_FREEZER)) { |
3e170ce0 A |
6935 | /* |
6936 | * Unsupported bit set in flag. | |
6937 | */ | |
6938 | return EINVAL; | |
6939 | } | |
6940 | ||
d9a64523 | 6941 | if (flags & (flags - 0x1)) { |
3e170ce0 | 6942 | /* |
d9a64523 | 6943 | * Can't have multiple flags set at the same time. |
3e170ce0 A |
6944 | */ |
6945 | return EINVAL; | |
6946 | } | |
6947 | ||
6948 | if (flags & MEMORYSTATUS_SNAPSHOT_ON_DEMAND) { | |
6949 | is_on_demand_snapshot = TRUE; | |
6950 | /* | |
6951 | * When not requesting the size only, the following call will allocate | |
6952 | * an on_demand snapshot buffer, which is freed below. | |
6953 | */ | |
6954 | error = memorystatus_get_on_demand_snapshot(&snapshot, &buffer_size, size_only); | |
3e170ce0 A |
6955 | } else if (flags & MEMORYSTATUS_SNAPSHOT_AT_BOOT) { |
6956 | is_at_boot_snapshot = TRUE; | |
6957 | error = memorystatus_get_at_boot_snapshot(&snapshot, &buffer_size, size_only); | |
d9a64523 A |
6958 | } else if (flags & MEMORYSTATUS_SNAPSHOT_COPY) { |
6959 | error = memorystatus_get_jetsam_snapshot_copy(&snapshot, &buffer_size, size_only); | |
f427ee49 A |
6960 | #if CONFIG_FREEZE |
6961 | } else if (flags & MEMORYSTATUS_FLAGS_SNAPSHOT_FREEZER) { | |
6962 | is_freezer_snapshot = true; | |
6963 | error = memorystatus_get_jetsam_snapshot_freezer(&snapshot, &buffer_size, size_only); | |
6964 | #endif /* CONFIG_FREEZE */ | |
3e170ce0 A |
6965 | } else { |
6966 | /* | |
6967 | * Invalid flag setting. | |
6968 | */ | |
6969 | return EINVAL; | |
6970 | } | |
6971 | } | |
6972 | ||
39236c6e A |
6973 | if (error) { |
6974 | goto out; | |
6975 | } | |
316670eb | 6976 | |
3e170ce0 A |
6977 | /* |
6978 | * Copy the data out to user space and clear the snapshot buffer. | |
6979 | * If working with the jetsam snapshot, | |
6980 | * clearing the buffer means, reset the count. | |
6981 | * If working with an on_demand snapshot | |
6982 | * clearing the buffer means, free it. | |
6983 | * If working with the at_boot snapshot | |
6984 | * there is nothing to clear or update. | |
d9a64523 A |
6985 | * If working with a copy of the snapshot |
6986 | * there is nothing to clear or update. | |
f427ee49 A |
6987 | * If working with the freezer snapshot |
6988 | * clearing the buffer means, reset the count. | |
3e170ce0 | 6989 | */ |
39236c6e A |
6990 | if (!size_only) { |
6991 | if ((error = copyout(snapshot, buffer, buffer_size)) == 0) { | |
f427ee49 A |
6992 | #if CONFIG_FREEZE |
6993 | if (is_default_snapshot || is_freezer_snapshot) { | |
6994 | #else | |
3e170ce0 | 6995 | if (is_default_snapshot) { |
f427ee49 | 6996 | #endif /* CONFIG_FREEZE */ |
3e170ce0 A |
6997 | /* |
6998 | * The jetsam snapshot is never freed, its count is simply reset. | |
d9a64523 A |
6999 | * However, we make a copy for any parties that might be interested |
7000 | * in the previous fully populated snapshot. | |
3e170ce0 | 7001 | */ |
3e170ce0 | 7002 | proc_list_lock(); |
f427ee49 | 7003 | #if DEVELOPMENT || DEBUG |
c3c9b80d | 7004 | if (memorystatus_testing_pid != 0 && memorystatus_testing_pid != current_proc()->p_pid) { |
f427ee49 A |
7005 | /* Snapshot is currently owned by someone else. Don't consume it. */ |
7006 | proc_list_unlock(); | |
7007 | goto out; | |
7008 | } | |
7009 | #endif /* (DEVELOPMENT || DEBUG)*/ | |
7010 | if (is_default_snapshot) { | |
7011 | memcpy(memorystatus_jetsam_snapshot_copy, memorystatus_jetsam_snapshot, memorystatus_jetsam_snapshot_size); | |
7012 | memorystatus_jetsam_snapshot_copy_count = memorystatus_jetsam_snapshot_count; | |
7013 | snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
7014 | memorystatus_jetsam_snapshot_last_timestamp = 0; | |
7015 | } | |
7016 | #if CONFIG_FREEZE | |
7017 | else if (is_freezer_snapshot) { | |
7018 | memorystatus_jetsam_snapshot_freezer->entry_count = 0; | |
7019 | } | |
7020 | #endif /* CONFIG_FREEZE */ | |
3e170ce0 A |
7021 | proc_list_unlock(); |
7022 | } | |
7023 | } | |
7024 | ||
7025 | if (is_on_demand_snapshot) { | |
7026 | /* | |
7027 | * The on_demand snapshot is always freed, | |
7028 | * even if the copyout failed. | |
7029 | */ | |
0a7de745 | 7030 | if (snapshot) { |
3e170ce0 A |
7031 | kfree(snapshot, buffer_size); |
7032 | } | |
39236c6e A |
7033 | } |
7034 | } | |
316670eb | 7035 | |
f427ee49 | 7036 | out: |
39236c6e | 7037 | if (error == 0) { |
f427ee49 A |
7038 | assert(buffer_size <= INT32_MAX); |
7039 | *retval = (int32_t) buffer_size; | |
39236c6e | 7040 | } |
39236c6e A |
7041 | return error; |
7042 | } | |
316670eb | 7043 | |
#if DEVELOPMENT || DEBUG
/*
 * Claim or release memorystatus_testing_pid for the calling process.
 *
 * MEMORYSTATUS_FLAGS_SET_TESTING_PID:
 *	succeeds when nobody owns it (the caller becomes the owner) or when
 *	the caller already owns it; EBUSY when another pid owns it.
 * MEMORYSTATUS_FLAGS_UNSET_TESTING_PID:
 *	succeeds when the caller is the owner; EPERM when another pid owns
 *	it; EINVAL when there is no owner at all.
 * Neither flag set: EINVAL.
 */
static int
memorystatus_cmd_set_testing_pid(int32_t flags)
{
	proc_t caller = current_proc();
	int error = EINVAL;

	assert(caller != kernproc);

	proc_list_lock();

	if (flags & MEMORYSTATUS_FLAGS_SET_TESTING_PID) {
		if (memorystatus_testing_pid == caller->p_pid) {
			/* Already ours. */
			error = 0;
		} else if (memorystatus_testing_pid != 0) {
			/* We don't allow ownership to be taken from another proc. */
			error = EBUSY;
		} else {
			memorystatus_testing_pid = caller->p_pid;
			error = 0;
		}
	} else if (flags & MEMORYSTATUS_FLAGS_UNSET_TESTING_PID) {
		if (memorystatus_testing_pid == caller->p_pid) {
			memorystatus_testing_pid = 0;
			error = 0;
		} else if (memorystatus_testing_pid != 0) {
			/* We don't allow ownership to be taken from another proc. */
			error = EPERM;
		}
		/* No current owner: error stays EINVAL. */
	}

	proc_list_unlock();

	return error;
}
#endif /* DEVELOPMENT || DEBUG */
7076 | ||
fe8ab488 | 7077 | /* |
0a7de745 | 7078 | * Routine: memorystatus_cmd_grp_set_priorities |
d9a64523 | 7079 | * Purpose: Update priorities for a group of processes. |
fe8ab488 | 7080 | * |
fe8ab488 A |
7081 | * [priority] |
7082 | * Move each process out of its effective priority | |
7083 | * band and into a new priority band. | |
7084 | * Maintains relative order from lowest to highest priority. | |
7085 | * In single band, maintains relative order from head to tail. | |
7086 | * | |
7087 | * eg: before [effectivepriority | pid] | |
7088 | * [18 | p101 ] | |
7089 | * [17 | p55, p67, p19 ] | |
7090 | * [12 | p103 p10 ] | |
7091 | * [ 7 | p25 ] | |
0a7de745 | 7092 | * [ 0 | p71, p82, ] |
fe8ab488 A |
7093 | * |
7094 | * after [ new band | pid] | |
7095 | * [ xxx | p71, p82, p25, p103, p10, p55, p67, p19, p101] | |
7096 | * | |
7097 | * Returns: 0 on success, else non-zero. | |
7098 | * | |
7099 | * Caveat: We know there is a race window regarding recycled pids. | |
7100 | * A process could be killed before the kernel can act on it here. | |
7101 | * If a pid cannot be found in any of the jetsam priority bands, | |
7102 | * then we simply ignore it. No harm. | |
7103 | * But, if the pid has been recycled then it could be an issue. | |
7104 | * In that scenario, we might move an unsuspecting process to the new | |
7105 | * priority band. It's not clear how the kernel can safeguard | |
7106 | * against this, but it would be an extremely rare case anyway. | |
7107 | * The caller of this api might avoid such race conditions by | |
7108 | * ensuring that the processes passed in the pid list are suspended. | |
7109 | */ | |
7110 | ||
7111 | ||
fe8ab488 | 7112 | static int |
d9a64523 A |
7113 | memorystatus_cmd_grp_set_priorities(user_addr_t buffer, size_t buffer_size) |
7114 | { | |
fe8ab488 A |
7115 | /* |
7116 | * We only handle setting priority | |
7117 | * per process | |
7118 | */ | |
7119 | ||
7120 | int error = 0; | |
d9a64523 | 7121 | memorystatus_properties_entry_v1_t *entries = NULL; |
f427ee49 | 7122 | size_t entry_count = 0; |
fe8ab488 A |
7123 | |
7124 | /* This will be the ordered proc list */ | |
d9a64523 A |
7125 | typedef struct memorystatus_internal_properties { |
7126 | proc_t proc; | |
7127 | int32_t priority; | |
7128 | } memorystatus_internal_properties_t; | |
7129 | ||
fe8ab488 A |
7130 | memorystatus_internal_properties_t *table = NULL; |
7131 | size_t table_size = 0; | |
7132 | uint32_t table_count = 0; | |
7133 | ||
f427ee49 | 7134 | size_t i = 0; |
fe8ab488 A |
7135 | uint32_t bucket_index = 0; |
7136 | boolean_t head_insert; | |
7137 | int32_t new_priority; | |
0a7de745 | 7138 | |
fe8ab488 A |
7139 | proc_t p; |
7140 | ||
7141 | /* Verify inputs */ | |
d9a64523 | 7142 | if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) { |
fe8ab488 A |
7143 | error = EINVAL; |
7144 | goto out; | |
7145 | } | |
7146 | ||
d9a64523 | 7147 | entry_count = (buffer_size / sizeof(memorystatus_properties_entry_v1_t)); |
f427ee49 A |
7148 | if (entry_count == 0) { |
7149 | /* buffer size was not large enough for a single entry */ | |
7150 | error = EINVAL; | |
7151 | goto out; | |
7152 | } | |
7153 | ||
7154 | if ((entries = kheap_alloc(KHEAP_TEMP, buffer_size, Z_WAITOK)) == NULL) { | |
fe8ab488 A |
7155 | error = ENOMEM; |
7156 | goto out; | |
7157 | } | |
7158 | ||
d9a64523 | 7159 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY, entry_count, 0, 0, 0); |
fe8ab488 A |
7160 | |
7161 | if ((error = copyin(buffer, entries, buffer_size)) != 0) { | |
7162 | goto out; | |
7163 | } | |
7164 | ||
7165 | /* Verify sanity of input priorities */ | |
d9a64523 A |
7166 | if (entries[0].version == MEMORYSTATUS_MPE_VERSION_1) { |
7167 | if ((buffer_size % MEMORYSTATUS_MPE_VERSION_1_SIZE) != 0) { | |
7168 | error = EINVAL; | |
7169 | goto out; | |
7170 | } | |
7171 | } else { | |
7172 | error = EINVAL; | |
7173 | goto out; | |
7174 | } | |
0a7de745 A |
7175 | |
7176 | for (i = 0; i < entry_count; i++) { | |
fe8ab488 A |
7177 | if (entries[i].priority == -1) { |
7178 | /* Use as shorthand for default priority */ | |
7179 | entries[i].priority = JETSAM_PRIORITY_DEFAULT; | |
39037602 A |
7180 | } else if ((entries[i].priority == system_procs_aging_band) || (entries[i].priority == applications_aging_band)) { |
7181 | /* Both the aging bands are reserved for internal use; | |
fe8ab488 A |
7182 | * if requested, adjust to JETSAM_PRIORITY_IDLE. */ |
7183 | entries[i].priority = JETSAM_PRIORITY_IDLE; | |
0a7de745 | 7184 | } else if (entries[i].priority == JETSAM_PRIORITY_IDLE_HEAD) { |
fe8ab488 A |
7185 | /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle |
7186 | * queue */ | |
7187 | /* Deal with this later */ | |
7188 | } else if ((entries[i].priority < 0) || (entries[i].priority >= MEMSTAT_BUCKET_COUNT)) { | |
7189 | /* Sanity check */ | |
7190 | error = EINVAL; | |
7191 | goto out; | |
7192 | } | |
7193 | } | |
7194 | ||
7195 | table_size = sizeof(memorystatus_internal_properties_t) * entry_count; | |
f427ee49 | 7196 | if ((table = kheap_alloc(KHEAP_TEMP, table_size, Z_WAITOK | Z_ZERO)) == NULL) { |
fe8ab488 A |
7197 | error = ENOMEM; |
7198 | goto out; | |
7199 | } | |
fe8ab488 A |
7200 | |
7201 | ||
7202 | /* | |
7203 | * For each jetsam bucket entry, spin through the input property list. | |
7204 | * When a matching pid is found, populate an adjacent table with the | |
7205 | * appropriate proc pointer and new property values. | |
7206 | * This traversal automatically preserves order from lowest | |
7207 | * to highest priority. | |
7208 | */ | |
7209 | ||
0a7de745 A |
7210 | bucket_index = 0; |
7211 | ||
fe8ab488 A |
7212 | proc_list_lock(); |
7213 | ||
7214 | /* Create the ordered table */ | |
0a7de745 | 7215 | p = memorystatus_get_first_proc_locked(&bucket_index, TRUE); |
fe8ab488 | 7216 | while (p && (table_count < entry_count)) { |
0a7de745 | 7217 | for (i = 0; i < entry_count; i++) { |
fe8ab488 A |
7218 | if (p->p_pid == entries[i].pid) { |
7219 | /* Build the table data */ | |
7220 | table[table_count].proc = p; | |
7221 | table[table_count].priority = entries[i].priority; | |
7222 | table_count++; | |
7223 | break; | |
7224 | } | |
7225 | } | |
7226 | p = memorystatus_get_next_proc_locked(&bucket_index, p, TRUE); | |
7227 | } | |
0a7de745 | 7228 | |
fe8ab488 | 7229 | /* We now have ordered list of procs ready to move */ |
0a7de745 | 7230 | for (i = 0; i < table_count; i++) { |
fe8ab488 A |
7231 | p = table[i].proc; |
7232 | assert(p != NULL); | |
7233 | ||
7234 | /* Allow head inserts -- but relative order is now */ | |
7235 | if (table[i].priority == JETSAM_PRIORITY_IDLE_HEAD) { | |
7236 | new_priority = JETSAM_PRIORITY_IDLE; | |
7237 | head_insert = true; | |
7238 | } else { | |
7239 | new_priority = table[i].priority; | |
7240 | head_insert = false; | |
7241 | } | |
0a7de745 | 7242 | |
fe8ab488 A |
7243 | /* Not allowed */ |
7244 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { | |
7245 | continue; | |
7246 | } | |
7247 | ||
7248 | /* | |
39037602 A |
7249 | * Take appropriate steps if moving proc out of |
7250 | * either of the aging bands. | |
fe8ab488 | 7251 | */ |
39037602 | 7252 | if ((p->p_memstat_effectivepriority == system_procs_aging_band) || (p->p_memstat_effectivepriority == applications_aging_band)) { |
fe8ab488 A |
7253 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); |
7254 | } | |
7255 | ||
39037602 | 7256 | memorystatus_update_priority_locked(p, new_priority, head_insert, false); |
fe8ab488 A |
7257 | } |
7258 | ||
7259 | proc_list_unlock(); | |
7260 | ||
7261 | /* | |
7262 | * if (table_count != entry_count) | |
7263 | * then some pids were not found in a jetsam band. | |
7264 | * harmless but interesting... | |
7265 | */ | |
fe8ab488 | 7266 | out: |
d9a64523 | 7267 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY, entry_count, table_count, 0, 0); |
0a7de745 A |
7268 | |
7269 | if (entries) { | |
f427ee49 | 7270 | kheap_free(KHEAP_TEMP, entries, buffer_size); |
0a7de745 A |
7271 | } |
7272 | if (table) { | |
f427ee49 | 7273 | kheap_free(KHEAP_TEMP, table, table_size); |
0a7de745 | 7274 | } |
fe8ab488 | 7275 | |
0a7de745 | 7276 | return error; |
fe8ab488 A |
7277 | } |
7278 | ||
cb323159 A |
/*
 * Global use-probability table and the size, in bytes, of its allocation.
 * memorystatus_cmd_grp_set_probabilities() swaps both in as a pair while
 * holding the proc list lock, then frees the previous table.
 */
7279 | memorystatus_internal_probabilities_t *memorystatus_global_probabilities_table = NULL; | |
7280 | size_t memorystatus_global_probabilities_size = 0; | |
7281 | ||
d9a64523 A |
7282 | static int |
7283 | memorystatus_cmd_grp_set_probabilities(user_addr_t buffer, size_t buffer_size) | |
7284 | { | |
7285 | int error = 0; | |
7286 | memorystatus_properties_entry_v1_t *entries = NULL; | |
f427ee49 | 7287 | size_t entry_count = 0, i = 0; |
d9a64523 A |
7288 | memorystatus_internal_probabilities_t *tmp_table_new = NULL, *tmp_table_old = NULL; |
7289 | size_t tmp_table_new_size = 0, tmp_table_old_size = 0; | |
c3c9b80d A |
7290 | #if DEVELOPMENT || DEBUG |
7291 | if (memorystatus_testing_pid != 0 && memorystatus_testing_pid != current_proc()->p_pid) { | |
7292 | /* probabilites are currently owned by someone else. Don't change them. */ | |
7293 | error = EPERM; | |
7294 | goto out; | |
7295 | } | |
7296 | #endif /* (DEVELOPMENT || DEBUG)*/ | |
d9a64523 A |
7297 | |
7298 | /* Verify inputs */ | |
7299 | if ((buffer == USER_ADDR_NULL) || (buffer_size == 0)) { | |
7300 | error = EINVAL; | |
7301 | goto out; | |
7302 | } | |
7303 | ||
7304 | entry_count = (buffer_size / sizeof(memorystatus_properties_entry_v1_t)); | |
7305 | ||
f427ee49 | 7306 | if ((entries = kheap_alloc(KHEAP_TEMP, buffer_size, Z_WAITOK)) == NULL) { |
d9a64523 A |
7307 | error = ENOMEM; |
7308 | goto out; | |
7309 | } | |
7310 | ||
7311 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY, entry_count, 0, 0, 0); | |
7312 | ||
7313 | if ((error = copyin(buffer, entries, buffer_size)) != 0) { | |
7314 | goto out; | |
7315 | } | |
7316 | ||
7317 | if (entries[0].version == MEMORYSTATUS_MPE_VERSION_1) { | |
7318 | if ((buffer_size % MEMORYSTATUS_MPE_VERSION_1_SIZE) != 0) { | |
7319 | error = EINVAL; | |
7320 | goto out; | |
7321 | } | |
7322 | } else { | |
7323 | error = EINVAL; | |
7324 | goto out; | |
7325 | } | |
0a7de745 | 7326 | |
d9a64523 | 7327 | /* Verify sanity of input priorities */ |
0a7de745 | 7328 | for (i = 0; i < entry_count; i++) { |
d9a64523 A |
7329 | /* |
7330 | * 0 - low probability of use. | |
7331 | * 1 - high probability of use. | |
7332 | * | |
0a7de745 | 7333 | * Keeping this field an int (& not a bool) to allow |
d9a64523 A |
7334 | * us to experiment with different values/approaches |
7335 | * later on. | |
7336 | */ | |
7337 | if (entries[i].use_probability > 1) { | |
7338 | error = EINVAL; | |
7339 | goto out; | |
7340 | } | |
7341 | } | |
7342 | ||
7343 | tmp_table_new_size = sizeof(memorystatus_internal_probabilities_t) * entry_count; | |
7344 | ||
f427ee49 | 7345 | if ((tmp_table_new = kalloc_flags(tmp_table_new_size, Z_WAITOK | Z_ZERO)) == NULL) { |
d9a64523 A |
7346 | error = ENOMEM; |
7347 | goto out; | |
7348 | } | |
d9a64523 A |
7349 | |
7350 | proc_list_lock(); | |
7351 | ||
7352 | if (memorystatus_global_probabilities_table) { | |
7353 | tmp_table_old = memorystatus_global_probabilities_table; | |
7354 | tmp_table_old_size = memorystatus_global_probabilities_size; | |
7355 | } | |
7356 | ||
7357 | memorystatus_global_probabilities_table = tmp_table_new; | |
7358 | memorystatus_global_probabilities_size = tmp_table_new_size; | |
7359 | tmp_table_new = NULL; | |
7360 | ||
0a7de745 | 7361 | for (i = 0; i < entry_count; i++) { |
d9a64523 A |
7362 | /* Build the table data */ |
7363 | strlcpy(memorystatus_global_probabilities_table[i].proc_name, entries[i].proc_name, MAXCOMLEN + 1); | |
7364 | memorystatus_global_probabilities_table[i].use_probability = entries[i].use_probability; | |
7365 | } | |
7366 | ||
7367 | proc_list_unlock(); | |
0a7de745 | 7368 | |
d9a64523 A |
7369 | out: |
7370 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY, entry_count, tmp_table_new_size, 0, 0); | |
7371 | ||
7372 | if (entries) { | |
f427ee49 | 7373 | kheap_free(KHEAP_TEMP, entries, buffer_size); |
d9a64523 A |
7374 | entries = NULL; |
7375 | } | |
7376 | ||
7377 | if (tmp_table_old) { | |
7378 | kfree(tmp_table_old, tmp_table_old_size); | |
7379 | tmp_table_old = NULL; | |
7380 | } | |
7381 | ||
0a7de745 | 7382 | return error; |
d9a64523 A |
7383 | } |
7384 | ||
7385 | static int | |
7386 | memorystatus_cmd_grp_set_properties(int32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) | |
7387 | { | |
7388 | int error = 0; | |
7389 | ||
7390 | if ((flags & MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY) == MEMORYSTATUS_FLAGS_GRP_SET_PRIORITY) { | |
d9a64523 | 7391 | error = memorystatus_cmd_grp_set_priorities(buffer, buffer_size); |
d9a64523 | 7392 | } else if ((flags & MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY) == MEMORYSTATUS_FLAGS_GRP_SET_PROBABILITY) { |
d9a64523 | 7393 | error = memorystatus_cmd_grp_set_probabilities(buffer, buffer_size); |
d9a64523 A |
7394 | } else { |
7395 | error = EINVAL; | |
7396 | } | |
7397 | ||
7398 | return error; | |
7399 | } | |
fe8ab488 A |
7400 | |
7401 | /* | |
3e170ce0 A |
7402 | * This routine is used to update a process's jetsam priority position and stored user_data. |
7403 | * It is not used for the setting of memory limits, which is why the last 6 args to the | |
7404 | * memorystatus_update() call are 0 or FALSE. | |
cb323159 A |
7405 | * |
7406 | * Flags passed into this call are used to distinguish the motivation behind a jetsam priority | |
7407 | * transition. By default, the kernel updates the process's original requested priority when | |
7408 | * no flag is passed. But when the MEMORYSTATUS_SET_PRIORITY_ASSERTION flag is used, the kernel | |
7409 | * updates the process's assertion driven priority. | |
7410 | * | |
7411 | * The assertion flag was introduced for use by the device's assertion mediator (eg: runningboardd). | |
7412 | * When an assertion is controlling a process's jetsam priority, it may conflict with that process's | |
7413 | * dirty/clean (active/inactive) jetsam state. The kernel attempts to resolve a priority transition | |
7414 | * conflict by reviewing the process state and then choosing the maximum jetsam band at play, | |
7415 | * eg: requested priority versus assertion priority. | |
fe8ab488 | 7416 | */ |
0a7de745 | 7417 | |
39236c6e | 7418 | static int |
cb323159 | 7419 | memorystatus_cmd_set_priority_properties(pid_t pid, uint32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) |
0a7de745 | 7420 | { |
3e170ce0 | 7421 | int error = 0; |
cb323159 | 7422 | boolean_t is_assertion = FALSE; /* priority is driven by an assertion */ |
3e170ce0 A |
7423 | memorystatus_priority_properties_t mpp_entry; |
7424 | ||
39236c6e | 7425 | /* Validate inputs */ |
3e170ce0 | 7426 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_priority_properties_t))) { |
39236c6e A |
7427 | return EINVAL; |
7428 | } | |
0a7de745 | 7429 | |
cb323159 A |
7430 | /* Validate flags */ |
7431 | if (flags == 0) { | |
7432 | /* | |
7433 | * Default. This path updates requestedpriority. | |
7434 | */ | |
7435 | } else { | |
7436 | if (flags & ~(MEMORYSTATUS_SET_PRIORITY_ASSERTION)) { | |
7437 | /* | |
7438 | * Unsupported bit set in flag. | |
7439 | */ | |
7440 | return EINVAL; | |
7441 | } else if (flags & MEMORYSTATUS_SET_PRIORITY_ASSERTION) { | |
7442 | is_assertion = TRUE; | |
7443 | } | |
7444 | } | |
7445 | ||
3e170ce0 A |
7446 | error = copyin(buffer, &mpp_entry, buffer_size); |
7447 | ||
7448 | if (error == 0) { | |
39236c6e | 7449 | proc_t p; |
0a7de745 | 7450 | |
39236c6e A |
7451 | p = proc_find(pid); |
7452 | if (!p) { | |
3e170ce0 | 7453 | return ESRCH; |
39236c6e | 7454 | } |
0a7de745 | 7455 | |
39236c6e | 7456 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { |
39236c6e | 7457 | proc_rele(p); |
3e170ce0 | 7458 | return EPERM; |
39236c6e | 7459 | } |
0a7de745 | 7460 | |
cb323159 A |
7461 | if (is_assertion) { |
7462 | os_log(OS_LOG_DEFAULT, "memorystatus: set assertion priority(%d) target %s:%d\n", | |
7463 | mpp_entry.priority, (*p->p_name ? p->p_name : "unknown"), p->p_pid); | |
7464 | } | |
7465 | ||
7466 | error = memorystatus_update(p, mpp_entry.priority, mpp_entry.user_data, is_assertion, FALSE, FALSE, 0, 0, FALSE, FALSE); | |
39236c6e A |
7467 | proc_rele(p); |
7468 | } | |
0a7de745 A |
7469 | |
7470 | return error; | |
3e170ce0 A |
7471 | } |
7472 | ||
7473 | static int | |
0a7de745 A |
7474 | memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) |
7475 | { | |
3e170ce0 A |
7476 | int error = 0; |
7477 | memorystatus_memlimit_properties_t mmp_entry; | |
7478 | ||
7479 | /* Validate inputs */ | |
7480 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_memlimit_properties_t))) { | |
7481 | return EINVAL; | |
7482 | } | |
7483 | ||
7484 | error = copyin(buffer, &mmp_entry, buffer_size); | |
7485 | ||
7486 | if (error == 0) { | |
7487 | error = memorystatus_set_memlimit_properties(pid, &mmp_entry); | |
7488 | } | |
7489 | ||
0a7de745 | 7490 | return error; |
3e170ce0 A |
7491 | } |
7492 | ||
cb323159 A |
7493 | static void |
7494 | memorystatus_get_memlimit_properties_internal(proc_t p, memorystatus_memlimit_properties_t* p_entry) | |
7495 | { | |
7496 | memset(p_entry, 0, sizeof(memorystatus_memlimit_properties_t)); | |
7497 | ||
7498 | if (p->p_memstat_memlimit_active > 0) { | |
7499 | p_entry->memlimit_active = p->p_memstat_memlimit_active; | |
7500 | } else { | |
7501 | task_convert_phys_footprint_limit(-1, &p_entry->memlimit_active); | |
7502 | } | |
7503 | ||
7504 | if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) { | |
7505 | p_entry->memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
7506 | } | |
7507 | ||
7508 | /* | |
7509 | * Get the inactive limit and attributes | |
7510 | */ | |
7511 | if (p->p_memstat_memlimit_inactive <= 0) { | |
7512 | task_convert_phys_footprint_limit(-1, &p_entry->memlimit_inactive); | |
7513 | } else { | |
7514 | p_entry->memlimit_inactive = p->p_memstat_memlimit_inactive; | |
7515 | } | |
7516 | if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) { | |
7517 | p_entry->memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
7518 | } | |
7519 | } | |
7520 | ||
3e170ce0 A |
7521 | /* |
7522 | * When getting the memlimit settings, we can't simply call task_get_phys_footprint_limit(). | |
7523 | * That gets the proc's cached memlimit and there is no guarantee that the active/inactive | |
7524 | * limits will be the same in the no-limit case. Instead we convert limits <= 0 using | |
7525 | * task_convert_phys_footprint_limit(). It computes the same limit value that would be written | |
7526 | * to the task's ledgers via task_set_phys_footprint_limit(). | |
7527 | */ | |
7528 | static int | |
0a7de745 A |
7529 | memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) |
7530 | { | |
cb323159 | 7531 | memorystatus_memlimit_properties2_t mmp_entry; |
3e170ce0 A |
7532 | |
7533 | /* Validate inputs */ | |
cb323159 A |
7534 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || |
7535 | ((buffer_size != sizeof(memorystatus_memlimit_properties_t)) && | |
7536 | (buffer_size != sizeof(memorystatus_memlimit_properties2_t)))) { | |
3e170ce0 A |
7537 | return EINVAL; |
7538 | } | |
7539 | ||
cb323159 | 7540 | memset(&mmp_entry, 0, sizeof(memorystatus_memlimit_properties2_t)); |
3e170ce0 A |
7541 | |
7542 | proc_t p = proc_find(pid); | |
7543 | if (!p) { | |
7544 | return ESRCH; | |
7545 | } | |
7546 | ||
7547 | /* | |
7548 | * Get the active limit and attributes. | |
7549 | * No locks taken since we hold a reference to the proc. | |
7550 | */ | |
7551 | ||
cb323159 | 7552 | memorystatus_get_memlimit_properties_internal(p, &mmp_entry.v1); |
3e170ce0 | 7553 | |
cb323159 A |
7554 | #if CONFIG_JETSAM |
7555 | #if DEVELOPMENT || DEBUG | |
3e170ce0 | 7556 | /* |
cb323159 | 7557 | * Get the limit increased via SPI |
3e170ce0 | 7558 | */ |
cb323159 A |
7559 | mmp_entry.memlimit_increase = roundToNearestMB(p->p_memlimit_increase); |
7560 | mmp_entry.memlimit_increase_bytes = p->p_memlimit_increase; | |
7561 | #endif /* DEVELOPMENT || DEBUG */ | |
7562 | #endif /* CONFIG_JETSAM */ | |
7563 | ||
3e170ce0 A |
7564 | proc_rele(p); |
7565 | ||
cb323159 | 7566 | int error = copyout(&mmp_entry, buffer, buffer_size); |
3e170ce0 | 7567 | |
0a7de745 | 7568 | return error; |
b0d623f7 A |
7569 | } |
7570 | ||
3e170ce0 | 7571 | |
39037602 A |
7572 | /* |
7573 | * SPI for kbd - pr24956468 | |
7574 | * This is a very simple snapshot that calculates how much a | |
7575 | * process's phys_footprint exceeds a specific memory limit. | |
7576 | * Only the inactive memory limit is supported for now. | |
7577 | * The delta is returned as bytes in excess or zero. | |
7578 | */ | |
7579 | static int | |
0a7de745 A |
7580 | memorystatus_cmd_get_memlimit_excess_np(pid_t pid, uint32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) |
7581 | { | |
39037602 A |
7582 | int error = 0; |
7583 | uint64_t footprint_in_bytes = 0; | |
7584 | uint64_t delta_in_bytes = 0; | |
7585 | int32_t memlimit_mb = 0; | |
7586 | uint64_t memlimit_bytes = 0; | |
7587 | ||
7588 | /* Validate inputs */ | |
7589 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(uint64_t)) || (flags != 0)) { | |
0a7de745 | 7590 | return EINVAL; |
39037602 A |
7591 | } |
7592 | ||
7593 | proc_t p = proc_find(pid); | |
7594 | if (!p) { | |
7595 | return ESRCH; | |
7596 | } | |
7597 | ||
7598 | /* | |
7599 | * Get the inactive limit. | |
7600 | * No locks taken since we hold a reference to the proc. | |
7601 | */ | |
7602 | ||
7603 | if (p->p_memstat_memlimit_inactive <= 0) { | |
7604 | task_convert_phys_footprint_limit(-1, &memlimit_mb); | |
7605 | } else { | |
7606 | memlimit_mb = p->p_memstat_memlimit_inactive; | |
7607 | } | |
7608 | ||
7609 | footprint_in_bytes = get_task_phys_footprint(p->task); | |
7610 | ||
7611 | proc_rele(p); | |
7612 | ||
0a7de745 | 7613 | memlimit_bytes = memlimit_mb * 1024 * 1024; /* MB to bytes */ |
39037602 A |
7614 | |
7615 | /* | |
7616 | * Computed delta always returns >= 0 bytes | |
7617 | */ | |
7618 | if (footprint_in_bytes > memlimit_bytes) { | |
7619 | delta_in_bytes = footprint_in_bytes - memlimit_bytes; | |
7620 | } | |
7621 | ||
7622 | error = copyout(&delta_in_bytes, buffer, sizeof(delta_in_bytes)); | |
7623 | ||
0a7de745 | 7624 | return error; |
39037602 A |
7625 | } |
7626 | ||
7627 | ||
39236c6e | 7628 | static int |
0a7de745 A |
7629 | memorystatus_cmd_get_pressure_status(int32_t *retval) |
7630 | { | |
39236c6e | 7631 | int error; |
0a7de745 | 7632 | |
39236c6e A |
7633 | /* Need privilege for check */ |
7634 | error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0); | |
7635 | if (error) { | |
0a7de745 | 7636 | return error; |
39236c6e | 7637 | } |
0a7de745 | 7638 | |
39236c6e A |
7639 | /* Inherently racy, so it's not worth taking a lock here */ |
7640 | *retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0; | |
0a7de745 | 7641 | |
39236c6e A |
7642 | return error; |
7643 | } | |
316670eb | 7644 | |
3e170ce0 | 7645 | int |
0a7de745 A |
7646 | memorystatus_get_pressure_status_kdp() |
7647 | { | |
3e170ce0 A |
7648 | return (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0; |
7649 | } | |
7650 | ||
fe8ab488 A |
/*
 * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM.
 *
 * This call is inflexible -- it does not distinguish between active/inactive, fatal/non-fatal.
 * So, with 2-level HWM, preserving previous behavior maps as follows:
 *      - treat the limit passed in as both an active and inactive limit.
 *      - treat the is_fatal_limit flag as though it applies to both active and inactive limits.
 *
 * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK
 *      - the is_fatal_limit is FALSE, meaning the active and inactive limits are non-fatal/soft
 *      - so mapping is (active/non-fatal, inactive/non-fatal)
 *
 * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT
 *      - the is_fatal_limit is TRUE, meaning the process's active and inactive limits are fatal/hard
 *      - so mapping is (active/fatal, inactive/fatal)
 *
 * Returns whatever memorystatus_set_memlimit_properties() returns.
 * retval is unused.
 */

#if CONFIG_JETSAM
static int
memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit)
{
	memorystatus_memlimit_properties_t entry;

	/* The same limit serves as both the active and the inactive limit. */
	entry.memlimit_active = high_water_mark;
	entry.memlimit_inactive = high_water_mark;

	/* Start non-fatal; mark both limits fatal only when asked to. */
	entry.memlimit_active_attr = 0;
	entry.memlimit_inactive_attr = 0;
	if (is_fatal_limit == TRUE) {
		entry.memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
		entry.memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
	}

	return memorystatus_set_memlimit_properties(pid, &entry);
}
#endif /* CONFIG_JETSAM */
3e170ce0 A |
7689 | |
7690 | static int | |
cb323159 | 7691 | memorystatus_set_memlimit_properties_internal(proc_t p, memorystatus_memlimit_properties_t *p_entry) |
0a7de745 | 7692 | { |
cb323159 | 7693 | int error = 0; |
3e170ce0 | 7694 | |
c3c9b80d | 7695 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
3e170ce0 A |
7696 | |
7697 | /* | |
7698 | * Store the active limit variants in the proc. | |
7699 | */ | |
cb323159 | 7700 | SET_ACTIVE_LIMITS_LOCKED(p, p_entry->memlimit_active, p_entry->memlimit_active_attr); |
3e170ce0 A |
7701 | |
7702 | /* | |
7703 | * Store the inactive limit variants in the proc. | |
7704 | */ | |
cb323159 | 7705 | SET_INACTIVE_LIMITS_LOCKED(p, p_entry->memlimit_inactive, p_entry->memlimit_inactive_attr); |
3e170ce0 A |
7706 | |
7707 | /* | |
7708 | * Enforce appropriate limit variant by updating the cached values | |
7709 | * and writing the ledger. | |
7710 | * Limit choice is based on process active/inactive state. | |
7711 | */ | |
7712 | ||
7713 | if (memorystatus_highwater_enabled) { | |
813fb2f6 A |
7714 | boolean_t is_fatal; |
7715 | boolean_t use_active; | |
3e170ce0 A |
7716 | |
7717 | if (proc_jetsam_state_is_active_locked(p) == TRUE) { | |
813fb2f6 A |
7718 | CACHE_ACTIVE_LIMITS_LOCKED(p, is_fatal); |
7719 | use_active = TRUE; | |
fe8ab488 | 7720 | } else { |
813fb2f6 A |
7721 | CACHE_INACTIVE_LIMITS_LOCKED(p, is_fatal); |
7722 | use_active = FALSE; | |
fe8ab488 | 7723 | } |
3e170ce0 A |
7724 | |
7725 | /* Enforce the limit by writing to the ledgers */ | |
813fb2f6 | 7726 | error = (task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, use_active, is_fatal) == 0) ? 0 : EINVAL; |
3e170ce0 A |
7727 | |
7728 | MEMORYSTATUS_DEBUG(3, "memorystatus_set_memlimit_properties: new limit on pid %d (%dMB %s) current priority (%d) dirty_state?=0x%x %s\n", | |
0a7de745 A |
7729 | p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1), |
7730 | (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty, | |
7731 | (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : "")); | |
39037602 | 7732 | DTRACE_MEMORYSTATUS2(memorystatus_set_memlimit, proc_t, p, int32_t, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1)); |
fe8ab488 A |
7733 | } |
7734 | ||
39236c6e A |
7735 | return error; |
7736 | } | |
7737 | ||
d9a64523 | 7738 | static int |
cb323159 | 7739 | memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry) |
d9a64523 | 7740 | { |
cb323159 | 7741 | memorystatus_memlimit_properties_t set_entry; |
d9a64523 | 7742 | |
cb323159 | 7743 | proc_t p = proc_find(pid); |
d9a64523 A |
7744 | if (!p) { |
7745 | return ESRCH; | |
7746 | } | |
7747 | ||
cb323159 A |
7748 | /* |
7749 | * Check for valid attribute flags. | |
7750 | */ | |
7751 | const uint32_t valid_attrs = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
7752 | if ((entry->memlimit_active_attr & (~valid_attrs)) != 0) { | |
7753 | proc_rele(p); | |
7754 | return EINVAL; | |
7755 | } | |
7756 | if ((entry->memlimit_inactive_attr & (~valid_attrs)) != 0) { | |
7757 | proc_rele(p); | |
7758 | return EINVAL; | |
7759 | } | |
d9a64523 | 7760 | |
cb323159 A |
7761 | /* |
7762 | * Setup the active memlimit properties | |
7763 | */ | |
7764 | set_entry.memlimit_active = entry->memlimit_active; | |
7765 | set_entry.memlimit_active_attr = entry->memlimit_active_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
d9a64523 | 7766 | |
cb323159 A |
7767 | /* |
7768 | * Setup the inactive memlimit properties | |
7769 | */ | |
7770 | set_entry.memlimit_inactive = entry->memlimit_inactive; | |
7771 | set_entry.memlimit_inactive_attr = entry->memlimit_inactive_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
d9a64523 | 7772 | |
cb323159 A |
7773 | /* |
7774 | * Setting a limit of <= 0 implies that the process has no | |
7775 | * high-water-mark and has no per-task-limit. That means | |
7776 | * the system_wide task limit is in place, which by the way, | |
7777 | * is always fatal. | |
7778 | */ | |
d9a64523 | 7779 | |
cb323159 A |
7780 | if (set_entry.memlimit_active <= 0) { |
7781 | /* | |
7782 | * Enforce the fatal system_wide task limit while process is active. | |
7783 | */ | |
7784 | set_entry.memlimit_active = -1; | |
7785 | set_entry.memlimit_active_attr = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
7786 | } | |
7787 | #if CONFIG_JETSAM | |
7788 | #if DEVELOPMENT || DEBUG | |
7789 | else { | |
7790 | /* add the current increase to it, for roots */ | |
7791 | set_entry.memlimit_active += roundToNearestMB(p->p_memlimit_increase); | |
d9a64523 | 7792 | } |
cb323159 A |
7793 | #endif /* DEVELOPMENT || DEBUG */ |
7794 | #endif /* CONFIG_JETSAM */ | |
d9a64523 | 7795 | |
cb323159 A |
7796 | if (set_entry.memlimit_inactive <= 0) { |
7797 | /* | |
7798 | * Enforce the fatal system_wide task limit while process is inactive. | |
7799 | */ | |
7800 | set_entry.memlimit_inactive = -1; | |
7801 | set_entry.memlimit_inactive_attr = MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
d9a64523 | 7802 | } |
cb323159 A |
7803 | #if CONFIG_JETSAM |
7804 | #if DEVELOPMENT || DEBUG | |
7805 | else { | |
7806 | /* add the current increase to it, for roots */ | |
7807 | set_entry.memlimit_inactive += roundToNearestMB(p->p_memlimit_increase); | |
7808 | } | |
7809 | #endif /* DEVELOPMENT || DEBUG */ | |
7810 | #endif /* CONFIG_JETSAM */ | |
7811 | ||
7812 | proc_list_lock(); | |
7813 | ||
7814 | int error = memorystatus_set_memlimit_properties_internal(p, &set_entry); | |
7815 | ||
d9a64523 | 7816 | proc_list_unlock(); |
cb323159 A |
7817 | proc_rele(p); |
7818 | ||
7819 | return error; | |
7820 | } | |
d9a64523 | 7821 | |
cb323159 A |
7822 | /* |
7823 | * Returns the jetsam priority (effective or requested) of the process | |
7824 | * associated with this task. | |
7825 | */ | |
7826 | int | |
7827 | proc_get_memstat_priority(proc_t p, boolean_t effective_priority) | |
7828 | { | |
7829 | if (p) { | |
7830 | if (effective_priority) { | |
7831 | return p->p_memstat_effectivepriority; | |
7832 | } else { | |
7833 | return p->p_memstat_requestedpriority; | |
7834 | } | |
7835 | } | |
d9a64523 A |
7836 | return 0; |
7837 | } | |
7838 | ||
7839 | static int | |
cb323159 | 7840 | memorystatus_get_process_is_managed(pid_t pid, int *is_managed) |
d9a64523 | 7841 | { |
cb323159 | 7842 | proc_t p = NULL; |
d9a64523 | 7843 | |
cb323159 | 7844 | /* Validate inputs */ |
d9a64523 A |
7845 | if (pid == 0) { |
7846 | return EINVAL; | |
7847 | } | |
7848 | ||
7849 | p = proc_find(pid); | |
7850 | if (!p) { | |
7851 | return ESRCH; | |
7852 | } | |
7853 | ||
d9a64523 | 7854 | proc_list_lock(); |
cb323159 | 7855 | *is_managed = ((p->p_memstat_state & P_MEMSTAT_MANAGED) ? 1 : 0); |
d9a64523 A |
7856 | proc_rele_locked(p); |
7857 | proc_list_unlock(); | |
7858 | ||
7859 | return 0; | |
7860 | } | |
7861 | ||
7862 | static int | |
cb323159 | 7863 | memorystatus_set_process_is_managed(pid_t pid, boolean_t set_managed) |
d9a64523 | 7864 | { |
cb323159 | 7865 | proc_t p = NULL; |
d9a64523 | 7866 | |
cb323159 | 7867 | /* Validate inputs */ |
d9a64523 A |
7868 | if (pid == 0) { |
7869 | return EINVAL; | |
7870 | } | |
7871 | ||
7872 | p = proc_find(pid); | |
7873 | if (!p) { | |
7874 | return ESRCH; | |
7875 | } | |
7876 | ||
d9a64523 | 7877 | proc_list_lock(); |
cb323159 A |
7878 | if (set_managed == TRUE) { |
7879 | p->p_memstat_state |= P_MEMSTAT_MANAGED; | |
7880 | /* | |
7881 | * The P_MEMSTAT_MANAGED bit is set by assertiond for Apps. | |
7882 | * Also opt them in to being frozen (they might have started | |
7883 | * off with the P_MEMSTAT_FREEZE_DISABLED bit set.) | |
7884 | */ | |
d9a64523 | 7885 | p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED; |
cb323159 A |
7886 | } else { |
7887 | p->p_memstat_state &= ~P_MEMSTAT_MANAGED; | |
d9a64523 A |
7888 | } |
7889 | proc_rele_locked(p); | |
7890 | proc_list_unlock(); | |
7891 | ||
7892 | return 0; | |
7893 | } | |
7894 | ||
39236c6e | 7895 | int |
0a7de745 A |
7896 | memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) |
7897 | { | |
39236c6e | 7898 | int error = EINVAL; |
d9a64523 | 7899 | boolean_t skip_auth_check = FALSE; |
39037602 | 7900 | os_reason_t jetsam_reason = OS_REASON_NULL; |
39236c6e A |
7901 | |
7902 | #if !CONFIG_JETSAM | |
cb323159 A |
7903 | #pragma unused(ret) |
7904 | #pragma unused(jetsam_reason) | |
39236c6e A |
7905 | #endif |
7906 | ||
2a1bd2d3 A |
7907 | /* We don't need entitlements if we're setting / querying the freeze preference or frozen status for a process. */ |
7908 | if (args->command == MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE || | |
7909 | args->command == MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE || | |
7910 | args->command == MEMORYSTATUS_CMD_GET_PROCESS_IS_FROZEN) { | |
d9a64523 A |
7911 | skip_auth_check = TRUE; |
7912 | } | |
7913 | ||
7914 | /* Need to be root or have entitlement. */ | |
7915 | if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT) && !skip_auth_check) { | |
39236c6e A |
7916 | error = EPERM; |
7917 | goto out; | |
b0d623f7 | 7918 | } |
39037602 A |
7919 | |
7920 | /* | |
7921 | * Sanity check. | |
7922 | * Do not enforce it for snapshots. | |
7923 | */ | |
7924 | if (args->command != MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT) { | |
7925 | if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) { | |
7926 | error = EINVAL; | |
7927 | goto out; | |
7928 | } | |
39236c6e A |
7929 | } |
7930 | ||
7931 | switch (args->command) { | |
7932 | case MEMORYSTATUS_CMD_GET_PRIORITY_LIST: | |
5ba3f43e | 7933 | error = memorystatus_cmd_get_priority_list(args->pid, args->buffer, args->buffersize, ret); |
39236c6e | 7934 | break; |
39236c6e | 7935 | case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES: |
cb323159 | 7936 | error = memorystatus_cmd_set_priority_properties(args->pid, args->flags, args->buffer, args->buffersize, ret); |
39236c6e | 7937 | break; |
3e170ce0 A |
7938 | case MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES: |
7939 | error = memorystatus_cmd_set_memlimit_properties(args->pid, args->buffer, args->buffersize, ret); | |
7940 | break; | |
7941 | case MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES: | |
7942 | error = memorystatus_cmd_get_memlimit_properties(args->pid, args->buffer, args->buffersize, ret); | |
7943 | break; | |
39037602 A |
7944 | case MEMORYSTATUS_CMD_GET_MEMLIMIT_EXCESS: |
7945 | error = memorystatus_cmd_get_memlimit_excess_np(args->pid, args->flags, args->buffer, args->buffersize, ret); | |
7946 | break; | |
fe8ab488 A |
7947 | case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES: |
7948 | error = memorystatus_cmd_grp_set_properties((int32_t)args->flags, args->buffer, args->buffersize, ret); | |
0a7de745 | 7949 | break; |
39236c6e | 7950 | case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT: |
3e170ce0 | 7951 | error = memorystatus_cmd_get_jetsam_snapshot((int32_t)args->flags, args->buffer, args->buffersize, ret); |
39236c6e | 7952 | break; |
f427ee49 | 7953 | #if DEVELOPMENT || DEBUG |
c3c9b80d A |
7954 | case MEMORYSTATUS_CMD_SET_TESTING_PID: |
7955 | error = memorystatus_cmd_set_testing_pid((int32_t) args->flags); | |
f427ee49 A |
7956 | break; |
7957 | #endif | |
39236c6e A |
7958 | case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS: |
7959 | error = memorystatus_cmd_get_pressure_status(ret); | |
7960 | break; | |
5ba3f43e | 7961 | #if CONFIG_JETSAM |
39236c6e | 7962 | case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK: |
3e170ce0 A |
7963 | /* |
7964 | * This call does not distinguish between active and inactive limits. | |
7965 | * Default behavior in 2-level HWM world is to set both. | |
7966 | * Non-fatal limit is also assumed for both. | |
7967 | */ | |
fe8ab488 A |
7968 | error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE); |
7969 | break; | |
7970 | case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT: | |
3e170ce0 A |
7971 | /* |
7972 | * This call does not distinguish between active and inactive limits. | |
7973 | * Default behavior in 2-level HWM world is to set both. | |
7974 | * Fatal limit is also assumed for both. | |
7975 | */ | |
fe8ab488 | 7976 | error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE); |
39236c6e | 7977 | break; |
5ba3f43e | 7978 | #endif /* CONFIG_JETSAM */ |
0a7de745 | 7979 | /* Test commands */ |
39236c6e A |
7980 | #if DEVELOPMENT || DEBUG |
7981 | case MEMORYSTATUS_CMD_TEST_JETSAM: | |
39037602 A |
7982 | jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_GENERIC); |
7983 | if (jetsam_reason == OS_REASON_NULL) { | |
7984 | printf("memorystatus_control: failed to allocate jetsam reason\n"); | |
7985 | } | |
7986 | ||
7987 | error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled, jetsam_reason) ? 0 : EINVAL; | |
39236c6e | 7988 | break; |
3e170ce0 | 7989 | case MEMORYSTATUS_CMD_TEST_JETSAM_SORT: |
f427ee49 | 7990 | error = memorystatus_cmd_test_jetsam_sort(args->pid, (int32_t)args->flags, args->buffer, args->buffersize); |
3e170ce0 | 7991 | break; |
5ba3f43e | 7992 | #if CONFIG_JETSAM |
39236c6e A |
7993 | case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS: |
7994 | error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize); | |
7995 | break; | |
5ba3f43e | 7996 | #endif /* CONFIG_JETSAM */ |
39037602 A |
7997 | #else /* DEVELOPMENT || DEBUG */ |
7998 | #pragma unused(jetsam_reason) | |
39236c6e | 7999 | #endif /* DEVELOPMENT || DEBUG */ |
490019cf A |
8000 | case MEMORYSTATUS_CMD_AGGRESSIVE_JETSAM_LENIENT_MODE_ENABLE: |
8001 | if (memorystatus_aggressive_jetsam_lenient_allowed == FALSE) { | |
8002 | #if DEVELOPMENT || DEBUG | |
8003 | printf("Enabling Lenient Mode\n"); | |
8004 | #endif /* DEVELOPMENT || DEBUG */ | |
8005 | ||
8006 | memorystatus_aggressive_jetsam_lenient_allowed = TRUE; | |
8007 | memorystatus_aggressive_jetsam_lenient = TRUE; | |
39037602 | 8008 | error = 0; |
490019cf A |
8009 | } |
8010 | break; | |
8011 | case MEMORYSTATUS_CMD_AGGRESSIVE_JETSAM_LENIENT_MODE_DISABLE: | |
8012 | #if DEVELOPMENT || DEBUG | |
8013 | printf("Disabling Lenient mode\n"); | |
8014 | #endif /* DEVELOPMENT || DEBUG */ | |
8015 | memorystatus_aggressive_jetsam_lenient_allowed = FALSE; | |
8016 | memorystatus_aggressive_jetsam_lenient = FALSE; | |
39037602 | 8017 | error = 0; |
490019cf | 8018 | break; |
cb323159 A |
8019 | case MEMORYSTATUS_CMD_GET_AGGRESSIVE_JETSAM_LENIENT_MODE: |
8020 | *ret = (memorystatus_aggressive_jetsam_lenient ? 1 : 0); | |
8021 | error = 0; | |
8022 | break; | |
3e170ce0 A |
8023 | case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE: |
8024 | case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE: | |
8025 | error = memorystatus_low_mem_privileged_listener(args->command); | |
8026 | break; | |
39037602 | 8027 | |
39037602 A |
8028 | case MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE: |
8029 | case MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_DISABLE: | |
d9a64523 A |
8030 | error = memorystatus_update_inactive_jetsam_priority_band(args->pid, args->command, JETSAM_PRIORITY_ELEVATED_INACTIVE, args->flags ? TRUE : FALSE); |
8031 | break; | |
8032 | case MEMORYSTATUS_CMD_SET_PROCESS_IS_MANAGED: | |
8033 | error = memorystatus_set_process_is_managed(args->pid, args->flags); | |
39037602 | 8034 | break; |
39037602 | 8035 | |
d9a64523 A |
8036 | case MEMORYSTATUS_CMD_GET_PROCESS_IS_MANAGED: |
8037 | error = memorystatus_get_process_is_managed(args->pid, ret); | |
8038 | break; | |
8039 | ||
cb323159 | 8040 | #if CONFIG_FREEZE |
d9a64523 A |
8041 | case MEMORYSTATUS_CMD_SET_PROCESS_IS_FREEZABLE: |
8042 | error = memorystatus_set_process_is_freezable(args->pid, args->flags ? TRUE : FALSE); | |
8043 | break; | |
8044 | ||
8045 | case MEMORYSTATUS_CMD_GET_PROCESS_IS_FREEZABLE: | |
8046 | error = memorystatus_get_process_is_freezable(args->pid, ret); | |
8047 | break; | |
2a1bd2d3 A |
8048 | case MEMORYSTATUS_CMD_GET_PROCESS_IS_FROZEN: |
8049 | error = memorystatus_get_process_is_frozen(args->pid, ret); | |
8050 | break; | |
d9a64523 | 8051 | |
d9a64523 A |
8052 | case MEMORYSTATUS_CMD_FREEZER_CONTROL: |
8053 | error = memorystatus_freezer_control(args->flags, args->buffer, args->buffersize, ret); | |
8054 | break; | |
d9a64523 A |
8055 | #endif /* CONFIG_FREEZE */ |
8056 | ||
cb323159 A |
8057 | #if CONFIG_JETSAM |
8058 | #if DEVELOPMENT || DEBUG | |
8059 | case MEMORYSTATUS_CMD_INCREASE_JETSAM_TASK_LIMIT: | |
8060 | error = memorystatus_cmd_increase_jetsam_task_limit(args->pid, args->flags); | |
8061 | break; | |
f427ee49 | 8062 | #endif /* DEVELOPMENT || DEBUG */ |
cb323159 A |
8063 | #endif /* CONFIG_JETSAM */ |
8064 | ||
39236c6e A |
8065 | default: |
8066 | break; | |
8067 | } | |
8068 | ||
8069 | out: | |
8070 | return error; | |
8071 | } | |
8072 | ||
3e170ce0 A |
8073 | /* Coalition support */ |
8074 | ||
/*
 * Sorting info for a particular priority bucket.
 * One entry describes one coalition whose leader was found in the bucket.
 */
typedef struct memstat_sort_info {
	coalition_t     msi_coal;       /* the coalition being described */
	uint64_t        msi_page_count; /* total coalition page count */
	pid_t           msi_pid;        /* coalition leader pid */
	int             msi_ntasks;     /* number of tasks in the coalition */
} memstat_sort_info_t;
8082 | ||
0a7de745 | 8083 | /* |
3e170ce0 A |
8084 | * qsort from smallest page count to largest page count |
8085 | * | |
8086 | * return < 0 for a < b | |
8087 | * 0 for a == b | |
8088 | * > 0 for a > b | |
8089 | */ | |
0a7de745 A |
8090 | static int |
8091 | memstat_asc_cmp(const void *a, const void *b) | |
3e170ce0 | 8092 | { |
0a7de745 A |
8093 | const memstat_sort_info_t *msA = (const memstat_sort_info_t *)a; |
8094 | const memstat_sort_info_t *msB = (const memstat_sort_info_t *)b; | |
3e170ce0 | 8095 | |
0a7de745 | 8096 | return (int)((uint64_t)msA->msi_page_count - (uint64_t)msB->msi_page_count); |
3e170ce0 A |
8097 | } |
8098 | ||
8099 | /* | |
8100 | * Return the number of pids rearranged during this sort. | |
8101 | */ | |
8102 | static int | |
8103 | memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order) | |
8104 | { | |
0a7de745 A |
8105 | #define MAX_SORT_PIDS 80 |
8106 | #define MAX_COAL_LEADERS 10 | |
3e170ce0 A |
8107 | |
8108 | unsigned int b = bucket_index; | |
8109 | int nleaders = 0; | |
8110 | int ntasks = 0; | |
8111 | proc_t p = NULL; | |
8112 | coalition_t coal = COALITION_NULL; | |
8113 | int pids_moved = 0; | |
8114 | int total_pids_moved = 0; | |
8115 | int i; | |
8116 | ||
0a7de745 | 8117 | /* |
3e170ce0 A |
8118 | * The system is typically under memory pressure when in this |
8119 | * path, hence, we want to avoid dynamic memory allocation. | |
8120 | */ | |
8121 | memstat_sort_info_t leaders[MAX_COAL_LEADERS]; | |
8122 | pid_t pid_list[MAX_SORT_PIDS]; | |
8123 | ||
8124 | if (bucket_index >= MEMSTAT_BUCKET_COUNT) { | |
0a7de745 A |
8125 | return 0; |
8126 | } | |
3e170ce0 A |
8127 | |
8128 | /* | |
8129 | * Clear the array that holds coalition leader information | |
8130 | */ | |
0a7de745 | 8131 | for (i = 0; i < MAX_COAL_LEADERS; i++) { |
3e170ce0 | 8132 | leaders[i].msi_coal = COALITION_NULL; |
0a7de745 A |
8133 | leaders[i].msi_page_count = 0; /* will hold total coalition page count */ |
8134 | leaders[i].msi_pid = 0; /* will hold coalition leader pid */ | |
8135 | leaders[i].msi_ntasks = 0; /* will hold the number of tasks in a coalition */ | |
3e170ce0 A |
8136 | } |
8137 | ||
0a7de745 A |
8138 | p = memorystatus_get_first_proc_locked(&b, FALSE); |
8139 | while (p) { | |
cb323159 A |
8140 | coal = task_get_coalition(p->task, COALITION_TYPE_JETSAM); |
8141 | if (coalition_is_leader(p->task, coal)) { | |
3e170ce0 A |
8142 | if (nleaders < MAX_COAL_LEADERS) { |
8143 | int coal_ntasks = 0; | |
8144 | uint64_t coal_page_count = coalition_get_page_count(coal, &coal_ntasks); | |
8145 | leaders[nleaders].msi_coal = coal; | |
8146 | leaders[nleaders].msi_page_count = coal_page_count; | |
0a7de745 | 8147 | leaders[nleaders].msi_pid = p->p_pid; /* the coalition leader */ |
3e170ce0 A |
8148 | leaders[nleaders].msi_ntasks = coal_ntasks; |
8149 | nleaders++; | |
8150 | } else { | |
0a7de745 | 8151 | /* |
3e170ce0 | 8152 | * We've hit MAX_COAL_LEADERS meaning we can handle no more coalitions. |
0a7de745 | 8153 | * Abandoned coalitions will linger at the tail of the priority band |
3e170ce0 A |
8154 | * when this sort session ends. |
8155 | * TODO: should this be an assert? | |
8156 | */ | |
8157 | printf("%s: WARNING: more than %d leaders in priority band [%d]\n", | |
0a7de745 | 8158 | __FUNCTION__, MAX_COAL_LEADERS, bucket_index); |
3e170ce0 A |
8159 | break; |
8160 | } | |
0a7de745 A |
8161 | } |
8162 | p = memorystatus_get_next_proc_locked(&b, p, FALSE); | |
8163 | } | |
3e170ce0 A |
8164 | |
8165 | if (nleaders == 0) { | |
8166 | /* Nothing to sort */ | |
0a7de745 | 8167 | return 0; |
3e170ce0 A |
8168 | } |
8169 | ||
0a7de745 | 8170 | /* |
3e170ce0 A |
8171 | * Sort the coalition leader array, from smallest coalition page count |
8172 | * to largest coalition page count. When inserted in the priority bucket, | |
8173 | * smallest coalition is handled first, resulting in the last to be jetsammed. | |
8174 | */ | |
8175 | if (nleaders > 1) { | |
8176 | qsort(leaders, nleaders, sizeof(memstat_sort_info_t), memstat_asc_cmp); | |
8177 | } | |
8178 | ||
8179 | #if 0 | |
8180 | for (i = 0; i < nleaders; i++) { | |
8181 | printf("%s: coal_leader[%d of %d] pid[%d] pages[%llu] ntasks[%d]\n", | |
0a7de745 A |
8182 | __FUNCTION__, i, nleaders, leaders[i].msi_pid, leaders[i].msi_page_count, |
8183 | leaders[i].msi_ntasks); | |
3e170ce0 A |
8184 | } |
8185 | #endif | |
8186 | ||
8187 | /* | |
8188 | * During coalition sorting, processes in a priority band are rearranged | |
8189 | * by being re-inserted at the head of the queue. So, when handling a | |
8190 | * list, the first process that gets moved to the head of the queue, | |
8191 | * ultimately gets pushed toward the queue tail, and hence, jetsams last. | |
8192 | * | |
8193 | * So, for example, the coalition leader is expected to jetsam last, | |
8194 | * after its coalition members. Therefore, the coalition leader is | |
8195 | * inserted at the head of the queue first. | |
8196 | * | |
8197 | * After processing a coalition, the jetsam order is as follows: | |
8198 | * undefs(jetsam first), extensions, xpc services, leader(jetsam last) | |
8199 | */ | |
8200 | ||
8201 | /* | |
8202 | * Coalition members are rearranged in the priority bucket here, | |
8203 | * based on their coalition role. | |
8204 | */ | |
8205 | total_pids_moved = 0; | |
0a7de745 | 8206 | for (i = 0; i < nleaders; i++) { |
3e170ce0 A |
8207 | /* a bit of bookkeeping */ |
8208 | pids_moved = 0; | |
8209 | ||
8210 | /* Coalition leaders are jetsammed last, so move into place first */ | |
8211 | pid_list[0] = leaders[i].msi_pid; | |
8212 | pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, 1); | |
8213 | ||
8214 | /* xpc services should jetsam after extensions */ | |
0a7de745 A |
8215 | ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_XPC, |
8216 | coal_sort_order, pid_list, MAX_SORT_PIDS); | |
3e170ce0 A |
8217 | |
8218 | if (ntasks > 0) { | |
0a7de745 A |
8219 | pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, |
8220 | (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS)); | |
3e170ce0 A |
8221 | } |
8222 | ||
8223 | /* extensions should jetsam after unmarked processes */ | |
0a7de745 A |
8224 | ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_EXT, |
8225 | coal_sort_order, pid_list, MAX_SORT_PIDS); | |
3e170ce0 A |
8226 | |
8227 | if (ntasks > 0) { | |
8228 | pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, | |
0a7de745 | 8229 | (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS)); |
3e170ce0 A |
8230 | } |
8231 | ||
8232 | /* undefined coalition members should be the first to jetsam */ | |
0a7de745 A |
8233 | ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_UNDEF, |
8234 | coal_sort_order, pid_list, MAX_SORT_PIDS); | |
3e170ce0 A |
8235 | |
8236 | if (ntasks > 0) { | |
0a7de745 A |
8237 | pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, |
8238 | (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS)); | |
3e170ce0 A |
8239 | } |
8240 | ||
8241 | #if 0 | |
8242 | if (pids_moved == leaders[i].msi_ntasks) { | |
8243 | /* | |
8244 | * All the pids in the coalition were found in this band. | |
8245 | */ | |
8246 | printf("%s: pids_moved[%d] equal total coalition ntasks[%d] \n", __FUNCTION__, | |
0a7de745 | 8247 | pids_moved, leaders[i].msi_ntasks); |
3e170ce0 A |
8248 | } else if (pids_moved > leaders[i].msi_ntasks) { |
8249 | /* | |
8250 | * Apparently new coalition members showed up during the sort? | |
8251 | */ | |
8252 | printf("%s: pids_moved[%d] were greater than expected coalition ntasks[%d] \n", __FUNCTION__, | |
0a7de745 | 8253 | pids_moved, leaders[i].msi_ntasks); |
3e170ce0 A |
8254 | } else { |
8255 | /* | |
8256 | * Apparently not all the pids in the coalition were found in this band? | |
8257 | */ | |
8258 | printf("%s: pids_moved[%d] were less than expected coalition ntasks[%d] \n", __FUNCTION__, | |
0a7de745 | 8259 | pids_moved, leaders[i].msi_ntasks); |
3e170ce0 A |
8260 | } |
8261 | #endif | |
8262 | ||
8263 | total_pids_moved += pids_moved; | |
3e170ce0 A |
8264 | } /* end for */ |
8265 | ||
0a7de745 | 8266 | return total_pids_moved; |
3e170ce0 A |
8267 | } |
8268 | ||
8269 | ||
8270 | /* | |
8271 | * Traverse a list of pids, searching for each within the priority band provided. | |
8272 | * If pid is found, move it to the front of the priority band. | |
8273 | * Never searches outside the priority band provided. | |
0a7de745 | 8274 | * |
3e170ce0 A |
8275 | * Input: |
8276 | * bucket_index - jetsam priority band. | |
8277 | * pid_list - pointer to a list of pids. | |
8278 | * list_sz - number of pids in the list. | |
8279 | * | |
0a7de745 | 8280 | * Pid list ordering is important in that, |
3e170ce0 A |
8281 | * pid_list[n] is expected to jetsam ahead of pid_list[n+1]. |
8282 | * The sort_order is set by the coalition default. | |
8283 | * | |
0a7de745 | 8284 | * Return: |
3e170ce0 A |
8285 | * the number of pids found and hence moved within the priority band. |
8286 | */ | |
8287 | static int | |
8288 | memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz) | |
8289 | { | |
8290 | memstat_bucket_t *current_bucket; | |
8291 | int i; | |
8292 | int found_pids = 0; | |
8293 | ||
8294 | if ((pid_list == NULL) || (list_sz <= 0)) { | |
0a7de745 | 8295 | return 0; |
3e170ce0 A |
8296 | } |
8297 | ||
8298 | if (bucket_index >= MEMSTAT_BUCKET_COUNT) { | |
0a7de745 A |
8299 | return 0; |
8300 | } | |
3e170ce0 A |
8301 | |
8302 | current_bucket = &memstat_bucket[bucket_index]; | |
0a7de745 | 8303 | for (i = 0; i < list_sz; i++) { |
3e170ce0 A |
8304 | unsigned int b = bucket_index; |
8305 | proc_t p = NULL; | |
8306 | proc_t aProc = NULL; | |
8307 | pid_t aPid; | |
8308 | int list_index; | |
8309 | ||
8310 | list_index = ((list_sz - 1) - i); | |
0a7de745 A |
8311 | aPid = pid_list[list_index]; |
8312 | ||
8313 | /* never search beyond bucket_index provided */ | |
8314 | p = memorystatus_get_first_proc_locked(&b, FALSE); | |
8315 | while (p) { | |
8316 | if (p->p_pid == aPid) { | |
8317 | aProc = p; | |
8318 | break; | |
8319 | } | |
8320 | p = memorystatus_get_next_proc_locked(&b, p, FALSE); | |
8321 | } | |
8322 | ||
8323 | if (aProc == NULL) { | |
3e170ce0 | 8324 | /* pid not found in this band, just skip it */ |
0a7de745 A |
8325 | continue; |
8326 | } else { | |
8327 | TAILQ_REMOVE(¤t_bucket->list, aProc, p_memstat_list); | |
8328 | TAILQ_INSERT_HEAD(¤t_bucket->list, aProc, p_memstat_list); | |
3e170ce0 | 8329 | found_pids++; |
0a7de745 A |
8330 | } |
8331 | } | |
8332 | return found_pids; | |
3e170ce0 | 8333 | } |
5ba3f43e A |
8334 | |
8335 | int | |
8336 | memorystatus_get_proccnt_upto_priority(int32_t max_bucket_index) | |
8337 | { | |
0a7de745 | 8338 | int32_t i = JETSAM_PRIORITY_IDLE; |
5ba3f43e A |
8339 | int count = 0; |
8340 | ||
8341 | if (max_bucket_index >= MEMSTAT_BUCKET_COUNT) { | |
0a7de745 A |
8342 | return -1; |
8343 | } | |
5ba3f43e | 8344 | |
0a7de745 | 8345 | while (i <= max_bucket_index) { |
5ba3f43e A |
8346 | count += memstat_bucket[i++].count; |
8347 | } | |
8348 | ||
8349 | return count; | |
8350 | } | |
8351 | ||
8352 | int | |
8353 | memorystatus_update_priority_for_appnap(proc_t p, boolean_t is_appnap) | |
8354 | { | |
8355 | #if !CONFIG_JETSAM | |
d9a64523 | 8356 | if (!p || (!isApp(p)) || (p->p_memstat_state & (P_MEMSTAT_INTERNAL | P_MEMSTAT_MANAGED))) { |
5ba3f43e A |
8357 | /* |
8358 | * Ineligible processes OR system processes e.g. launchd. | |
d9a64523 A |
8359 | * |
8360 | * We also skip processes that have the P_MEMSTAT_MANAGED bit set, i.e. | |
8361 | * they're managed by assertiond. These are iOS apps that have been ported | |
8362 | * to macOS. assertiond might be in the process of modifying the app's | |
8363 | * priority / memory limit - so it might have the proc_list lock, and then try | |
8364 | * to take the task lock. Meanwhile we've entered this function with the task lock | |
8365 | * held, and we need the proc_list lock below. So we'll deadlock with assertiond. | |
8366 | * | |
8367 | * It should be fine to read the P_MEMSTAT_MANAGED bit without the proc_list | |
8368 | * lock here, since assertiond only sets this bit on process launch. | |
5ba3f43e A |
8369 | */ |
8370 | return -1; | |
8371 | } | |
8372 | ||
8373 | /* | |
8374 | * For macOS only: | |
8375 | * We would like to use memorystatus_update() here to move the processes | |
8376 | * within the bands. Unfortunately memorystatus_update() calls | |
8377 | * memorystatus_update_priority_locked() which uses any band transitions | |
8378 | * as an indication to modify ledgers. For that it needs the task lock | |
8379 | * and since we came into this function with the task lock held, we'll deadlock. | |
8380 | * | |
0a7de745 | 8381 | * Unfortunately we can't completely disable ledger updates because we still |
5ba3f43e A |
8382 | * need the ledger updates for a subset of processes i.e. daemons. |
8383 | * When all processes on all platforms support memory limits, we can simply call | |
8384 | * memorystatus_update(). | |
0a7de745 | 8385 | * |
5ba3f43e A |
8386 | * It also has some logic to deal with 'aging' which, currently, is only applicable |
8387 | * on CONFIG_JETSAM configs. So, till every platform has CONFIG_JETSAM we'll need | |
8388 | * to do this explicit band transition. | |
8389 | */ | |
8390 | ||
8391 | memstat_bucket_t *current_bucket, *new_bucket; | |
0a7de745 | 8392 | int32_t priority = 0; |
5ba3f43e A |
8393 | |
8394 | proc_list_lock(); | |
8395 | ||
8396 | if (((p->p_listflag & P_LIST_EXITED) != 0) || | |
8397 | (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED))) { | |
8398 | /* | |
8399 | * If the process is on its way out OR | |
8400 | * jetsam has alread tried and failed to kill this process, | |
8401 | * let's skip the whole jetsam band transition. | |
8402 | */ | |
8403 | proc_list_unlock(); | |
0a7de745 | 8404 | return 0; |
5ba3f43e A |
8405 | } |
8406 | ||
8407 | if (is_appnap) { | |
8408 | current_bucket = &memstat_bucket[p->p_memstat_effectivepriority]; | |
8409 | new_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
8410 | priority = JETSAM_PRIORITY_IDLE; | |
8411 | } else { | |
8412 | if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) { | |
8413 | /* | |
8414 | * It is possible that someone pulled this process | |
8415 | * out of the IDLE band without updating its app-nap | |
8416 | * parameters. | |
8417 | */ | |
8418 | proc_list_unlock(); | |
0a7de745 | 8419 | return 0; |
5ba3f43e A |
8420 | } |
8421 | ||
8422 | current_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
8423 | new_bucket = &memstat_bucket[p->p_memstat_requestedpriority]; | |
8424 | priority = p->p_memstat_requestedpriority; | |
8425 | } | |
8426 | ||
8427 | TAILQ_REMOVE(¤t_bucket->list, p, p_memstat_list); | |
8428 | current_bucket->count--; | |
cb323159 A |
8429 | if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) { |
8430 | current_bucket->relaunch_high_count--; | |
8431 | } | |
5ba3f43e A |
8432 | TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list); |
8433 | new_bucket->count++; | |
cb323159 A |
8434 | if (p->p_memstat_relaunch_flags & (P_MEMSTAT_RELAUNCH_HIGH)) { |
8435 | new_bucket->relaunch_high_count++; | |
8436 | } | |
5ba3f43e A |
8437 | /* |
8438 | * Record idle start or idle delta. | |
8439 | */ | |
8440 | if (p->p_memstat_effectivepriority == priority) { | |
0a7de745 | 8441 | /* |
5ba3f43e A |
8442 | * This process is not transitioning between |
8443 | * jetsam priority buckets. Do nothing. | |
8444 | */ | |
8445 | } else if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) { | |
8446 | uint64_t now; | |
8447 | /* | |
8448 | * Transitioning out of the idle priority bucket. | |
8449 | * Record idle delta. | |
8450 | */ | |
8451 | assert(p->p_memstat_idle_start != 0); | |
8452 | now = mach_absolute_time(); | |
8453 | if (now > p->p_memstat_idle_start) { | |
8454 | p->p_memstat_idle_delta = now - p->p_memstat_idle_start; | |
8455 | } | |
8456 | } else if (priority == JETSAM_PRIORITY_IDLE) { | |
8457 | /* | |
8458 | * Transitioning into the idle priority bucket. | |
8459 | * Record idle start. | |
8460 | */ | |
8461 | p->p_memstat_idle_start = mach_absolute_time(); | |
8462 | } | |
8463 | ||
d9a64523 A |
8464 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CHANGE_PRIORITY), p->p_pid, priority, p->p_memstat_effectivepriority, 0, 0); |
8465 | ||
5ba3f43e A |
8466 | p->p_memstat_effectivepriority = priority; |
8467 | ||
8468 | proc_list_unlock(); | |
8469 | ||
0a7de745 | 8470 | return 0; |
5ba3f43e A |
8471 | |
8472 | #else /* !CONFIG_JETSAM */ | |
8473 | #pragma unused(p) | |
8474 | #pragma unused(is_appnap) | |
8475 | return -1; | |
8476 | #endif /* !CONFIG_JETSAM */ | |
8477 | } | |
cb323159 A |
8478 | |
8479 | uint64_t | |
f427ee49 | 8480 | memorystatus_available_memory_internal(struct proc *p) |
cb323159 A |
8481 | { |
8482 | #ifdef XNU_TARGET_OS_OSX | |
f427ee49 A |
8483 | if (p->p_memstat_memlimit <= 0) { |
8484 | return 0; | |
8485 | } | |
8486 | #endif /* XNU_TARGET_OS_OSX */ | |
cb323159 A |
8487 | const uint64_t footprint_in_bytes = get_task_phys_footprint(p->task); |
8488 | int32_t memlimit_mb; | |
8489 | int64_t memlimit_bytes; | |
8490 | int64_t rc; | |
8491 | ||
8492 | if (isApp(p) == FALSE) { | |
8493 | return 0; | |
8494 | } | |
8495 | ||
8496 | if (p->p_memstat_memlimit > 0) { | |
8497 | memlimit_mb = p->p_memstat_memlimit; | |
8498 | } else if (task_convert_phys_footprint_limit(-1, &memlimit_mb) != KERN_SUCCESS) { | |
8499 | return 0; | |
8500 | } | |
8501 | ||
8502 | if (memlimit_mb <= 0) { | |
8503 | memlimit_bytes = INT_MAX & ~((1 << 20) - 1); | |
8504 | } else { | |
8505 | memlimit_bytes = ((int64_t) memlimit_mb) << 20; | |
8506 | } | |
8507 | ||
8508 | rc = memlimit_bytes - footprint_in_bytes; | |
8509 | ||
8510 | return (rc >= 0) ? rc : 0; | |
cb323159 A |
8511 | } |
8512 | ||
8513 | int | |
8514 | memorystatus_available_memory(struct proc *p, __unused struct memorystatus_available_memory_args *args, uint64_t *ret) | |
8515 | { | |
8516 | *ret = memorystatus_available_memory_internal(p); | |
8517 | ||
8518 | return 0; | |
8519 | } | |
8520 | ||
#if CONFIG_JETSAM
#if DEVELOPMENT || DEBUG
/*
 * Raise the active and inactive memory limits of a process by
 * byte_increase bytes, on top of any increase recorded earlier in
 * p->p_memlimit_increase.
 *
 * The accumulated increase is rounded up to a page, clamped to INT32_MAX
 * and stored back in the proc; limits that are not positive are left alone.
 *
 * Return:
 *	EINVAL	pid or byte_increase is 0.
 *	ESRCH	no process with that pid.
 *	otherwise the result of memorystatus_set_memlimit_properties_internal().
 */
static int
memorystatus_cmd_increase_jetsam_task_limit(pid_t pid, uint32_t byte_increase)
{
	memorystatus_memlimit_properties_t mmp_entry;

	/* Validate inputs */
	if ((pid == 0) || (byte_increase == 0)) {
		return EINVAL;
	}

	proc_t p = proc_find(pid);

	if (!p) {
		return ESRCH;
	}

	proc_list_lock();

	/*
	 * p_memlimit_increase is updated under the proc_list lock below, so it
	 * is also sampled under the lock to keep the read-modify-write coherent.
	 */
	const uint32_t current_memlimit_increase = roundToNearestMB(p->p_memlimit_increase);

	/*
	 * Accumulate in 64 bits and clamp before rounding, so a large request
	 * saturates at INT32_MAX instead of wrapping around in 32 bits.
	 */
	int64_t requested_increase = (int64_t)p->p_memlimit_increase + (int64_t)byte_increase;
	if (requested_increase > INT32_MAX) {
		requested_increase = INT32_MAX;
	}

	/* round to page */
	const int32_t page_aligned_increase = (int32_t) MIN(round_page((uint32_t)requested_increase), INT32_MAX);

	memorystatus_get_memlimit_properties_internal(p, &mmp_entry);

	/* swap the previously applied increase for the new accumulated one */
	if (mmp_entry.memlimit_active > 0) {
		mmp_entry.memlimit_active -= current_memlimit_increase;
		mmp_entry.memlimit_active += roundToNearestMB(page_aligned_increase);
	}

	if (mmp_entry.memlimit_inactive > 0) {
		mmp_entry.memlimit_inactive -= current_memlimit_increase;
		mmp_entry.memlimit_inactive += roundToNearestMB(page_aligned_increase);
	}

	/*
	 * Store the updated delta limit in the proc.
	 */
	p->p_memlimit_increase = page_aligned_increase;

	int error = memorystatus_set_memlimit_properties_internal(p, &mmp_entry);

	proc_list_unlock();
	proc_rele(p);

	return error;
}
#endif /* DEVELOPMENT || DEBUG */
#endif /* CONFIG_JETSAM */