/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 */

#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/locks.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <libkern/libkern.h>
#include <mach/coalition.h>
#include <mach/mach_time.h>
#include <mach/task.h>
#include <mach/host_priv.h>
#include <mach/mach_host.h>
#include <pexpert/pexpert.h>
#include <sys/coalition.h>
#include <sys/kern_event.h>
#include <sys/proc.h>
#include <sys/proc_info.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/wait.h>
#include <sys/tree.h>
#include <sys/priv.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#if CONFIG_FREEZE
#include <vm/vm_map.h>
#endif /* CONFIG_FREEZE */

#include <sys/kern_memorystatus.h>

#if CONFIG_JETSAM
/* For logging clarity */
static const char *jetsam_kill_cause_name[] = {
	""                  ,
	"jettisoned"        ,	/* kMemorystatusKilled */
	"highwater"         ,	/* kMemorystatusKilledHiwat */
	"vnode-limit"       ,	/* kMemorystatusKilledVnodes */
	"vm-pageshortage"   ,	/* kMemorystatusKilledVMPageShortage */
	"vm-thrashing"      ,	/* kMemorystatusKilledVMThrashing */
	"fc-thrashing"      ,	/* kMemorystatusKilledFCThrashing */
	"per-process-limit" ,	/* kMemorystatusKilledPerProcessLimit */
	"diagnostic"        ,	/* kMemorystatusKilledDiagnostic */
	"idle-exit"         ,	/* kMemorystatusKilledIdleExit */
};

/* Does cause indicate vm or fc thrashing? */
static boolean_t
is_thrashing(unsigned cause)
{
	switch (cause) {
	case kMemorystatusKilledVMThrashing:
	case kMemorystatusKilledFCThrashing:
		return TRUE;
	default:
		return FALSE;
	}
}

/* Callback into vm_compressor.c to signal that thrashing has been mitigated. */
extern void vm_thrashing_jetsam_done(void);
#endif

/* These are very verbose printf()s; enable with
 * MEMORYSTATUS_DEBUG_LOG
 */
#if MEMORYSTATUS_DEBUG_LOG
#define MEMORYSTATUS_DEBUG(cond, format, ...)		\
do {							\
	if (cond) { printf(format, ##__VA_ARGS__); }	\
} while(0)
#else
#define MEMORYSTATUS_DEBUG(cond, format, ...)
#endif

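/*
 * Illustrative usage (added commentary, not part of the original source):
 * with MEMORYSTATUS_DEBUG_LOG defined, the macro expands to a guarded
 * printf(); otherwise it compiles away entirely, so callers pay no cost.
 *
 *	MEMORYSTATUS_DEBUG(1, "memorystatus: pid %d priority %d\n",
 *	    p->p_pid, p->p_memstat_effectivepriority);
 */
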
/*
 * Active / Inactive limit support
 * proc list must be locked
 *
 * The SET_*** macros are used to initialize a limit
 * for the first time.
 *
 * The CACHE_*** macros are used to cache the limit that will
 * soon be in effect down in the ledgers.
 */

#define SET_ACTIVE_LIMITS_LOCKED(p, limit, is_fatal)			\
MACRO_BEGIN								\
(p)->p_memstat_memlimit_active = (limit);				\
(p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED;	\
if (is_fatal) {								\
	(p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL;	\
} else {								\
	(p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL;	\
}									\
MACRO_END

#define SET_INACTIVE_LIMITS_LOCKED(p, limit, is_fatal)			\
MACRO_BEGIN								\
(p)->p_memstat_memlimit_inactive = (limit);				\
(p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED;	\
if (is_fatal) {								\
	(p)->p_memstat_state |= P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL;	\
} else {								\
	(p)->p_memstat_state &= ~P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL;	\
}									\
MACRO_END

#define CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception)		\
MACRO_BEGIN								\
(p)->p_memstat_memlimit = (p)->p_memstat_memlimit_active;		\
if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) {		\
	(p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;		\
} else {								\
	(p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;		\
}									\
if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED) {	\
	trigger_exception = FALSE;					\
} else {								\
	trigger_exception = TRUE;					\
}									\
MACRO_END

#define CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception)		\
MACRO_BEGIN								\
(p)->p_memstat_memlimit = (p)->p_memstat_memlimit_inactive;		\
if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) {		\
	(p)->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;		\
} else {								\
	(p)->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;		\
}									\
if ((p)->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED) {	\
	trigger_exception = FALSE;					\
} else {								\
	trigger_exception = TRUE;					\
}									\
MACRO_END

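/*
 * Illustrative sketch (added commentary, not part of the original source;
 * the limit values are made-up): a caller first initializes both limit
 * variants, then caches whichever one currently applies and pushes it to
 * the ledger, mirroring the pattern used in
 * sysctl_memorystatus_highwater_enable below.
 *
 *	boolean_t trigger_exception;
 *	SET_ACTIVE_LIMITS_LOCKED(p, 300, TRUE);     // fatal limit while active
 *	SET_INACTIVE_LIMITS_LOCKED(p, 150, FALSE);  // non-fatal limit while inactive
 *	if (proc_jetsam_state_is_active_locked(p) == TRUE) {
 *		CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception);
 *	} else {
 *		CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception);
 *	}
 *	task_set_phys_footprint_limit_internal(p->task,
 *	    (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1,
 *	    NULL, trigger_exception);
 */
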
/* General tunables */

unsigned long delta_percentage = 5;
unsigned long critical_threshold_percentage = 5;
unsigned long idle_offset_percentage = 5;
unsigned long pressure_threshold_percentage = 15;
unsigned long freeze_threshold_percentage = 50;

/* General memorystatus stuff */

struct klist memorystatus_klist;
static lck_mtx_t memorystatus_klist_mutex;

static void memorystatus_klist_lock(void);
static void memorystatus_klist_unlock(void);

static uint64_t memorystatus_idle_delay_time = 0;

/*
 * Memorystatus kevents
 */

static int filt_memorystatusattach(struct knote *kn);
static void filt_memorystatusdetach(struct knote *kn);
static int filt_memorystatus(struct knote *kn, long hint);

struct filterops memorystatus_filtops = {
	.f_attach = filt_memorystatusattach,
	.f_detach = filt_memorystatusdetach,
	.f_event = filt_memorystatus,
};

enum {
	kMemorystatusNoPressure = 0x1,
	kMemorystatusPressure = 0x2,
	kMemorystatusLowSwap = 0x4
};

/* Idle guard handling */

static int32_t memorystatus_scheduled_idle_demotions = 0;

static thread_call_t memorystatus_idle_demotion_call;

static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2);
static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state);
static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clean_state);
static void memorystatus_reschedule_idle_demotion_locked(void);

static void memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert);

boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t);
void memorystatus_send_low_swap_note(void);

int memorystatus_wakeup = 0;

unsigned int memorystatus_level = 0;
unsigned int memorystatus_early_boot_level = 0;

static int memorystatus_list_count = 0;

#define MEMSTAT_BUCKET_COUNT (JETSAM_PRIORITY_MAX + 1)

typedef struct memstat_bucket {
	TAILQ_HEAD(, proc) list;
	int count;
} memstat_bucket_t;

memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT];

uint64_t memstat_idle_demotion_deadline = 0;

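/*
 * Note (added commentary, not part of the original source): memstat_bucket[]
 * is an array of FIFO queues, one per jetsam priority band. The traversal
 * pattern used throughout this file, with the proc list lock held, is:
 *
 *	unsigned int b = 0;
 *	proc_t p = memorystatus_get_first_proc_locked(&b, TRUE);
 *	while (p) {
 *		// ... examine p ...
 *		p = memorystatus_get_next_proc_locked(&b, p, TRUE);
 *	}
 */
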
static unsigned int memorystatus_dirty_count = 0;

#if CONFIG_JETSAM
SYSCTL_INT(_kern, OID_AUTO, max_task_pmem, CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED, &max_task_footprint_mb, 0, "");
#endif // CONFIG_JETSAM


int
memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret)
{
	user_addr_t level = 0;

	level = args->level;

	if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) {
		return EFAULT;
	}

	return 0;
}

static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search);
static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search);

static void memorystatus_thread(void *param __unused, wait_result_t wr __unused);

/* Jetsam */

#if CONFIG_JETSAM

static int memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit);

static int memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval);

static int memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry);

static int memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval);

static boolean_t proc_jetsam_state_is_active_locked(proc_t);

int proc_get_memstat_priority(proc_t, boolean_t);

/* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */
#define LEGACY_HIWATER 1

static boolean_t memorystatus_idle_snapshot = 0;

static int memorystatus_highwater_enabled = 1;	/* Update the cached memlimit data. This should be removed. */

unsigned int memorystatus_delta = 0;

static unsigned int memorystatus_available_pages_critical_base = 0;
//static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
static unsigned int memorystatus_available_pages_critical_idle_offset = 0;

/* Jetsam Loop Detection */
static boolean_t memorystatus_jld_enabled = TRUE;		/* Enables jetsam loop detection on all devices */
static uint32_t memorystatus_jld_eval_period_msecs = 0;		/* Init pass sets this based on device memory size */
static int memorystatus_jld_eval_aggressive_count = 3;		/* Raise the priority max after 'n' aggressive loops */
static int memorystatus_jld_eval_aggressive_priority_band_max = 15;	/* Kill aggressively up through this band */

#if DEVELOPMENT || DEBUG
/*
 * Jetsam Loop Detection tunables.
 */

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_period_msecs, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_period_msecs, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_count, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_count, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jld_eval_aggressive_priority_band_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jld_eval_aggressive_priority_band_max, 0, "");
#endif /* DEVELOPMENT || DEBUG */

#if DEVELOPMENT || DEBUG
static unsigned int memorystatus_jetsam_panic_debug = 0;

static unsigned int memorystatus_jetsam_policy = kPolicyDefault;
static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0;
static unsigned int memorystatus_debug_dump_this_bucket = 0;
#endif

static unsigned int memorystatus_thread_wasted_wakeup = 0;

static uint32_t kill_under_pressure_cause = 0;

/*
 * default jetsam snapshot support
 */
static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot;
#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries
static unsigned int memorystatus_jetsam_snapshot_count = 0;
static unsigned int memorystatus_jetsam_snapshot_max = 0;
static uint64_t memorystatus_jetsam_snapshot_last_timestamp = 0;
static uint64_t memorystatus_jetsam_snapshot_timeout = 0;
#define JETSAM_SNAPSHOT_TIMEOUT_SECS 30

/*
 * snapshot support for memstats collected at boot.
 */
static memorystatus_jetsam_snapshot_t memorystatus_at_boot_snapshot;

static void memorystatus_clear_errors(void);
static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages);
static uint32_t memorystatus_build_state(proc_t p);
static void memorystatus_update_levels_locked(boolean_t critical_only);
//static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured);

static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, int32_t *priority, uint32_t *errors);
static boolean_t memorystatus_kill_top_process_aggressive(boolean_t any, uint32_t cause, int aggr_count, int32_t priority_max, uint32_t *errors);
#if LEGACY_HIWATER
static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors);
#endif

static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause);

/* Priority Band Sorting Routines */
static int  memorystatus_sort_bucket(unsigned int bucket_index, int sort_order);
static int  memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order);
static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index);
static int  memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz);

/* qsort routines */
typedef int (*cmpfunc_t)(const void *a, const void *b);
extern void qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
static int memstat_asc_cmp(const void *a, const void *b);

#endif /* CONFIG_JETSAM */

/* VM pressure */

extern unsigned int vm_page_free_count;
extern unsigned int vm_page_active_count;
extern unsigned int vm_page_inactive_count;
extern unsigned int vm_page_throttled_count;
extern unsigned int vm_page_purgeable_count;
extern unsigned int vm_page_wire_count;

#if VM_PRESSURE_EVENTS

#include "vm_pressure.h"

extern boolean_t memorystatus_warn_process(pid_t pid, boolean_t critical);

vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal;

#if CONFIG_MEMORYSTATUS
unsigned int memorystatus_available_pages = (unsigned int)-1;
unsigned int memorystatus_available_pages_pressure = 0;
unsigned int memorystatus_available_pages_critical = 0;
unsigned int memorystatus_frozen_count = 0;
unsigned int memorystatus_suspended_count = 0;

/*
 * We use this flag to signal if we have any HWM offenders
 * on the system. This way we can reduce the number of wakeups
 * of the memorystatus_thread when the system is between the
 * "pressure" and "critical" threshold.
 *
 * The (re-)setting of this variable is done without any locks
 * or synchronization simply because it is not possible (currently)
 * to keep track of HWM offenders that drop down below their memory
 * limit and/or exit. So, we choose to burn a couple of wasted wakeups
 * by allowing the unguarded modification of this variable.
 */
boolean_t memorystatus_hwm_candidates = 0;

static int memorystatus_send_note(int event_code, void *data, size_t data_length);
#endif /* CONFIG_MEMORYSTATUS */

#endif /* VM_PRESSURE_EVENTS */

/* Freeze */

#if CONFIG_FREEZE

boolean_t memorystatus_freeze_enabled = FALSE;
int memorystatus_freeze_wakeup = 0;

lck_grp_attr_t *freezer_lck_grp_attr;
lck_grp_t *freezer_lck_grp;
static lck_mtx_t freezer_mutex;

static inline boolean_t memorystatus_can_freeze_processes(void);
static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);

static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);

/* Thresholds */
static unsigned int memorystatus_freeze_threshold = 0;

static unsigned int memorystatus_freeze_pages_min = 0;
static unsigned int memorystatus_freeze_pages_max = 0;

static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;

static unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;

/* Stats */
static uint64_t memorystatus_freeze_count = 0;
static uint64_t memorystatus_freeze_pageouts = 0;

/* Throttling */
static throttle_interval_t throttle_intervals[] = {
	{      60, 8, 0, 0, { 0, 0 }, FALSE },	/* 1 hour intermediate interval, 8x burst */
	{ 24 * 60, 1, 0, 0, { 0, 0 }, FALSE },	/* 24 hour long interval, no burst */
};

static uint64_t memorystatus_freeze_throttle_count = 0;

static unsigned int memorystatus_suspended_footprint_total = 0;

extern uint64_t vm_swap_get_free_space(void);

static boolean_t memorystatus_freeze_update_throttle();

#endif /* CONFIG_FREEZE */

/* Debug */

extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *);

#if DEVELOPMENT || DEBUG

#if CONFIG_JETSAM

static void
memorystatus_debug_dump_bucket_locked (unsigned int bucket_index)
{
	proc_t p = NULL;
	uint32_t pages = 0;
	uint32_t pages_in_mb = 0;
	unsigned int b = bucket_index;
	boolean_t traverse_all_buckets = FALSE;

	if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
		traverse_all_buckets = TRUE;
		b = 0;
	} else {
		traverse_all_buckets = FALSE;
		b = bucket_index;
	}

	/*
	 * Missing from this dump is the value actually
	 * stored in the ledger... also, format could be better.
	 */
	printf("memorystatus_debug_dump ***START***\n");
	printf("bucket [pid] [pages/pages-mb] state [EP / RP] dirty deadline [C-limit / A-limit / IA-limit] name\n");
	p = memorystatus_get_first_proc_locked(&b, traverse_all_buckets);
	while (p) {
		memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
		pages_in_mb = (pages * 4096) / 1024 / 1024;
		printf("%d [%d] [%d/%dMB] 0x%x [%d / %d] 0x%x %lld [%d%s / %d%s / %d%s] %s\n",
		    b, p->p_pid, pages, pages_in_mb,
		    p->p_memstat_state, p->p_memstat_effectivepriority, p->p_memstat_requestedpriority, p->p_memstat_dirty, p->p_memstat_idledeadline,
		    p->p_memstat_memlimit,
		    (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"),
		    p->p_memstat_memlimit_active,
		    (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL ? "F " : "NF"),
		    p->p_memstat_memlimit_inactive,
		    (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL ? "F " : "NF"),
		    (p->p_comm ? p->p_comm : "unknown"));
		p = memorystatus_get_next_proc_locked(&b, p, traverse_all_buckets);
	}
	printf("memorystatus_debug_dump ***END***\n");
}

static int
sysctl_memorystatus_debug_dump_bucket SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	int bucket_index = 0;
	int error;
	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}
	error = SYSCTL_IN(req, &bucket_index, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}
	if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
		/*
		 * All jetsam buckets will be dumped.
		 */
	} else {
		/*
		 * Only a single bucket will be dumped.
		 */
	}

	proc_list_lock();
	memorystatus_debug_dump_bucket_locked(bucket_index);
	proc_list_unlock();
	memorystatus_debug_dump_this_bucket = bucket_index;
	return (error);
}

/*
 * Debug aid to look at jetsam buckets and proc jetsam fields.
 * Use this sysctl to act on a particular jetsam bucket.
 * Writing the sysctl triggers the dump.
 *	Usage: sysctl kern.memorystatus_debug_dump_this_bucket=<bucket_index>
 */

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_debug_dump_this_bucket, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_debug_dump_this_bucket, 0, sysctl_memorystatus_debug_dump_bucket, "I", "");


/* Debug aid to aid determination of limit */

static int
sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	proc_t p;
	unsigned int b = 0;
	int error, enable = 0;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}

	error = SYSCTL_IN(req, &enable, sizeof(int));
	if (error || !req->newptr) {
		return (error);
	}

	if (!(enable == 0 || enable == 1)) {
		return EINVAL;
	}

	proc_list_lock();

	p = memorystatus_get_first_proc_locked(&b, TRUE);
	while (p) {
		boolean_t trigger_exception;

		if (enable) {
			/*
			 * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore.
			 * Background limits are described via the inactive limit slots.
			 */

			if (proc_jetsam_state_is_active_locked(p) == TRUE) {
				CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception);
			} else {
				CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception);
			}

		} else {
			/*
			 * Disabling limits does not touch the stored variants.
			 * Set the cached limit fields to system-wide defaults.
			 */
			p->p_memstat_memlimit = -1;
			p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
			trigger_exception = TRUE;
		}

		/*
		 * Enforce the cached limit by writing to the ledger.
		 */
		task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, trigger_exception);

		p = memorystatus_get_next_proc_locked(&b, p, TRUE);
	}

	memorystatus_highwater_enabled = enable;

	proc_list_unlock();

	return 0;
}

SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, "");

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, "");
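
/*
 * Usage note (added commentary, not part of the original source):
 *	sysctl kern.memorystatus_highwater_enabled=0|1
 * Writing 0 resets every process's cached limit to the system-wide default
 * (-1) in the ledger; writing 1 re-applies each process's cached
 * active/inactive limit variant.
 */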

/* Diagnostic code */

enum {
	kJetsamDiagnosticModeNone = 0,
	kJetsamDiagnosticModeAll = 1,
	kJetsamDiagnosticModeStopAtFirstActive = 2,
	kJetsamDiagnosticModeCount
} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;

static int jetsam_diagnostic_suspended_one_active_proc = 0;

static int
sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	const char *diagnosticStrings[] = {
		"jetsam: diagnostic mode: resetting critical level.",
		"jetsam: diagnostic mode: will examine all processes",
		"jetsam: diagnostic mode: will stop at first active process"
	};

	int error, val = jetsam_diagnostic_mode;
	boolean_t changed = FALSE;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);
	if ((val < 0) || (val >= kJetsamDiagnosticModeCount)) {
		printf("jetsam: diagnostic mode: invalid value - %d\n", val);
		return EINVAL;
	}

	proc_list_lock();

	if ((unsigned int) val != jetsam_diagnostic_mode) {
		jetsam_diagnostic_mode = val;

		memorystatus_jetsam_policy &= ~kPolicyDiagnoseActive;

		switch (jetsam_diagnostic_mode) {
		case kJetsamDiagnosticModeNone:
			/* Already cleared */
			break;
		case kJetsamDiagnosticModeAll:
			memorystatus_jetsam_policy |= kPolicyDiagnoseAll;
			break;
		case kJetsamDiagnosticModeStopAtFirstActive:
			memorystatus_jetsam_policy |= kPolicyDiagnoseFirst;
			break;
		default:
			/* Already validated */
			break;
		}

		memorystatus_update_levels_locked(FALSE);
		changed = TRUE;
	}

	proc_list_unlock();

	if (changed) {
		printf("%s\n", diagnosticStrings[val]);
	}

	return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY,
    &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, "");

#if VM_PRESSURE_EVENTS

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, "");


/*
 * This routine is used for targeted notifications
 * regardless of system memory pressure.
 * "memnote" is the current user.
 */

static int
sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error = 0, pid = 0;
	int ret = 0;
	struct knote *kn = NULL;
	boolean_t found_knote = FALSE;

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	/*
	 * We inspect 3 lists here for targeted notifications:
	 * - memorystatus_klist
	 * - vm_pressure_klist
	 * - vm_pressure_dormant_klist
	 *
	 * The vm_pressure_* lists are tied to the old VM_PRESSURE
	 * notification mechanism. We intend to stop using that
	 * mechanism and, in turn, get rid of the 2 lists and
	 * vm_dispatch_pressure_note_to_pid() too.
	 */

	memorystatus_klist_lock();

	SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
		proc_t knote_proc = kn->kn_kq->kq_p;
		pid_t knote_pid = knote_proc->p_pid;

		if (knote_pid == pid) {
			/*
			 * Forcibly send this pid a "warning" memory pressure notification.
			 */
			kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
			found_knote = TRUE;
		}
	}

	if (found_knote) {
		KNOTE(&memorystatus_klist, 0);
		ret = 0;
	} else {
		ret = vm_dispatch_pressure_note_to_pid(pid, FALSE);
	}

	memorystatus_klist_unlock();

	return ret;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_vm_pressure_send, "I", "");
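
/*
 * Usage note (added commentary, not part of the original source):
 *	sysctl kern.memorystatus_vm_pressure_send=<pid>
 * forces a NOTE_MEMORYSTATUS_PRESSURE_WARN kevent to that pid if it is
 * registered on memorystatus_klist, else falls back to the legacy
 * vm_dispatch_pressure_note_to_pid() path.
 */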

#endif /* VM_PRESSURE_EVENTS */

#endif /* CONFIG_JETSAM */

#if CONFIG_FREEZE

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");

SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_count, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_count, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");

boolean_t memorystatus_freeze_throttle_enabled = TRUE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");

/*
 * Manual trigger of freeze and thaw for dev / debug kernels only.
 */
static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error, pid = 0;
	proc_t p;

	if (memorystatus_freeze_enabled == FALSE) {
		return ENOTSUP;
	}

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	if (pid == 2) {
		vm_pageout_anonymous_pages();

		return 0;
	}

	lck_mtx_lock(&freezer_mutex);

	p = proc_find(pid);
	if (p != NULL) {
		uint32_t purgeable, wired, clean, dirty;
		boolean_t shared;
		uint32_t max_pages = 0;

		if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {

			unsigned int avail_swap_space = 0; /* in pages. */

			if (DEFAULT_FREEZER_IS_ACTIVE) {
				/*
				 * Freezer backed by default pager and swap file(s).
				 */
				avail_swap_space = default_pager_swap_pages_free();
			} else {
				/*
				 * Freezer backed by the compressor and swap file(s)
				 * which will hold compressed data.
				 */
				avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64;
			}

			max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max);

		} else {
			/*
			 * We only have the compressor without any swap.
			 */
			max_pages = UINT32_MAX - 1;
		}

		error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
		proc_rele(p);

		if (error)
			error = EIO;

		lck_mtx_unlock(&freezer_mutex);
		return error;
	}

	lck_mtx_unlock(&freezer_mutex);
	return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");

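/*
 * Usage note (added commentary, not part of the original source):
 *	sysctl kern.memorystatus_freeze=<pid>
 * freezes the given process, with the page count capped by available swap
 * space via max_pages; the special pid 2 instead triggers
 * vm_pageout_anonymous_pages(). Returns ENOTSUP unless
 * memorystatus_freeze_enabled is set.
 */
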
static int
sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

	int error, pid = 0;
	proc_t p;

	if (memorystatus_freeze_enabled == FALSE) {
		return ENOTSUP;
	}

	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);

	p = proc_find(pid);
	if (p != NULL) {
		error = task_thaw(p->task);
		proc_rele(p);

		if (error)
			error = EIO;
		return error;
	}

	return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");

#endif /* CONFIG_FREEZE */

#endif /* DEVELOPMENT || DEBUG */

extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation,
						  void *parameter,
						  integer_t priority,
						  thread_t *new_thread);

#if CONFIG_JETSAM
/*
 * Picks the sorting routine for a given jetsam priority band.
 *
 * Input:
 *	bucket_index - jetsam priority band to be sorted.
 *	sort_order - JETSAM_SORT_xxx from kern_memorystatus.h
 *		Currently sort_order is only meaningful when handling
 *		coalitions.
 *
 * Return:
 *	0     on success
 *	non-0 on failure
 */
static int memorystatus_sort_bucket(unsigned int bucket_index, int sort_order)
{
	int coal_sort_order;

	/*
	 * Verify the jetsam priority
	 */
	if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
		return(EINVAL);
	}

#if DEVELOPMENT || DEBUG
	if (sort_order == JETSAM_SORT_DEFAULT) {
		coal_sort_order = COALITION_SORT_DEFAULT;
	} else {
		coal_sort_order = sort_order;	/* only used for testing scenarios */
	}
#else
	/* Verify default */
	if (sort_order == JETSAM_SORT_DEFAULT) {
		coal_sort_order = COALITION_SORT_DEFAULT;
	} else {
		return(EINVAL);
	}
#endif

	proc_list_lock();
	switch (bucket_index) {
	case JETSAM_PRIORITY_FOREGROUND:
		if (memorystatus_sort_by_largest_coalition_locked(bucket_index, coal_sort_order) == 0) {
			/*
			 * Fall back to per process sorting when zero coalitions are found.
			 */
			memorystatus_sort_by_largest_process_locked(bucket_index);
		}
		break;
	default:
		memorystatus_sort_by_largest_process_locked(bucket_index);
		break;
	}
	proc_list_unlock();

	return(0);
}

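/*
 * Illustrative call (added commentary, not part of the original source):
 *
 *	memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_DEFAULT);
 *
 * sorts the foreground band by coalition footprint, falling back to
 * per-process sorting when no coalitions are present.
 */
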
/*
 * Sort processes by size for a single jetsam bucket.
 */

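/*
 * Note (added commentary, not part of the original source): this is an
 * in-place selection sort over the bucket's TAILQ. Each pass scans the
 * unsorted tail for the process with the largest footprint and splices it
 * in after the sorted prefix; O(n^2), but jetsam bands are short.
 */
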
static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index)
{
	proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL;
	proc_t next_p = NULL, prev_max_proc = NULL;
	uint32_t pages = 0, max_pages = 0;
	memstat_bucket_t *current_bucket;

	if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
		return;
	}

	current_bucket = &memstat_bucket[bucket_index];

	p = TAILQ_FIRST(&current_bucket->list);

	while (p) {
		memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
		max_pages = pages;
		max_proc = p;
		prev_max_proc = p;

		while ((next_p = TAILQ_NEXT(p, p_memstat_list)) != NULL) {
			/* traversing list until we find next largest process */
			p = next_p;
			memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
			if (pages > max_pages) {
				max_pages = pages;
				max_proc = p;
			}
		}

		if (prev_max_proc != max_proc) {
			/* found a larger process, place it in the list */
			TAILQ_REMOVE(&current_bucket->list, max_proc, p_memstat_list);
			if (insert_after_proc == NULL) {
				TAILQ_INSERT_HEAD(&current_bucket->list, max_proc, p_memstat_list);
			} else {
				TAILQ_INSERT_AFTER(&current_bucket->list, insert_after_proc, max_proc, p_memstat_list);
			}
			prev_max_proc = max_proc;
		}

		insert_after_proc = max_proc;

		p = TAILQ_NEXT(max_proc, p_memstat_list);
	}
}

#endif /* CONFIG_JETSAM */

static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) {
	memstat_bucket_t *current_bucket;
	proc_t next_p;

	if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) {
		return NULL;
	}

	current_bucket = &memstat_bucket[*bucket_index];
	next_p = TAILQ_FIRST(&current_bucket->list);
	if (!next_p && search) {
		while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
			current_bucket = &memstat_bucket[*bucket_index];
			next_p = TAILQ_FIRST(&current_bucket->list);
		}
	}

	return next_p;
}

static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) {
	memstat_bucket_t *current_bucket;
	proc_t next_p;

	if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) {
		return NULL;
	}

	next_p = TAILQ_NEXT(p, p_memstat_list);
	while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
		current_bucket = &memstat_bucket[*bucket_index];
		next_p = TAILQ_FIRST(&current_bucket->list);
	}

	return next_p;
}

__private_extern__ void
memorystatus_init(void)
{
	thread_t thread = THREAD_NULL;
	kern_return_t result;
	int i;

#if CONFIG_FREEZE
	memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
#endif

	nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);

	/* Init buckets */
	for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
		TAILQ_INIT(&memstat_bucket[i].list);
		memstat_bucket[i].count = 0;
	}

	memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL);

	/* Apply overrides */
	PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage));
	assert(delta_percentage < 100);
	PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage));
	assert(critical_threshold_percentage < 100);
	PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage));
	assert(idle_offset_percentage < 100);
	PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage));
	assert(pressure_threshold_percentage < 100);
	PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage));
	assert(freeze_threshold_percentage < 100);

#if CONFIG_JETSAM
	/* device tree can request to take snapshots for idle-exit kills by default */
	PE_get_default("kern.jetsam_idle_snapshot", &memorystatus_idle_snapshot, sizeof(memorystatus_idle_snapshot));

	memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
	memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
	memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;

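	/*
	 * Worked example (added commentary, not part of the original source;
	 * the numbers are hypothetical): on a 1 GB device with 4 KB pages,
	 * atop_64(max_mem) == 262144 pages, so with the default percentages
	 * memorystatus_delta = 5 * 262144 / 100 = 13107 pages (~51 MB) and
	 * memorystatus_available_pages_critical_base = (5 / 5) * 13107 = 13107 pages.
	 */
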
	memorystatus_jetsam_snapshot_max = maxproc;
	memorystatus_jetsam_snapshot =
		(memorystatus_jetsam_snapshot_t*)kalloc(sizeof(memorystatus_jetsam_snapshot_t) +
		sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max);
	if (!memorystatus_jetsam_snapshot) {
		panic("Could not allocate memorystatus_jetsam_snapshot");
	}

	nanoseconds_to_absolutetime((uint64_t)JETSAM_SNAPSHOT_TIMEOUT_SECS * NSEC_PER_SEC, &memorystatus_jetsam_snapshot_timeout);

	memset(&memorystatus_at_boot_snapshot, 0, sizeof(memorystatus_jetsam_snapshot_t));

	/* No contention at this point */
	memorystatus_update_levels_locked(FALSE);

	/* Jetsam Loop Detection */
	if (max_mem <= (512 * 1024 * 1024)) {
		/* 512 MB devices */
		memorystatus_jld_eval_period_msecs = 8000;	/* 8000 msecs == 8 second window */
	} else {
		/* 1GB and larger devices */
		memorystatus_jld_eval_period_msecs = 6000;	/* 6000 msecs == 6 second window */
	}
#endif

#if CONFIG_FREEZE
	memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta;
#endif

	result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread);
	if (result == KERN_SUCCESS) {
		thread_deallocate(thread);
	} else {
		panic("Could not create memorystatus_thread");
	}
}

/* Centralised for the purposes of allowing panic-on-jetsam */
extern void
vm_wake_compactor_swapper(void);

/*
 * The jetsam no frills kill call
 *	Return: 0 on success
 *		error code on failure (EINVAL...)
 */
static int
jetsam_do_kill(proc_t p, int jetsam_flags) {
	int error = 0;
	error = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags);
	return(error);
}

/*
 * Wrapper for processes exiting with memorystatus details
 */
static boolean_t
memorystatus_do_kill(proc_t p, uint32_t cause) {

	int error = 0;
	__unused pid_t victim_pid = p->p_pid;

	KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START,
	    victim_pid, cause, vm_page_free_count, 0, 0);

#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
	if (memorystatus_jetsam_panic_debug & (1 << cause)) {
		panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause);
	}
#else
#pragma unused(cause)
#endif
	int jetsam_flags = P_LTERM_JETSAM;
	switch (cause) {
		case kMemorystatusKilledHiwat:			jetsam_flags |= P_JETSAM_HIWAT; break;
		case kMemorystatusKilledVnodes:			jetsam_flags |= P_JETSAM_VNODE; break;
		case kMemorystatusKilledVMPageShortage:		jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break;
		case kMemorystatusKilledVMThrashing:		jetsam_flags |= P_JETSAM_VMTHRASHING; break;
		case kMemorystatusKilledFCThrashing:		jetsam_flags |= P_JETSAM_FCTHRASHING; break;
		case kMemorystatusKilledPerProcessLimit:	jetsam_flags |= P_JETSAM_PID; break;
		case kMemorystatusKilledIdleExit:		jetsam_flags |= P_JETSAM_IDLEEXIT; break;
	}
	error = jetsam_do_kill(p, jetsam_flags);

	KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END,
	    victim_pid, cause, vm_page_free_count, error, 0);

	vm_wake_compactor_swapper();

	return (error == 0);
}

/*
 * Node manipulation
 */

static void
memorystatus_check_levels_locked(void) {
#if CONFIG_JETSAM
	/* Update levels */
	memorystatus_update_levels_locked(TRUE);
#endif
}

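/*
 * Overview (added commentary, not part of the original source): processes
 * parked in JETSAM_PRIORITY_IDLE_DEFERRED carry a p_memstat_idledeadline of
 * mach_absolute_time() + memorystatus_idle_delay_time. A single thread_call
 * (memorystatus_idle_demotion_call) fires at the earliest such deadline, and
 * the routines below demote expired processes to JETSAM_PRIORITY_IDLE.
 */
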
39236c6e A |
1203 | static void |
1204 | memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2) | |
1205 | { | |
1206 | proc_t p; | |
1207 | uint64_t current_time; | |
1208 | memstat_bucket_t *demotion_bucket; | |
1209 | ||
1210 | MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n"); | |
1211 | ||
1212 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
1213 | ||
1214 | current_time = mach_absolute_time(); | |
1215 | ||
1216 | proc_list_lock(); | |
316670eb | 1217 | |
39236c6e A |
1218 | demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED]; |
1219 | p = TAILQ_FIRST(&demotion_bucket->list); | |
1220 | ||
1221 | while (p) { | |
1222 | MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid); | |
1223 | ||
1224 | assert(p->p_memstat_idledeadline); | |
1225 | assert(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS); | |
1226 | assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED); | |
1227 | ||
1228 | if (current_time >= p->p_memstat_idledeadline) { | |
1229 | #if DEBUG || DEVELOPMENT | |
1230 | if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) { | |
fe8ab488 A |
1231 | printf("memorystatus_perform_idle_demotion: moving process %d [%s] to idle band, but never dirtied (0x%x)!\n", |
1232 | p->p_pid, (p->p_comm ? p->p_comm : "(unknown)"), p->p_memstat_dirty); | |
39236c6e A |
1233 | } |
1234 | #endif | |
1235 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
fe8ab488 | 1236 | memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, false); |
39236c6e A |
1237 | |
1238 | // The prior process has moved out of the demotion bucket, so grab the new head and continue | |
1239 | p = TAILQ_FIRST(&demotion_bucket->list); | |
1240 | continue; | |
316670eb | 1241 | } |
39236c6e A |
1242 | |
1243 | // No further candidates | |
1244 | break; | |
316670eb | 1245 | } |
39236c6e A |
1246 | |
1247 | memorystatus_reschedule_idle_demotion_locked(); | |
1248 | ||
1249 | proc_list_unlock(); | |
316670eb | 1250 | |
39236c6e | 1251 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0); |
316670eb A |
1252 | } |
1253 | ||
1254 | static void | |
39236c6e A |
1255 | memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state) |
1256 | { | |
fe8ab488 A |
1257 | boolean_t present_in_deferred_bucket = FALSE; |
1258 | ||
1259 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
1260 | present_in_deferred_bucket = TRUE; | |
1261 | } | |
1262 | ||
3e170ce0 | 1263 | MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for pid %d (dirty:0x%x, set_state %d, demotions %d).\n", |
39236c6e | 1264 | p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions); |
316670eb | 1265 | |
fe8ab488 | 1266 | assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED); |
316670eb | 1267 | |
39236c6e A |
1268 | if (set_state) { |
1269 | assert(p->p_memstat_idledeadline == 0); | |
fe8ab488 | 1270 | p->p_memstat_dirty |= P_DIRTY_DEFER_IN_PROGRESS; |
39236c6e | 1271 | p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time; |
316670eb | 1272 | } |
39236c6e | 1273 | |
fe8ab488 | 1274 | assert(p->p_memstat_idledeadline); |
39236c6e | 1275 | |
fe8ab488 A |
1276 | if (present_in_deferred_bucket == FALSE) { |
1277 | memorystatus_scheduled_idle_demotions++; | |
1278 | } | |
316670eb A |
1279 | } |
1280 | ||
39236c6e A |
1281 | static void |
1282 | memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state) | |
316670eb | 1283 | { |
fe8ab488 A |
1284 | boolean_t present_in_deferred_bucket = FALSE; |
1285 | ||
1286 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
1287 | present_in_deferred_bucket = TRUE; | |
1288 | assert(p->p_memstat_idledeadline); | |
1289 | } | |
1290 | ||
3e170ce0 | 1291 | MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for pid %d (clear_state %d, demotions %d).\n", |
39236c6e A |
1292 | p->p_pid, clear_state, memorystatus_scheduled_idle_demotions); |
1293 | ||
39236c6e A |
1294 | |
1295 | if (clear_state) { | |
1296 | p->p_memstat_idledeadline = 0; | |
1297 | p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS; | |
316670eb | 1298 | } |
39236c6e | 1299 | |
fe8ab488 A |
1300 | if (present_in_deferred_bucket == TRUE) { |
1301 | memorystatus_scheduled_idle_demotions--; | |
1302 | } | |
1303 | ||
39236c6e | 1304 | assert(memorystatus_scheduled_idle_demotions >= 0); |
316670eb A |
1305 | } |
1306 | ||
1307 | static void | |
39236c6e A |
1308 | memorystatus_reschedule_idle_demotion_locked(void) { |
1309 | if (0 == memorystatus_scheduled_idle_demotions) { | |
1310 | if (memstat_idle_demotion_deadline) { | |
1311 | /* Transitioned 1->0, so cancel next call */ | |
1312 | thread_call_cancel(memorystatus_idle_demotion_call); | |
1313 | memstat_idle_demotion_deadline = 0; | |
1314 | } | |
1315 | } else { | |
1316 | memstat_bucket_t *demotion_bucket; | |
1317 | proc_t p; | |
1318 | demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED]; | |
1319 | p = TAILQ_FIRST(&demotion_bucket->list); | |
39236c6e | 1320 | |
fe8ab488 A |
1321 | assert(p && p->p_memstat_idledeadline); |
1322 | ||
1323 | if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){ | |
1324 | thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline); | |
1325 | memstat_idle_demotion_deadline = p->p_memstat_idledeadline; | |
39236c6e A |
1326 | } |
1327 | } | |
316670eb A |
1328 | } |
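/*
 * Editor's note (illustrative, not part of the original source): the head of
 * the JETSAM_PRIORITY_IDLE_DEFERRED bucket always carries the earliest
 * deadline, because every entry is stamped "now + memorystatus_idle_delay_time"
 * and tail-inserted, so deadlines grow monotonically toward the tail. That is
 * why TAILQ_FIRST() above suffices to re-arm the single one-shot thread call.
 * A minimal model of the invariant, using hypothetical names:
 */
#if 0	/* illustrative sketch only */
#include <assert.h>
#include <stdint.h>

#define SKETCH_DELAY	10	/* stands in for memorystatus_idle_delay_time */

static uint64_t sketch_deadline[8];
static int sketch_count;

/* Mirrors memorystatus_schedule_idle_demotion_locked(): tail insert.
 * Assumes 'now' is monotone across calls, as mach_absolute_time() is. */
static void
sketch_insert(uint64_t now)
{
	sketch_deadline[sketch_count++] = now + SKETCH_DELAY;
	/* Uniform delay + FIFO insertion keeps the queue sorted, so slot 0
	 * (the "head") is always the next deadline to expire. */
	assert(sketch_count < 2 ||
	    sketch_deadline[sketch_count - 2] <= sketch_deadline[sketch_count - 1]);
}
#endif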
1329 | ||
1330 | /* | |
1331 | * List manipulation | |
1332 | */ | |
1333 | ||
39236c6e A |
1334 | int |
1335 | memorystatus_add(proc_t p, boolean_t locked) | |
316670eb | 1336 | { |
39236c6e A |
1337 | memstat_bucket_t *bucket; |
1338 | ||
3e170ce0 | 1339 | MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding pid %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority); |
39236c6e A |
1340 | |
1341 | if (!locked) { | |
1342 | proc_list_lock(); | |
1343 | } | |
1344 | ||
1345 | /* Processes marked internal do not have priority tracked */ | |
1346 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { | |
1347 | goto exit; | |
1348 | } | |
1349 | ||
1350 | bucket = &memstat_bucket[p->p_memstat_effectivepriority]; | |
1351 | ||
fe8ab488 A |
1352 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { |
1353 | assert(bucket->count == memorystatus_scheduled_idle_demotions); | |
1354 | } | |
1355 | ||
39236c6e A |
1356 | TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list); |
1357 | bucket->count++; | |
316670eb | 1358 | |
39236c6e | 1359 | memorystatus_list_count++; |
316670eb | 1360 | |
39236c6e A |
1361 | memorystatus_check_levels_locked(); |
1362 | ||
1363 | exit: | |
1364 | if (!locked) { | |
1365 | proc_list_unlock(); | |
1366 | } | |
1367 | ||
1368 | return 0; | |
1369 | } | |
316670eb | 1370 | |
3e170ce0 A |
1371 | /* |
1372 | * Description: | |
1373 | * Moves a process from one jetsam bucket to another,
1374 | * which changes the LRU position of the process.
1375 | *
1376 | * Monitors transitions between buckets and, if necessary,
1377 | * updates cached memory limits accordingly.
1378 | */ | |
39236c6e | 1379 | static void |
fe8ab488 | 1380 | memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert) |
39236c6e A |
1381 | { |
1382 | memstat_bucket_t *old_bucket, *new_bucket; | |
1383 | ||
1384 | assert(priority < MEMSTAT_BUCKET_COUNT); | |
1385 | ||
1386 | /* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */ | |
1387 | if ((p->p_listflag & P_LIST_EXITED) != 0) { | |
1388 | return; | |
316670eb | 1389 | } |
39236c6e | 1390 | |
3e170ce0 | 1391 | MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting pid %d to priority %d, inserting at %s\n", |
fe8ab488 | 1392 | p->p_pid, priority, head_insert ? "head" : "tail"); |
316670eb | 1393 | |
39236c6e | 1394 | old_bucket = &memstat_bucket[p->p_memstat_effectivepriority]; |
fe8ab488 A |
1395 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { |
1396 | assert(old_bucket->count == (memorystatus_scheduled_idle_demotions + 1)); | |
1397 | } | |
1398 | ||
39236c6e A |
1399 | TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list); |
1400 | old_bucket->count--; | |
316670eb | 1401 | |
39236c6e | 1402 | new_bucket = &memstat_bucket[priority]; |
fe8ab488 A |
1403 | if (head_insert) |
1404 | TAILQ_INSERT_HEAD(&new_bucket->list, p, p_memstat_list); | |
1405 | else | |
1406 | TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list); | |
39236c6e | 1407 | new_bucket->count++; |
3e170ce0 | 1408 | |
39236c6e | 1409 | #if CONFIG_JETSAM |
3e170ce0 A |
1410 | if (memorystatus_highwater_enabled) { |
1411 | boolean_t trigger_exception; | |
1412 | ||
1413 | /* | |
1414 | * If cached limit data is updated, then the limits | |
1415 | * will be enforced by writing to the ledgers. | |
1416 | */ | |
1417 | boolean_t ledger_update_needed = TRUE; | |
fe8ab488 A |
1418 | |
1419 | /* | |
3e170ce0 A |
1420 | * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore. |
1421 | * Background limits are described via the inactive limit slots. | |
1422 | * | |
1423 | * Here, we must update the cached memory limit if the task | |
1424 | * is transitioning between: | |
1425 | * active <--> inactive | |
1426 | * FG <--> BG | |
1427 | * but: | |
1428 | * dirty <--> clean is ignored | |
1429 | * | |
1430 | * We bypass processes that have opted into dirty tracking because | |
1431 | * a move between buckets does not imply a transition between the | |
1432 | * dirty <--> clean state. | |
1433 | * Setting limits on processes opted into dirty tracking is handled | |
1434 | * in memorystatus_dirty_set() where the transition is very clear. | |
fe8ab488 A |
1435 | */ |
1436 | ||
3e170ce0 A |
1437 | if (p->p_memstat_dirty & P_DIRTY_TRACK) { |
1438 | ||
1439 | ledger_update_needed = FALSE; | |
1440 | ||
1441 | } else if ((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) { | |
1442 | /* | |
1443 | * inactive --> active | |
1444 | * BG --> FG | |
1445 | * assign active state | |
1446 | */ | |
1447 | CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception); | |
1448 | ||
1449 | } else if ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) { | |
1450 | /* | |
1451 | * active --> inactive | |
1452 | * FG --> BG | |
1453 | * assign inactive state | |
1454 | */ | |
1455 | CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception); | |
1456 | } else { | |
1457 | /* | |
1458 | * The transition between jetsam priority buckets apparently did | |
1459 | * not affect active/inactive state. | |
1460 | * This is not unusual... especially during startup when | |
1461 | * processes are getting established in their respective bands. | |
1462 | */ | |
1463 | ledger_update_needed = FALSE; | |
1464 | } | |
1465 | ||
1466 | /* | |
1467 | * Enforce the new limits by writing to the ledger | |
1468 | */ | |
1469 | if (ledger_update_needed) { | |
1470 | task_set_phys_footprint_limit_internal(p->task, (p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1, NULL, trigger_exception); | |
1471 | ||
1472 | MEMORYSTATUS_DEBUG(3, "memorystatus_update_priority_locked: new limit on pid %d (%dMB %s) priority old --> new (%d --> %d) dirty?=0x%x %s\n", | |
1473 | p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1), | |
1474 | (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, priority, p->p_memstat_dirty, | |
1475 | (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : "")); | |
39236c6e A |
1476 | } |
1477 | } | |
3e170ce0 A |
1478 | |
1479 | #endif /* CONFIG_JETSAM */ | |
39236c6e A |
1480 | |
1481 | p->p_memstat_effectivepriority = priority; | |
1482 | ||
1483 | memorystatus_check_levels_locked(); | |
316670eb A |
1484 | } |
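/*
 * Editor's sketch (hypothetical helper, not in the original source): the
 * ledger-refresh decision in the CONFIG_JETSAM block above reduces to one
 * predicate on the old and new priorities, with dirty-tracked processes
 * excluded because memorystatus_dirty_set() owns their limit transitions.
 */
#if 0	/* illustrative sketch only */
#include <stdbool.h>

#define SKETCH_PRI_FOREGROUND	10	/* stands in for JETSAM_PRIORITY_FOREGROUND */

static bool
sketch_needs_ledger_update(int old_pri, int new_pri, bool dirty_tracked)
{
	if (dirty_tracked) {
		return false;	/* dirty <--> clean moves are handled elsewhere */
	}
	bool was_active = (old_pri >= SKETCH_PRI_FOREGROUND);
	bool is_active = (new_pri >= SKETCH_PRI_FOREGROUND);
	return was_active != is_active;	/* only active <--> inactive crossings */
}
#endif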
1485 | ||
3e170ce0 A |
1486 | /* |
1487 | * | |
1488 | * Description: Update the jetsam priority and memory limit attributes for a given process. | |
1489 | * | |
1490 | * Parameters: | |
1491 | * p init this process's jetsam information. | |
1492 | * priority The jetsam priority band | |
1493 | * user_data user specific data, unused by the kernel | |
1494 | * effective guards against a race if the process's update has already occurred
1495 | * update_memlimit When true, this is the init step via the posix_spawn path.
1496 | * | |
1497 | * memlimit_active Value in megabytes; The monitored footprint level while the | |
1498 | * process is active. Exceeding it may result in termination | |
1499 | * based on its associated fatal flag.
1500 | * | |
1501 | * memlimit_active_is_fatal When a process is active and exceeds its memory footprint, | |
1502 | * this describes whether or not it should be immediately fatal. | |
1503 | * | |
1504 | * memlimit_inactive Value in megabytes; The monitored footprint level while the | |
1505 | * process is inactive. Exceeding it may result in termination | |
1506 | * based on its associated fatal flag.
1507 | *
1508 | * memlimit_inactive_is_fatal When a process is inactive and exceeds its memory footprint,
1509 | * this describes whether or not it should be immediately fatal.
1510 | * | |
1511 | * memlimit_background This process has a high-water-mark while in the background. | |
1512 | * No longer meaningful. Background limits are described via | |
1513 | * the inactive slots. Flag is ignored. | |
1514 | * | |
1515 | * | |
1516 | * Returns: 0 Success | |
1517 | * non-0 Failure | |
1518 | */ | |
1519 | ||
39236c6e | 1520 | int |
3e170ce0 A |
1521 | memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, |
1522 | int32_t memlimit_active, boolean_t memlimit_active_is_fatal, | |
1523 | int32_t memlimit_inactive, boolean_t memlimit_inactive_is_fatal, | |
1524 | __unused boolean_t memlimit_background) | |
316670eb | 1525 | { |
39236c6e | 1526 | int ret; |
fe8ab488 | 1527 | boolean_t head_insert = false; |
39236c6e | 1528 | |
316670eb | 1529 | #if !CONFIG_JETSAM |
3e170ce0 A |
1530 | #pragma unused(update_memlimit, memlimit_active, memlimit_inactive) |
1531 | #pragma unused(memlimit_active_is_fatal, memlimit_inactive_is_fatal) | |
1532 | #endif /* !CONFIG_JETSAM */ | |
1533 | ||
1534 | MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing pid %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data); | |
316670eb | 1535 | |
39236c6e A |
1536 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0); |
1537 | ||
1538 | if (priority == -1) { | |
1539 | /* Use as shorthand for default priority */ | |
1540 | priority = JETSAM_PRIORITY_DEFAULT; | |
1541 | } else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
1542 | /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */ | |
1543 | priority = JETSAM_PRIORITY_IDLE; | |
fe8ab488 A |
1544 | } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) { |
1545 | /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */ | |
1546 | priority = JETSAM_PRIORITY_IDLE; | |
3e170ce0 | 1547 | head_insert = TRUE; |
39236c6e A |
1548 | } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) { |
1549 | /* Sanity check */ | |
1550 | ret = EINVAL; | |
1551 | goto out; | |
316670eb | 1552 | } |
3e170ce0 | 1553 | |
39236c6e A |
1554 | proc_list_lock(); |
1555 | ||
1556 | assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); | |
316670eb | 1557 | |
39236c6e A |
1558 | if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) { |
1559 | ret = EALREADY; | |
1560 | proc_list_unlock(); | |
fe8ab488 A |
1561 | MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid); |
1562 | goto out; | |
1563 | } | |
1564 | ||
1565 | if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || ((p->p_listflag & P_LIST_EXITED) != 0)) { | |
1566 | /* | |
1567 | * This could happen when a process calling posix_spawn() is exiting on the jetsam thread. | |
1568 | */ | |
1569 | ret = EBUSY; | |
1570 | proc_list_unlock(); | |
316670eb A |
1571 | goto out; |
1572 | } | |
1573 | ||
39236c6e A |
1574 | p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED; |
1575 | p->p_memstat_userdata = user_data; | |
1576 | p->p_memstat_requestedpriority = priority; | |
1577 | ||
1578 | #if CONFIG_JETSAM | |
1579 | if (update_memlimit) { | |
3e170ce0 A |
1580 | boolean_t trigger_exception; |
1581 | ||
1582 | /* | |
1583 | * Posix_spawn'd processes come through this path to instantiate ledger limits. | |
1584 | * Forked processes do not come through this path, so no ledger limits exist. | |
1585 | * (That's why forked processes can consume unlimited memory.) | |
1586 | */ | |
1587 | ||
1588 | MEMORYSTATUS_DEBUG(3, "memorystatus_update(enter): pid %d, priority %d, dirty=0x%x, Active(%dMB %s), Inactive(%dMB, %s)\n", | |
1589 | p->p_pid, priority, p->p_memstat_dirty, | |
1590 | memlimit_active, (memlimit_active_is_fatal ? "F " : "NF"), | |
1591 | memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF")); | |
1592 | ||
39236c6e | 1593 | if (memlimit_background) { |
fe8ab488 | 1594 | |
3e170ce0 A |
1595 | /* |
1596 | * With 2-level HWM support, we no longer honor P_MEMSTAT_MEMLIMIT_BACKGROUND. | |
1597 | * Background limits are described via the inactive limit slots. | |
1598 | */ | |
1599 | ||
1600 | // p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND; | |
fe8ab488 | 1601 | |
3e170ce0 A |
1602 | #if DEVELOPMENT || DEBUG |
1603 | printf("memorystatus_update: WARNING %s[%d] set unused flag P_MEMSTAT_MEMLIMIT_BACKGROUND [A==%dMB %s] [IA==%dMB %s]\n", | |
1604 | (p->p_comm ? p->p_comm : "unknown"), p->p_pid, | |
1605 | memlimit_active, (memlimit_active_is_fatal ? "F " : "NF"), | |
1606 | memlimit_inactive, (memlimit_inactive_is_fatal ? "F " : "NF")); | |
1607 | #endif /* DEVELOPMENT || DEBUG */ | |
1608 | } | |
1609 | ||
1610 | if (memlimit_active <= 0) { | |
1611 | /* | |
1612 | * This process will have a system_wide task limit when active. | |
1613 | * System_wide task limit is always fatal. | |
1614 | * It's quite common to see a non-fatal flag passed in here.
1615 | * It's not an error, we just ignore it. | |
1616 | */ | |
1617 | ||
1618 | /* | |
1619 | * For backward compatibility with some unexplained launchd behavior, | |
1620 | * we allow a zero-sized limit. But we still enforce the system_wide limit
1621 | * when written to the ledgers. | |
1622 | */ | |
1623 | ||
1624 | if (memlimit_active < 0) { | |
1625 | memlimit_active = -1; /* enforces system_wide task limit */ | |
39236c6e | 1626 | } |
3e170ce0 | 1627 | memlimit_active_is_fatal = TRUE; |
316670eb | 1628 | } |
3e170ce0 A |
1629 | |
1630 | if (memlimit_inactive <= 0) { | |
1631 | /* | |
1632 | * This process will have a system_wide task limit when inactive. | |
1633 | * System_wide task limit is always fatal. | |
1634 | */ | |
1635 | ||
1636 | memlimit_inactive = -1; | |
1637 | memlimit_inactive_is_fatal = TRUE; | |
fe8ab488 | 1638 | } |
316670eb | 1639 | |
3e170ce0 A |
1640 | /* |
1641 | * Initialize the active limit variants for this process. | |
1642 | */ | |
1643 | SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal); | |
1644 | ||
1645 | /* | |
1646 | * Initialize the inactive limit variants for this process. | |
1647 | */ | |
1648 | SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal); | |
1649 | ||
1650 | /* | |
1651 | * Initialize the cached limits for the target process.
1652 | * When the target process is dirty tracked, it's typically
1653 | * in a clean state. Non-dirty-tracked processes are
1654 | * typically active (Foreground or above). | |
1655 | * But just in case, we don't make assumptions... | |
1656 | */ | |
1657 | ||
1658 | if (proc_jetsam_state_is_active_locked(p) == TRUE) { | |
1659 | CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception); | |
1660 | } else { | |
1661 | CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception); | |
1662 | } | |
1663 | ||
1664 | /* | |
1665 | * Enforce the cached limit by writing to the ledger. | |
1666 | */ | |
1667 | if (memorystatus_highwater_enabled) { | |
1668 | /* apply now */ | |
1669 | assert(trigger_exception == TRUE); | |
1670 | task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, trigger_exception); | |
1671 | ||
1672 | MEMORYSTATUS_DEBUG(3, "memorystatus_update: init: limit on pid %d (%dMB %s) targeting priority(%d) dirty?=0x%x %s\n", | |
1673 | p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1), | |
1674 | (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), priority, p->p_memstat_dirty, | |
1675 | (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : "")); | |
1676 | } | |
1677 | } | |
1678 | #endif /* CONFIG_JETSAM */ | |
1679 | ||
1680 | /* | |
1681 | * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here. | |
1682 | * But, we could be removing it from the bucket. | |
1683 | * Check and take appropriate steps if so. | |
1684 | */ | |
1685 | ||
1686 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
1687 | ||
fe8ab488 A |
1688 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); |
1689 | } | |
1690 | ||
1691 | memorystatus_update_priority_locked(p, priority, head_insert); | |
39236c6e A |
1692 | |
1693 | proc_list_unlock(); | |
1694 | ret = 0; | |
316670eb A |
1695 | |
1696 | out: | |
39236c6e A |
1697 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0); |
1698 | ||
316670eb A |
1699 | return ret; |
1700 | } | |
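/*
 * Editor's sketch (hypothetical helpers, not in the original source): the
 * limit normalization applied above, extracted for clarity. A non-positive
 * limit selects the always-fatal system-wide task limit; for backward
 * compatibility an explicit zero *active* limit is kept as-is, while a
 * negative active limit and any non-positive inactive limit collapse to -1.
 */
#if 0	/* illustrative sketch only */
static void
sketch_normalize_active(int32_t *limit_mb, boolean_t *is_fatal)
{
	if (*limit_mb <= 0) {
		if (*limit_mb < 0) {
			*limit_mb = -1;	/* -1 enforces the system-wide task limit */
		}
		*is_fatal = TRUE;	/* the system-wide limit is always fatal */
	}
}

static void
sketch_normalize_inactive(int32_t *limit_mb, boolean_t *is_fatal)
{
	if (*limit_mb <= 0) {
		*limit_mb = -1;
		*is_fatal = TRUE;
	}
}
#endif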
1701 | ||
39236c6e A |
1702 | int |
1703 | memorystatus_remove(proc_t p, boolean_t locked) | |
316670eb | 1704 | { |
39236c6e A |
1705 | int ret; |
1706 | memstat_bucket_t *bucket; | |
316670eb | 1707 | |
3e170ce0 | 1708 | MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing pid %d\n", p->p_pid); |
316670eb | 1709 | |
39236c6e A |
1710 | if (!locked) { |
1711 | proc_list_lock(); | |
1712 | } | |
316670eb | 1713 | |
39236c6e | 1714 | assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); |
fe8ab488 | 1715 | |
39236c6e | 1716 | bucket = &memstat_bucket[p->p_memstat_effectivepriority]; |
fe8ab488 A |
1717 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { |
1718 | assert(bucket->count == memorystatus_scheduled_idle_demotions); | |
1719 | } | |
1720 | ||
39236c6e A |
1721 | TAILQ_REMOVE(&bucket->list, p, p_memstat_list); |
1722 | bucket->count--; | |
1723 | ||
1724 | memorystatus_list_count--; | |
316670eb | 1725 | |
39236c6e A |
1726 | /* If awaiting demotion to the idle band, clean up */ |
1727 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
1728 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1729 | memorystatus_reschedule_idle_demotion_locked(); | |
1730 | } | |
316670eb | 1731 | |
39236c6e A |
1732 | memorystatus_check_levels_locked(); |
1733 | ||
1734 | #if CONFIG_FREEZE | |
1735 | if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) { | |
1736 | memorystatus_frozen_count--; | |
1737 | } | |
316670eb | 1738 | |
39236c6e A |
1739 | if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { |
1740 | memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint; | |
1741 | memorystatus_suspended_count--; | |
316670eb | 1742 | } |
39236c6e A |
1743 | #endif |
1744 | ||
1745 | if (!locked) { | |
1746 | proc_list_unlock(); | |
1747 | } | |
316670eb | 1748 | |
39236c6e A |
1749 | if (p) { |
1750 | ret = 0; | |
316670eb | 1751 | } else { |
39236c6e | 1752 | ret = ESRCH; |
316670eb A |
1753 | } |
1754 | ||
1755 | return ret; | |
1756 | } | |
1757 | ||
3e170ce0 A |
1758 | /* |
1759 | * Validate dirty tracking flags with process state. | |
1760 | * | |
1761 | * Return: | |
1762 | * 0 on success | |
1763 | * non-0 on failure | |
1764 | */ | |
1765 | ||
1766 | static int | |
39236c6e A |
1767 | memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) { |
1768 | /* See that the process isn't marked for termination */ | |
1769 | if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) { | |
3e170ce0 | 1770 | return EBUSY; |
316670eb A |
1771 | } |
1772 | ||
39236c6e A |
1773 | /* Idle exit requires that process be tracked */ |
1774 | if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) && | |
1775 | !(pcontrol & PROC_DIRTY_TRACK)) { | |
3e170ce0 | 1776 | return EINVAL; |
39236c6e A |
1777 | } |
1778 | ||
fe8ab488 A |
1779 | /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. */ |
1780 | if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) && | |
1781 | !(pcontrol & PROC_DIRTY_TRACK)) { | |
3e170ce0 | 1782 | return EINVAL; |
fe8ab488 A |
1783 | } |
1784 | ||
39236c6e A |
1785 | /* Deferral is only relevant if idle exit is specified */ |
1786 | if ((pcontrol & PROC_DIRTY_DEFER) && | |
1787 | !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) { | |
3e170ce0 | 1788 | return EINVAL; |
316670eb A |
1789 | } |
1790 | ||
3e170ce0 | 1791 | return(0); |
316670eb | 1792 | } |
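/*
 * Editor's note (illustrative, not in the original source): pcontrol
 * combinations as the validator above treats them, assuming the
 * PROC_DIRTY_* masks from sys/kern_memorystatus.h:
 *
 *   PROC_DIRTY_TRACK                                -> 0 (ok)
 *   PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT   -> 0 (ok)
 *   PROC_DIRTY_ALLOW_IDLE_EXIT alone                -> EINVAL (needs tracking)
 *   PROC_DIRTY_LAUNCH_IN_PROGRESS alone             -> EINVAL (needs tracking)
 *   PROC_DIRTY_DEFER without the idle-exit bit      -> EINVAL
 *   anything while the target is P_DIRTY_TERMINATED -> EBUSY
 */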
593a1d5f | 1793 | |
39236c6e A |
1794 | static void |
1795 | memorystatus_update_idle_priority_locked(proc_t p) { | |
1796 | int32_t priority; | |
3e170ce0 | 1797 | |
39236c6e A |
1798 | MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty); |
1799 | ||
1800 | if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) { | |
1801 | priority = (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) ? JETSAM_PRIORITY_IDLE_DEFERRED : JETSAM_PRIORITY_IDLE; | |
1802 | } else { | |
1803 | priority = p->p_memstat_requestedpriority; | |
1804 | } | |
1805 | ||
fe8ab488 A |
1806 | if (priority != p->p_memstat_effectivepriority) { |
1807 | memorystatus_update_priority_locked(p, priority, false); | |
1808 | } | |
39236c6e A |
1809 | } |
1810 | ||
1811 | /* | |
1812 | * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle | |
1813 | * (clean). They may also indicate that they support termination when idle, with the result that they are promoted | |
1814 | * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low | |
1815 | * priority idle band when clean (and killed earlier, protecting higher priority processes).
1816 | * | |
1817 | * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by | |
1818 | * memorystatus_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band | |
1819 | * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to | |
1820 | * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle | |
1821 | * band. The deferral can be cleared early by clearing the appropriate flag. | |
1822 | * | |
1823 | * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process | |
1824 | * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be | |
1825 | * re-enabled or the guard state cleared, depending on whether the guard deadline has passed. | |
1826 | */ | |
1827 | ||
1828 | int | |
1829 | memorystatus_dirty_track(proc_t p, uint32_t pcontrol) { | |
1830 | unsigned int old_dirty; | |
1831 | boolean_t reschedule = FALSE; | |
fe8ab488 A |
1832 | boolean_t already_deferred = FALSE; |
1833 | boolean_t defer_now = FALSE; | |
3e170ce0 | 1834 | int ret = 0; |
39236c6e | 1835 | |
fe8ab488 A |
1836 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK), |
1837 | p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0); | |
1838 | ||
39236c6e | 1839 | proc_list_lock(); |
316670eb | 1840 | |
fe8ab488 A |
1841 | if ((p->p_listflag & P_LIST_EXITED) != 0) { |
1842 | /* | |
1843 | * Process is on its way out. | |
1844 | */ | |
1845 | ret = EBUSY; | |
1846 | goto exit; | |
1847 | } | |
1848 | ||
39236c6e A |
1849 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { |
1850 | ret = EPERM; | |
1851 | goto exit; | |
316670eb A |
1852 | } |
1853 | ||
3e170ce0 A |
1854 | if ((ret = memorystatus_validate_track_flags(p, pcontrol)) != 0) { |
1855 | /* error */ | |
39236c6e | 1856 | goto exit; |
3e170ce0 | 1857 | } |
39236c6e A |
1858 | |
1859 | old_dirty = p->p_memstat_dirty; | |
1860 | ||
1861 | /* These bits are cumulative, as per <rdar://problem/11159924> */ | |
1862 | if (pcontrol & PROC_DIRTY_TRACK) { | |
1863 | p->p_memstat_dirty |= P_DIRTY_TRACK; | |
1864 | } | |
1865 | ||
1866 | if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) { | |
1867 | p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT; | |
1868 | } | |
1869 | ||
fe8ab488 A |
1870 | if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { |
1871 | p->p_memstat_dirty |= P_DIRTY_LAUNCH_IN_PROGRESS; | |
1872 | } | |
1873 | ||
1874 | if (old_dirty & P_DIRTY_DEFER_IN_PROGRESS) { | |
1875 | already_deferred = TRUE; | |
1876 | } | |
1877 | ||
39236c6e | 1878 | /* This can be set and cleared exactly once. */ |
fe8ab488 A |
1879 | if (pcontrol & PROC_DIRTY_DEFER) { |
1880 | ||
1881 | if ( !(old_dirty & P_DIRTY_DEFER)) { | |
1882 | p->p_memstat_dirty |= P_DIRTY_DEFER; | |
1883 | } | |
1884 | ||
1885 | defer_now = TRUE; | |
39236c6e A |
1886 | } |
1887 | ||
3e170ce0 | 1888 | MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for pid %d\n", |
39236c6e | 1889 | ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N", |
fe8ab488 | 1890 | defer_now ? "Y" : "N", |
39236c6e A |
1891 | p->p_memstat_dirty & P_DIRTY ? "Y" : "N", |
1892 | p->p_pid); | |
1893 | ||
1894 | /* Kick off or invalidate the idle exit deferment if there's a state transition. */ | |
1895 | if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) { | |
1896 | if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) && | |
fe8ab488 A |
1897 | defer_now && !already_deferred) { |
1898 | ||
1899 | /* | |
1900 | * Request to defer a clean process that's idle-exit enabled | |
1901 | * and not already in the jetsam deferred band. | |
1902 | */ | |
39236c6e A |
1903 | memorystatus_schedule_idle_demotion_locked(p, TRUE); |
1904 | reschedule = TRUE; | |
fe8ab488 A |
1905 | |
1906 | } else if (!defer_now && already_deferred) { | |
1907 | ||
1908 | /* | |
1909 | * Either the process is no longer idle-exit enabled OR | |
1910 | * there's a request to cancel a currently active deferral. | |
1911 | */ | |
1912 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1913 | reschedule = TRUE; | |
1914 | } | |
1915 | } else { | |
1916 | ||
1917 | /* | |
1918 | * We are trying to operate on a dirty process. Dirty processes have to | |
1919 | * be removed from the deferred band. The question is: do we reset the
1920 | * deferred state or not? | |
1921 | * | |
1922 | * This could be a legal request like: | |
1923 | * - this process had opted into the JETSAM_DEFERRED band | |
1924 | * - but it's now dirty and requests to opt out. | |
1925 | * In this case, we remove the process from the band and reset its | |
1926 | * state too. It'll opt back in properly when needed. | |
1927 | * | |
1928 | * OR, this request could be a user-space bug. E.g.: | |
1929 | * - this process had opted into the JETSAM_DEFERRED band when clean | |
1930 | * - and then issues another request to put it into the band again, except
1931 | * this time the process is dirty. | |
1932 | * The process going dirty, as a transition in memorystatus_dirty_set(), will pull the process out of | |
1933 | * the deferred band with its state intact. So our request below is a no-op.
1934 | * But we do it here anyway, for coverage.
1935 | * | |
1936 | * memorystatus_update_idle_priority_locked() | |
1937 | * single-mindedly treats a dirty process as "cannot be in the deferred band". | |
1938 | */ | |
1939 | ||
1940 | if (!defer_now && already_deferred) { | |
39236c6e A |
1941 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); |
1942 | reschedule = TRUE; | |
fe8ab488 A |
1943 | } else { |
1944 | memorystatus_invalidate_idle_demotion_locked(p, FALSE); | |
1945 | reschedule = TRUE; | |
316670eb A |
1946 | } |
1947 | } | |
39236c6e A |
1948 | |
1949 | memorystatus_update_idle_priority_locked(p); | |
1950 | ||
1951 | if (reschedule) { | |
1952 | memorystatus_reschedule_idle_demotion_locked(); | |
1953 | } | |
1954 | ||
1955 | ret = 0; | |
316670eb | 1956 | |
39236c6e A |
1957 | exit: |
1958 | proc_list_unlock(); | |
316670eb A |
1959 | |
1960 | return ret; | |
1961 | } | |
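/*
 * Editor's sketch of the user-space side (hypothetical; assumes the private
 * libproc wrappers proc_track_dirty()/proc_set_dirty(), which ultimately
 * funnel into memorystatus_dirty_track()/memorystatus_dirty_set() here):
 */
#if 0	/* illustrative sketch only (user space) */
static void
sketch_dirty_lifecycle(void)
{
	/* Opt in at launch: enable tracking, allow idle exit, and request
	 * the one-time deferral window. */
	proc_track_dirty(getpid(),
	    PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);

	proc_set_dirty(getpid(), true);		/* working: promoted to the requested band */
	/* ... do the work ... */
	proc_set_dirty(getpid(), false);	/* clean: demoted toward the idle/deferred band */
}
#endif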
2d21ac55 | 1962 | |
39236c6e A |
1963 | int |
1964 | memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) { | |
1965 | int ret; | |
1966 | boolean_t kill = false; | |
1967 | boolean_t reschedule = FALSE; | |
1968 | boolean_t was_dirty = FALSE; | |
1969 | boolean_t now_dirty = FALSE; | |
6d2010ae | 1970 | |
39236c6e | 1971 | MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty); |
fe8ab488 A |
1972 | |
1973 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_SET), p->p_pid, self, pcontrol, 0, 0); | |
b0d623f7 | 1974 | |
39236c6e A |
1975 | proc_list_lock(); |
1976 | ||
fe8ab488 A |
1977 | if ((p->p_listflag & P_LIST_EXITED) != 0) { |
1978 | /* | |
1979 | * Process is on its way out. | |
1980 | */ | |
1981 | ret = EBUSY; | |
1982 | goto exit; | |
1983 | } | |
1984 | ||
39236c6e A |
1985 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { |
1986 | ret = EPERM; | |
1987 | goto exit; | |
1988 | } | |
1989 | ||
1990 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) | |
1991 | was_dirty = TRUE; | |
1992 | ||
1993 | if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { | |
1994 | /* Dirty tracking not enabled */ | |
1995 | ret = EINVAL; | |
1996 | } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { | |
1997 | /* | |
1998 | * Process is set to be terminated and we're attempting to mark it dirty. | |
1999 | * Set for termination and marking as clean is OK - see <rdar://problem/10594349>. | |
2000 | */ | |
2001 | ret = EBUSY; | |
2002 | } else { | |
2003 | int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN; | |
2004 | if (pcontrol && !(p->p_memstat_dirty & flag)) { | |
2005 | /* Mark the process as having been dirtied at some point */ | |
2006 | p->p_memstat_dirty |= (flag | P_DIRTY_MARKED); | |
2007 | memorystatus_dirty_count++; | |
2008 | ret = 0; | |
2009 | } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) { | |
3e170ce0 | 2010 | if ((flag == P_DIRTY_SHUTDOWN) && (!(p->p_memstat_dirty & P_DIRTY))) { |
39236c6e A |
2011 | /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */ |
2012 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; | |
2013 | kill = true; | |
2014 | } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { | |
2015 | /* Kill previously terminated processes if set clean */ | |
2016 | kill = true; | |
2017 | } | |
2018 | p->p_memstat_dirty &= ~flag; | |
2019 | memorystatus_dirty_count--; | |
2020 | ret = 0; | |
2021 | } else { | |
2022 | /* Already set */ | |
2023 | ret = EALREADY; | |
316670eb | 2024 | } |
39236c6e A |
2025 | } |
2026 | ||
2027 | if (ret != 0) { | |
2028 | goto exit; | |
2029 | } | |
3e170ce0 | 2030 | |
39236c6e A |
2031 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) |
2032 | now_dirty = TRUE; | |
2033 | ||
2034 | if ((was_dirty == TRUE && now_dirty == FALSE) || | |
2035 | (was_dirty == FALSE && now_dirty == TRUE)) { | |
2036 | ||
2037 | /* Manage idle exit deferral, if applied */ | |
2038 | if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == | |
2039 | (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) { | |
fe8ab488 A |
2040 | |
2041 | /* | |
2042 | * P_DIRTY_DEFER_IN_PROGRESS means the process is in the deferred band OR it might be heading back | |
2043 | * there once it's clean again and has some protection window left. | |
2044 | */ | |
2045 | ||
39236c6e | 2046 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { |
fe8ab488 A |
2047 | /* |
2048 | * New dirty process i.e. "was_dirty == FALSE && now_dirty == TRUE" | |
2049 | * | |
2050 | * The process will move from the deferred band to its higher requested | |
2051 | * jetsam band. But we don't clear its state i.e. we want to remember that | |
2052 | * this process was part of the "deferred" band and will return to it. | |
2053 | * | |
2054 | * This way, we don't let it age beyond the protection | |
2055 | * window when it returns to "clean". All the while giving | |
2056 | * it a chance to perform its work while "dirty". | |
2057 | * | |
2058 | */ | |
39236c6e A |
2059 | memorystatus_invalidate_idle_demotion_locked(p, FALSE); |
2060 | reschedule = TRUE; | |
2061 | } else { | |
fe8ab488 A |
2062 | |
2063 | /* | |
2064 | * Process is back from "dirty" to "clean". | |
2065 | * | |
2066 | * Is its timer up OR does it still have some protection | |
2067 | * window left? | |
2068 | */ | |
2069 | ||
39236c6e | 2070 | if (mach_absolute_time() >= p->p_memstat_idledeadline) { |
fe8ab488 A |
2071 | /* |
2072 | * The process' deadline has expired. It currently | |
2073 | * does not reside in the DEFERRED bucket. | |
2074 | * | |
2075 | * It's on its way to the JETSAM_PRIORITY_IDLE | |
2076 | * bucket via memorystatus_update_idle_priority_locked() | |
2077 | * below. | |
2078 | ||
2079 | * So all we need to do is reset all the state on the | |
2080 | * process that's related to the DEFERRED bucket i.e. | |
2081 | * the DIRTY_DEFER_IN_PROGRESS flag and the timer deadline. | |
2082 | * | |
2083 | */ | |
2084 | ||
2085 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
2086 | reschedule = TRUE; | |
39236c6e | 2087 | } else { |
fe8ab488 A |
2088 | /* |
2089 | * It still has some protection window left and so | |
2090 | * we just re-arm the timer without modifying any | |
2091 | * state on the process. | |
2092 | */ | |
39236c6e A |
2093 | memorystatus_schedule_idle_demotion_locked(p, FALSE); |
2094 | reschedule = TRUE; | |
2095 | } | |
2096 | } | |
2097 | } | |
3e170ce0 | 2098 | |
39236c6e | 2099 | memorystatus_update_idle_priority_locked(p); |
3e170ce0 A |
2100 | |
2101 | #if CONFIG_JETSAM | |
2102 | if (memorystatus_highwater_enabled) { | |
2103 | boolean_t trigger_exception; | |
2104 | /* | |
2105 | * We are in this path because this process transitioned between | |
2106 | * dirty <--> clean state. Update the cached memory limits. | |
2107 | */ | |
2108 | ||
2109 | if (proc_jetsam_state_is_active_locked(p) == TRUE) { | |
2110 | /* | |
2111 | * process is dirty | |
2112 | */ | |
2113 | CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception); | |
2114 | } else { | |
2115 | /* | |
2116 | * process is clean | |
2117 | */ | |
2118 | CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception); | |
2119 | } | |
2120 | ||
2121 | /* | |
2122 | * Enforce the new limits by writing to the ledger. | |
2123 | * | |
2124 | * This is a hot path and holding the proc_list_lock while writing to the ledgers, | |
2125 | * (where the task lock is taken) is bad. So, we temporarily drop the proc_list_lock. | |
2126 | * We aren't traversing the jetsam bucket list here, so we should be safe. | |
2127 | * See rdar://21394491. | |
2128 | */ | |
2129 | ||
2130 | if (proc_ref_locked(p) == p) { | |
2131 | int ledger_limit; | |
2132 | if (p->p_memstat_memlimit > 0) { | |
2133 | ledger_limit = p->p_memstat_memlimit; | |
2134 | } else { | |
2135 | ledger_limit = -1; | |
2136 | } | |
2137 | proc_list_unlock(); | |
2138 | task_set_phys_footprint_limit_internal(p->task, ledger_limit, NULL, trigger_exception); | |
2139 | proc_list_lock(); | |
2140 | proc_rele_locked(p); | |
2141 | ||
2142 | MEMORYSTATUS_DEBUG(3, "memorystatus_dirty_set: new limit on pid %d (%dMB %s) priority(%d) dirty?=0x%x %s\n", | |
2143 | p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1), | |
2144 | (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty, | |
2145 | (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : "")); | |
2146 | } | |
2147 | ||
2148 | } | |
2149 | #endif /* CONFIG_JETSAM */ | |
39236c6e A |
2150 | |
2151 | /* If the deferral state changed, reschedule the demotion timer */ | |
2152 | if (reschedule) { | |
2153 | memorystatus_reschedule_idle_demotion_locked(); | |
2154 | } | |
2155 | } | |
3e170ce0 | 2156 | |
39236c6e | 2157 | if (kill) { |
3e170ce0 A |
2158 | if (proc_ref_locked(p) == p) { |
2159 | proc_list_unlock(); | |
2160 | psignal(p, SIGKILL); | |
2161 | proc_list_lock(); | |
2162 | proc_rele_locked(p); | |
2163 | } | |
39236c6e A |
2164 | } |
2165 | ||
2166 | exit: | |
2167 | proc_list_unlock(); | |
2168 | ||
2169 | return ret; | |
2170 | } | |
b0d623f7 | 2171 | |
39236c6e | 2172 | int |
fe8ab488 A |
2173 | memorystatus_dirty_clear(proc_t p, uint32_t pcontrol) { |
2174 | ||
39236c6e | 2175 | int ret = 0; |
fe8ab488 A |
2176 | |
2177 | MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_clear(): %d 0x%x 0x%x\n", p->p_pid, pcontrol, p->p_memstat_dirty); | |
39236c6e | 2178 | |
fe8ab488 A |
2179 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_CLEAR), p->p_pid, pcontrol, 0, 0, 0); |
2180 | ||
2181 | proc_list_lock(); | |
2182 | ||
2183 | if ((p->p_listflag & P_LIST_EXITED) != 0) { | |
2184 | /* | |
2185 | * Process is on its way out. | |
2186 | */ | |
2187 | ret = EBUSY; | |
2188 | goto exit; | |
2189 | } | |
2190 | ||
2191 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { | |
2192 | ret = EPERM; | |
2193 | goto exit; | |
2194 | } | |
2195 | ||
2196 | if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { | |
2197 | /* Dirty tracking not enabled */ | |
2198 | ret = EINVAL; | |
2199 | goto exit; | |
2200 | } | |
2201 | ||
2202 | if (!pcontrol || (pcontrol & (PROC_DIRTY_LAUNCH_IN_PROGRESS | PROC_DIRTY_DEFER)) == 0) { | |
2203 | ret = EINVAL; | |
2204 | goto exit; | |
2205 | } | |
2206 | ||
2207 | if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { | |
2208 | p->p_memstat_dirty &= ~P_DIRTY_LAUNCH_IN_PROGRESS; | |
2209 | } | |
2210 | ||
2211 | /* This can be set and cleared exactly once. */ | |
2212 | if (pcontrol & PROC_DIRTY_DEFER) { | |
2213 | ||
2214 | if (p->p_memstat_dirty & P_DIRTY_DEFER) { | |
2215 | ||
2216 | p->p_memstat_dirty &= ~P_DIRTY_DEFER; | |
2217 | ||
2218 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
2219 | memorystatus_update_idle_priority_locked(p); | |
2220 | memorystatus_reschedule_idle_demotion_locked(); | |
2221 | } | |
2222 | } | |
2223 | ||
2224 | ret = 0; | |
2225 | exit: | |
2226 | proc_list_unlock(); | |
2227 | ||
2228 | return ret; | |
2229 | } | |
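/*
 * Editor's note (hypothetical usage; assumes the private libproc wrapper
 * proc_clear_dirty(), which funnels into memorystatus_dirty_clear()):
 *
 *   proc_clear_dirty(pid, PROC_DIRTY_DEFER);   // give up the deferral window early
 *
 * Per the checks above, only PROC_DIRTY_LAUNCH_IN_PROGRESS and
 * PROC_DIRTY_DEFER are clearable, and only while dirty tracking is enabled.
 */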
2230 | ||
2231 | int | |
2232 | memorystatus_dirty_get(proc_t p) { | |
2233 | int ret = 0; | |
2234 | ||
2235 | proc_list_lock(); | |
2236 | ||
2237 | if (p->p_memstat_dirty & P_DIRTY_TRACK) { | |
39236c6e A |
2238 | ret |= PROC_DIRTY_TRACKED; |
2239 | if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) { | |
2240 | ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT; | |
2241 | } | |
2242 | if (p->p_memstat_dirty & P_DIRTY) { | |
2243 | ret |= PROC_DIRTY_IS_DIRTY; | |
2244 | } | |
fe8ab488 A |
2245 | if (p->p_memstat_dirty & P_DIRTY_LAUNCH_IN_PROGRESS) { |
2246 | ret |= PROC_DIRTY_LAUNCH_IS_IN_PROGRESS; | |
2247 | } | |
39236c6e A |
2248 | } |
2249 | ||
2250 | proc_list_unlock(); | |
2251 | ||
2252 | return ret; | |
2253 | } | |
b0d623f7 | 2254 | |
39236c6e A |
2255 | int |
2256 | memorystatus_on_terminate(proc_t p) { | |
2257 | int sig; | |
2258 | ||
2259 | proc_list_lock(); | |
2260 | ||
2261 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; | |
2262 | ||
2263 | if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) { | |
2264 | /* Clean; mark as terminated and issue SIGKILL */ | |
2265 | sig = SIGKILL; | |
2266 | } else { | |
2267 | /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */ | |
2268 | sig = SIGTERM; | |
316670eb | 2269 | } |
39236c6e A |
2270 | |
2271 | proc_list_unlock(); | |
2272 | ||
2273 | return sig; | |
316670eb | 2274 | } |
b0d623f7 | 2275 | |
316670eb | 2276 | void |
39236c6e A |
2277 | memorystatus_on_suspend(proc_t p) |
2278 | { | |
316670eb | 2279 | #if CONFIG_FREEZE |
39236c6e | 2280 | uint32_t pages; |
fe8ab488 | 2281 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); |
39236c6e A |
2282 | #endif |
2283 | proc_list_lock(); | |
2284 | #if CONFIG_FREEZE | |
2285 | p->p_memstat_suspendedfootprint = pages; | |
2286 | memorystatus_suspended_footprint_total += pages; | |
2287 | memorystatus_suspended_count++; | |
316670eb | 2288 | #endif |
39236c6e A |
2289 | p->p_memstat_state |= P_MEMSTAT_SUSPENDED; |
2290 | proc_list_unlock(); | |
2291 | } | |
6d2010ae | 2292 | |
39236c6e A |
2293 | void |
2294 | memorystatus_on_resume(proc_t p) | |
2295 | { | |
2296 | #if CONFIG_FREEZE | |
2297 | boolean_t frozen; | |
2298 | pid_t pid; | |
2299 | #endif | |
6d2010ae | 2300 | |
39236c6e | 2301 | proc_list_lock(); |
6d2010ae | 2302 | |
316670eb | 2303 | #if CONFIG_FREEZE |
39236c6e A |
2304 | frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN); |
2305 | if (frozen) { | |
2306 | memorystatus_frozen_count--; | |
2307 | p->p_memstat_state |= P_MEMSTAT_PRIOR_THAW; | |
2308 | } | |
2309 | ||
2310 | memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint; | |
2311 | memorystatus_suspended_count--; | |
2312 | ||
2313 | pid = p->p_pid; | |
316670eb | 2314 | #endif |
39236c6e A |
2315 | |
2316 | p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN); | |
2317 | ||
2318 | proc_list_unlock(); | |
2319 | ||
2320 | #if CONFIG_FREEZE | |
2321 | if (frozen) { | |
2322 | memorystatus_freeze_entry_t data = { pid, FALSE, 0 }; | |
2323 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); | |
316670eb | 2324 | } |
39236c6e | 2325 | #endif |
316670eb | 2326 | } |
6d2010ae | 2327 | |
316670eb | 2328 | void |
39236c6e | 2329 | memorystatus_on_inactivity(proc_t p) |
6d2010ae | 2330 | { |
39236c6e | 2331 | #pragma unused(p) |
316670eb A |
2332 | #if CONFIG_FREEZE |
2333 | /* Wake the freeze thread */ | |
2334 | thread_wakeup((event_t)&memorystatus_freeze_wakeup); | |
2335 | #endif | |
2336 | } | |
6d2010ae | 2337 | |
39236c6e A |
2338 | static uint32_t |
2339 | memorystatus_build_state(proc_t p) { | |
2340 | uint32_t snapshot_state = 0; | |
2341 | ||
2342 | /* General */ | |
2343 | if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { | |
2344 | snapshot_state |= kMemorystatusSuspended; | |
2345 | } | |
2346 | if (p->p_memstat_state & P_MEMSTAT_FROZEN) { | |
2347 | snapshot_state |= kMemorystatusFrozen; | |
2348 | } | |
2349 | if (p->p_memstat_state & P_MEMSTAT_PRIOR_THAW) { | |
2350 | snapshot_state |= kMemorystatusWasThawed; | |
2351 | } | |
2352 | ||
2353 | /* Tracking */ | |
2354 | if (p->p_memstat_dirty & P_DIRTY_TRACK) { | |
2355 | snapshot_state |= kMemorystatusTracked; | |
2356 | } | |
2357 | if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) { | |
2358 | snapshot_state |= kMemorystatusSupportsIdleExit; | |
2359 | } | |
2360 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { | |
2361 | snapshot_state |= kMemorystatusDirty; | |
2362 | } | |
2363 | ||
2364 | return snapshot_state; | |
2365 | } | |
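/*
 * Editor's sketch (hypothetical consumer, not in the original source):
 * decoding the snapshot state bits built above, assuming the
 * kMemorystatus* enums from sys/kern_memorystatus.h.
 */
#if 0	/* illustrative sketch only */
#include <stdint.h>
#include <stdio.h>

static void
sketch_print_state(uint32_t state)
{
	printf("%s%s%s%s%s\n",
	    (state & kMemorystatusSuspended) ? "suspended " : "",
	    (state & kMemorystatusFrozen) ? "frozen " : "",
	    (state & kMemorystatusTracked) ? "tracked " : "",
	    (state & kMemorystatusSupportsIdleExit) ? "idle-exit " : "",
	    (state & kMemorystatusDirty) ? "dirty" : "");
}
#endif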
2366 | ||
2367 | #if !CONFIG_JETSAM | |
2368 | ||
2369 | static boolean_t | |
2370 | kill_idle_exit_proc(void) | |
316670eb | 2371 | { |
39236c6e | 2372 | proc_t p, victim_p = PROC_NULL; |
316670eb | 2373 | uint64_t current_time; |
39236c6e A |
2374 | boolean_t killed = FALSE; |
2375 | unsigned int i = 0; | |
316670eb | 2376 | |
39236c6e | 2377 | /* Pick next idle exit victim. */ |
316670eb | 2378 | current_time = mach_absolute_time(); |
6d2010ae | 2379 | |
39236c6e | 2380 | proc_list_lock(); |
6d2010ae | 2381 | |
39236c6e A |
2382 | p = memorystatus_get_first_proc_locked(&i, FALSE); |
2383 | while (p) { | |
2384 | /* No need to look beyond the idle band */ | |
2385 | if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) { | |
2386 | break; | |
2387 | } | |
2388 | ||
2389 | if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) { | |
2390 | if (current_time >= p->p_memstat_idledeadline) { | |
2391 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; | |
2392 | victim_p = proc_ref_locked(p); | |
2393 | break; | |
316670eb A |
2394 | } |
2395 | } | |
39236c6e A |
2396 | |
2397 | p = memorystatus_get_next_proc_locked(&i, p, FALSE); | |
6d2010ae | 2398 | } |
316670eb | 2399 | |
39236c6e A |
2400 | proc_list_unlock(); |
2401 | ||
2402 | if (victim_p) { | |
2403 | printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_p->p_pid, (victim_p->p_comm ? victim_p->p_comm : "(unknown)")); | |
2404 | killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit); | |
2405 | proc_rele(victim_p); | |
316670eb | 2406 | } |
b0d623f7 | 2407 | |
39236c6e | 2408 | return killed; |
2d21ac55 | 2409 | } |
39236c6e | 2410 | #endif |
2d21ac55 | 2411 | |
fe8ab488 | 2412 | #if CONFIG_JETSAM |
39236c6e A |
2413 | static void |
2414 | memorystatus_thread_wake(void) { | |
2415 | thread_wakeup((event_t)&memorystatus_wakeup); | |
b0d623f7 | 2416 | } |
fe8ab488 A |
2417 | #endif /* CONFIG_JETSAM */ |
2418 | ||
2419 | extern void vm_pressure_response(void); | |
b0d623f7 | 2420 | |
316670eb | 2421 | static int |
39236c6e A |
2422 | memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation) |
2423 | { | |
2424 | if (interval_ms) { | |
2425 | assert_wait_timeout(&memorystatus_wakeup, THREAD_UNINT, interval_ms, 1000 * NSEC_PER_USEC); | |
2426 | } else { | |
2427 | assert_wait(&memorystatus_wakeup, THREAD_UNINT); | |
2428 | } | |
316670eb | 2429 | |
39236c6e A |
2430 | return thread_block(continuation); |
2431 | } | |
316670eb | 2432 | |
39236c6e A |
2433 | static void |
2434 | memorystatus_thread(void *param __unused, wait_result_t wr __unused) | |
2435 | { | |
2436 | static boolean_t is_vm_privileged = FALSE; | |
3e170ce0 | 2437 | |
39236c6e A |
2438 | #if CONFIG_JETSAM |
2439 | boolean_t post_snapshot = FALSE; | |
2440 | uint32_t errors = 0; | |
fe8ab488 | 2441 | uint32_t hwm_kill = 0; |
3e170ce0 A |
2442 | boolean_t sort_flag = TRUE; |
2443 | ||
2444 | /* Jetsam Loop Detection - locals */ | |
2445 | memstat_bucket_t *bucket; | |
2446 | int jld_bucket_count = 0; | |
2447 | struct timeval jld_now_tstamp = {0,0}; | |
2448 | uint64_t jld_now_msecs = 0; | |
2449 | ||
2450 | /* Jetsam Loop Detection - statics */ | |
2451 | static uint64_t jld_timestamp_msecs = 0; | |
2452 | static int jld_idle_kill_candidates = 0; /* Number of available processes in band 0,1 at start */ | |
2453 | static int jld_idle_kills = 0; /* Number of procs killed during eval period */ | |
2454 | static int jld_eval_aggressive_count = 0; /* Bumps the max priority in aggressive loop */ | |
2455 | static int32_t jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT; | |
39236c6e | 2456 | #endif |
316670eb | 2457 | |
39236c6e A |
2458 | if (is_vm_privileged == FALSE) { |
2459 | /* | |
2460 | * It's the first time the thread has run, so just mark the thread as privileged and block. | |
2461 | * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>. | |
2462 | */ | |
2463 | thread_wire(host_priv_self(), current_thread(), TRUE); | |
2464 | is_vm_privileged = TRUE; | |
2465 | ||
3e170ce0 A |
2466 | if (vm_restricted_to_single_processor == TRUE) |
2467 | thread_vm_bind_group_add(); | |
2468 | ||
39236c6e | 2469 | memorystatus_thread_block(0, memorystatus_thread); |
316670eb A |
2470 | } |
2471 | ||
39236c6e A |
2472 | #if CONFIG_JETSAM |
2473 | ||
2474 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START, | |
3e170ce0 | 2475 | memorystatus_available_pages, memorystatus_jld_enabled, memorystatus_jld_eval_period_msecs, memorystatus_jld_eval_aggressive_count,0); |
316670eb | 2476 | |
fe8ab488 A |
2477 | /* |
2478 | * Jetsam-aware version.
2479 | *
2480 | * The VM pressure notification thread is working its way through clients in parallel.
39236c6e | 2481 | * |
fe8ab488 A |
2482 | * So, while the pressure notification thread is targeting processes in order of |
2483 | * increasing jetsam priority, we can hopefully reduce or stop its work by killing
2484 | * any processes that have exceeded their highwater mark. | |
39236c6e | 2485 | * |
fe8ab488 A |
2486 | * If we run out of HWM processes and our available page count drops below the critical threshold, then
2487 | * we target the least recently used process in order of increasing jetsam priority (exception: the FG band). | |
39236c6e | 2488 | */ |
fe8ab488 A |
2489 | while (is_thrashing(kill_under_pressure_cause) || |
2490 | memorystatus_available_pages <= memorystatus_available_pages_pressure) { | |
39236c6e A |
2491 | boolean_t killed; |
2492 | int32_t priority; | |
fe8ab488 A |
2493 | uint32_t cause; |
2494 | ||
2495 | if (kill_under_pressure_cause) { | |
2496 | cause = kill_under_pressure_cause; | |
2497 | } else { | |
2498 | cause = kMemorystatusKilledVMPageShortage; | |
2499 | } | |
39236c6e A |
2500 | |
2501 | #if LEGACY_HIWATER | |
2502 | /* Highwater */ | |
2503 | killed = memorystatus_kill_hiwat_proc(&errors); | |
2504 | if (killed) { | |
fe8ab488 | 2505 | hwm_kill++; |
39236c6e A |
2506 | post_snapshot = TRUE; |
2507 | goto done; | |
fe8ab488 A |
2508 | } else { |
2509 | memorystatus_hwm_candidates = FALSE; | |
2510 | } | |
2511 | ||
2512 | /* No highwater processes to kill. Continue or stop for now? */ | |
2513 | if (!is_thrashing(kill_under_pressure_cause) && | |
2514 | (memorystatus_available_pages > memorystatus_available_pages_critical)) { | |
2515 | /* | |
2516 | * We are _not_ out of pressure but we are above the critical threshold and there's: | |
2517 | * - no compressor thrashing | |
2518 | * - no more HWM processes left. | |
2519 | * For now, don't kill any other processes. | |
2520 | */ | |
2521 | ||
2522 | if (hwm_kill == 0) { | |
2523 | memorystatus_thread_wasted_wakeup++; | |
2524 | } | |
2525 | ||
2526 | break; | |
39236c6e A |
2527 | } |
2528 | #endif | |
3e170ce0 A |
2529 | if (memorystatus_jld_enabled == TRUE) { |
2530 | ||
2531 | /* | |
2532 | * Jetsam Loop Detection: attempt to detect | |
2533 | * rapid daemon relaunches in the lower bands. | |
2534 | */ | |
2535 | ||
2536 | microuptime(&jld_now_tstamp); | |
2537 | ||
2538 | /* | |
2539 | * Ignore usecs in this calculation. | |
2540 | * msecs granularity is close enough. | |
2541 | */ | |
2542 | jld_now_msecs = (jld_now_tstamp.tv_sec * 1000); | |
2543 | ||
2544 | proc_list_lock(); | |
2545 | bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
2546 | jld_bucket_count = bucket->count; | |
2547 | bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED]; | |
2548 | jld_bucket_count += bucket->count; | |
2549 | proc_list_unlock(); | |
2550 | ||
2551 | /* | |
2552 | * memorystatus_jld_eval_period_msecs is a tunable | |
2553 | * memorystatus_jld_eval_aggressive_count is a tunable | |
2554 | * memorystatus_jld_eval_aggressive_priority_band_max is a tunable | |
2555 | */ | |
2556 | if ( (jld_bucket_count == 0) || | |
2557 | (jld_now_msecs > (jld_timestamp_msecs + memorystatus_jld_eval_period_msecs))) { | |
2558 | ||
2559 | /* | |
2560 | * Refresh evaluation parameters | |
2561 | */ | |
2562 | jld_timestamp_msecs = jld_now_msecs; | |
2563 | jld_idle_kill_candidates = jld_bucket_count; | |
2564 | jld_idle_kills = 0; | |
2565 | jld_eval_aggressive_count = 0; | |
2566 | jld_priority_band_max = JETSAM_PRIORITY_UI_SUPPORT; | |
2567 | } | |
2568 | ||
2569 | if (jld_idle_kills > jld_idle_kill_candidates) { | |
2570 | jld_eval_aggressive_count++; | |
2571 | if (jld_eval_aggressive_count > memorystatus_jld_eval_aggressive_count) { | |
2572 | /* | |
2573 | * Bump up the jetsam priority limit (eg: the bucket index) | |
2574 | * Enforce bucket index sanity. | |
2575 | */ | |
2576 | if ((memorystatus_jld_eval_aggressive_priority_band_max < 0) || | |
2577 | (memorystatus_jld_eval_aggressive_priority_band_max >= MEMSTAT_BUCKET_COUNT)) { | |
2578 | /* | |
2579 | * Do nothing. Stick with the default level. | |
2580 | */ | |
2581 | } else { | |
2582 | jld_priority_band_max = memorystatus_jld_eval_aggressive_priority_band_max; | |
2583 | } | |
2584 | } | |
2585 | ||
2586 | killed = memorystatus_kill_top_process_aggressive( | |
2587 | TRUE, | |
2588 | kMemorystatusKilledVMThrashing, | |
2589 | jld_eval_aggressive_count, | |
2590 | jld_priority_band_max, | |
2591 | &errors); | |
2592 | ||
2593 | ||
2594 | if (killed) { | |
2595 | /* Always generate logs after aggressive kill */ | |
2596 | post_snapshot = TRUE; | |
2597 | goto done; | |
2598 | } | |
2599 | } | |
2600 | } | |
39236c6e A |
2601 | |
2602 | /* LRU */ | |
3e170ce0 A |
2603 | killed = memorystatus_kill_top_process(TRUE, sort_flag, cause, &priority, &errors); |
2604 | sort_flag = FALSE; | |
2605 | ||
39236c6e | 2606 | if (killed) { |
3e170ce0 A |
2607 | /* |
2608 | * Don't generate logs for steady-state idle-exit kills, | |
2609 | * unless it is overridden for debug or by the device | |
2610 | * tree. | |
2611 | */ | |
fe8ab488 | 2612 | if ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot) { |
39236c6e A |
2613 | post_snapshot = TRUE; |
2614 | } | |
3e170ce0 A |
2615 | |
2616 | /* Jetsam Loop Detection */ | |
2617 | if (memorystatus_jld_enabled == TRUE) { | |
2618 | if ((priority == JETSAM_PRIORITY_IDLE) || (priority == JETSAM_PRIORITY_IDLE_DEFERRED)) { | |
2619 | jld_idle_kills++; | |
2620 | } else { | |
2621 | /* | |
2622 | * We've reached into bands beyond idle deferred. | |
2623 | * We make no attempt to monitor them.
2624 | */ | |
2625 | } | |
2626 | } | |
39236c6e A |
2627 | goto done; |
2628 | } | |
fe8ab488 A |
2629 | |
2630 | if (memorystatus_available_pages <= memorystatus_available_pages_critical) { | |
2631 | /* Under pressure and unable to kill a process - panic */ | |
2632 | panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages); | |
2633 | } | |
39236c6e A |
2634 | |
2635 | done: | |
fe8ab488 A |
2636 | |
2637 | /* | |
2638 | * We do not want to over-kill when thrashing has been detected. | |
2639 | * To avoid that, we reset the flag here and notify the | |
2640 | * compressor. | |
39236c6e | 2641 | */ |
fe8ab488 A |
2642 | if (is_thrashing(kill_under_pressure_cause)) { |
2643 | kill_under_pressure_cause = 0; | |
2644 | vm_thrashing_jetsam_done(); | |
39236c6e | 2645 | } |
39236c6e | 2646 | } |
fe8ab488 A |
2647 | |
2648 | kill_under_pressure_cause = 0; | |
2649 | ||
39236c6e A |
2650 | if (errors) { |
2651 | memorystatus_clear_errors(); | |
2652 | } | |
2653 | ||
2654 | #if VM_PRESSURE_EVENTS | |
fe8ab488 A |
2655 | /* |
2656 | * LD: We used to target the foreground process first and foremost here. | |
2657 | * Now, we target all processes, starting from the non-suspended, background | |
2658 | * processes first. We will target foreground too. | |
2659 | * | |
2660 | * memorystatus_update_vm_pressure(TRUE); | |
2661 | */ | |
2662 | //vm_pressure_response(); | |
39236c6e A |
2663 | #endif |
2664 | ||
2665 | if (post_snapshot) { | |
2666 | size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + | |
2667 | sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count); | |
3e170ce0 A |
2668 | uint64_t timestamp_now = mach_absolute_time(); |
2669 | memorystatus_jetsam_snapshot->notification_time = timestamp_now; | |
2670 | if (memorystatus_jetsam_snapshot_last_timestamp == 0 || | |
2671 | timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout) { | |
2672 | int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); | |
2673 | if (!ret) { | |
2674 | proc_list_lock(); | |
2675 | memorystatus_jetsam_snapshot_last_timestamp = timestamp_now; | |
2676 | proc_list_unlock(); | |
2677 | } | |
2678 | } | |
39236c6e | 2679 | } |
3e170ce0 | 2680 | |
39236c6e A |
2681 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END, |
2682 | memorystatus_available_pages, 0, 0, 0, 0); | |
2683 | ||
2684 | #else /* CONFIG_JETSAM */ | |
2685 | ||
fe8ab488 A |
2686 | /* |
2687 | * Jetsam not enabled | |
39236c6e A |
2688 | */ |
2689 | ||
39236c6e A |
2690 | #endif /* CONFIG_JETSAM */ |
2691 | ||
2692 | memorystatus_thread_block(0, memorystatus_thread); | |
2693 | } | |
2694 | ||
2695 | #if !CONFIG_JETSAM | |
fe8ab488 A |
2696 | /* |
2697 | * Returns TRUE: | |
2698 | * when an idle-exitable proc was killed | |
2699 | * Returns FALSE: | |
2700 | * when there are no more idle-exitable procs found | |
2701 | * when the attempt to kill an idle-exitable proc failed | |
2702 | */ | |
39236c6e | 2703 | boolean_t memorystatus_idle_exit_from_VM(void) { |
fe8ab488 | 2704 | return(kill_idle_exit_proc()); |
39236c6e | 2705 | } |
fe8ab488 | 2706 | #endif /* !CONFIG_JETSAM */ |
39236c6e A |
2707 | |
2708 | #if CONFIG_JETSAM | |
2709 | ||
2710 | /* | |
2711 | * Callback invoked when allowable physical memory footprint exceeded | |
2712 | * (dirty pages + IOKit mappings) | |
2713 | * | |
2714 | * This is invoked for both advisory, non-fatal per-task high watermarks, | |
fe8ab488 | 2715 | * as well as the fatal task memory limits. |
39236c6e A |
2716 | */ |
2717 | void | |
2718 | memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb) | |
2719 | { | |
3e170ce0 A |
2720 | boolean_t is_active; |
2721 | boolean_t is_fatal; | |
2722 | ||
39236c6e | 2723 | proc_t p = current_proc(); |
fe8ab488 | 2724 | |
3e170ce0 A |
2725 | proc_list_lock(); |
2726 | ||
2727 | is_active = proc_jetsam_state_is_active_locked(p); | |
2728 | is_fatal = (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT); | |
2729 | ||
2730 | if (warning == FALSE) { | |
2731 | /* | |
2732 | * We only want the EXC_RESOURCE to trigger once per lifetime | |
2733 | * of the active/inactive limit state. So, here, we detect the | |
2734 | * process's active/inactive state and mark that state's | |
2735 | * exception as having been triggered. | |
2736 | */ | |
2737 | if (is_active == TRUE) { | |
2738 | /* | |
2739 | * turn off exceptions for active state | |
2740 | */ | |
2741 | p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_ACTIVE_EXC_TRIGGERED; | |
2742 | } else { | |
2743 | /* | |
2744 | * turn off exceptions for inactive state | |
2745 | */ | |
2746 | p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_INACTIVE_EXC_TRIGGERED; | |
2747 | } | |
2748 | ||
2749 | /* | |
2750 | * A soft memory limit is a non-fatal high-water mark. | |
2751 | * A hard memory limit is a fatal custom task limit or the system-wide per-task memory limit. | |
2752 | */ | |
2753 | printf("process %d (%s) exceeded physical memory footprint, the %s%sMemoryLimit of %d MB\n", | |
2754 | p->p_pid, p->p_comm, (is_active ? "Active" : "Inactive"), | |
2755 | (is_fatal ? "Hard" : "Soft"), max_footprint_mb); | |
2756 | ||
fe8ab488 | 2757 | } |
39236c6e | 2758 | |
3e170ce0 A |
2759 | proc_list_unlock(); |
2760 | ||
39236c6e A |
2761 | #if VM_PRESSURE_EVENTS |
2762 | if (warning == TRUE) { | |
fe8ab488 | 2763 | if (memorystatus_warn_process(p->p_pid, TRUE /* critical? */) != TRUE) { |
39236c6e | 2764 | /* Print warning, since it's possible that task has not registered for pressure notifications */ |
3e170ce0 | 2765 | printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n"); |
39236c6e A |
2766 | } |
2767 | return; | |
2768 | } | |
2769 | #endif /* VM_PRESSURE_EVENTS */ | |
2770 | ||
3e170ce0 | 2771 | if (is_fatal) { |
39236c6e | 2772 | /* |
fe8ab488 A |
2773 | * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task |
2774 | * has violated either the system-wide per-task memory limit OR its own task limit. | |
39236c6e A |
2775 | */ |
2776 | if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) { | |
2777 | printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n"); | |
2778 | } | |
fe8ab488 A |
2779 | } else { |
2780 | /* | |
2781 | * HWM offender exists. Done without locks or synchronization. | |
2782 | * See comment near its declaration for more details. | |
2783 | */ | |
2784 | memorystatus_hwm_candidates = TRUE; | |
2785 | } | |
2786 | } | |
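/*
 * Illustrative sketch (not part of this file): the warning == FALSE path
 * above is a one-shot latch per limit state.  Once the active (or inactive)
 * EXC_RESOURCE has fired, the corresponding *_EXC_TRIGGERED bit prevents it
 * from firing again until the process changes state.  A minimal model with
 * hypothetical flag names:
 */
#include <stdbool.h>
#include <stdint.h>

#define EXC_ACTIVE_TRIGGERED   0x1u
#define EXC_INACTIVE_TRIGGERED 0x2u

static bool
exception_should_fire(uint32_t *state, bool is_active)
{
	uint32_t bit = is_active ? EXC_ACTIVE_TRIGGERED : EXC_INACTIVE_TRIGGERED;

	if (*state & bit) {
		return false;           /* already fired in this state */
	}
	*state |= bit;                  /* latch it off for this state */
	return true;
}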
2787 | ||
3e170ce0 A |
2788 | /* |
2789 | * Toggle the P_MEMSTAT_TERMINATED state. | |
2790 | * Takes the proc_list_lock. | |
2791 | */ | |
2792 | void | |
2793 | proc_memstat_terminated(proc_t p, boolean_t set) | |
2794 | { | |
2795 | #if DEVELOPMENT || DEBUG | |
2796 | if (p) { | |
2797 | proc_list_lock(); | |
2798 | if (set == TRUE) { | |
2799 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; | |
2800 | } else { | |
2801 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
2802 | } | |
2803 | proc_list_unlock(); | |
2804 | } | |
2805 | #else | |
2806 | #pragma unused(p, set) | |
2807 | /* | |
2808 | * do nothing | |
2809 | */ | |
2810 | #endif /* DEVELOPMENT || DEBUG */ | |
2811 | return; | |
2812 | } | |
2813 | ||
fe8ab488 A |
2814 | /* |
2815 | * This is invoked when a CPU limit has been exceeded while in fatal mode. | |
2816 | * The jetsam_flags do not apply, as those are for memory-related kills. | |
2817 | * We call this routine so that the offending process is killed with | |
2818 | * a non-zero exit status. | |
2819 | */ | |
2820 | void | |
2821 | jetsam_on_ledger_cpulimit_exceeded(void) | |
2822 | { | |
2823 | int retval = 0; | |
2824 | int jetsam_flags = 0; /* make it obvious */ | |
2825 | proc_t p = current_proc(); | |
2826 | ||
2827 | printf("task_exceeded_cpulimit: killing pid %d [%s]\n", | |
2828 | p->p_pid, (p->p_comm ? p->p_comm : "(unknown)")); | |
2829 | ||
2830 | retval = jetsam_do_kill(p, jetsam_flags); | |
2831 | ||
2832 | if (retval) { | |
2833 | printf("task_exceeded_cpulimit: failed to kill current task (exiting?).\n"); | |
39236c6e A |
2834 | } |
2835 | } | |
2836 | ||
2837 | static void | |
fe8ab488 | 2838 | memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages) |
39236c6e A |
2839 | { |
2840 | assert(task); | |
2841 | assert(footprint); | |
2842 | ||
2843 | *footprint = (uint32_t)(get_task_phys_footprint(task) / PAGE_SIZE_64); | |
2844 | if (max_footprint) { | |
2845 | *max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64); | |
2846 | } | |
fe8ab488 A |
2847 | if (max_footprint_lifetime) { |
2848 | *max_footprint_lifetime = (uint32_t)(get_task_resident_max(task) / PAGE_SIZE_64); | |
2849 | } | |
2850 | if (purgeable_pages) { | |
2851 | *purgeable_pages = (uint32_t)(get_task_purgeable_size(task) / PAGE_SIZE_64); | |
39236c6e | 2852 | } |
39236c6e A |
2853 | } |
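/*
 * Illustrative sketch (not part of this file): the helper above turns the
 * byte counts reported by the task layer into page counts by dividing by
 * the page size.  The same conversion in isolation, with an assumed 4 KB
 * page (xnu divides by PAGE_SIZE_64):
 */
#include <stdint.h>

#define EXAMPLE_PAGE_SIZE 4096ULL       /* assumption for the example */

static uint32_t
bytes_to_pages(uint64_t bytes)
{
	/* e.g. a 48 MB footprint: 48 * 1024 * 1024 / 4096 = 12288 pages */
	return (uint32_t)(bytes / EXAMPLE_PAGE_SIZE);
}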
2854 | ||
2855 | static void | |
3e170ce0 | 2856 | memorystatus_update_jetsam_snapshot_entry_locked(proc_t p, uint32_t kill_cause) |
39236c6e A |
2857 | { |
2858 | unsigned int i; | |
2859 | ||
2860 | for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) { | |
2861 | if (memorystatus_jetsam_snapshot_list[i].pid == p->p_pid) { | |
2862 | /* Update if the priority has changed since the snapshot was taken */ | |
2863 | if (memorystatus_jetsam_snapshot_list[i].priority != p->p_memstat_effectivepriority) { | |
2864 | memorystatus_jetsam_snapshot_list[i].priority = p->p_memstat_effectivepriority; | |
2865 | strlcpy(memorystatus_jetsam_snapshot_list[i].name, p->p_comm, MAXCOMLEN+1); | |
2866 | memorystatus_jetsam_snapshot_list[i].state = memorystatus_build_state(p); | |
2867 | memorystatus_jetsam_snapshot_list[i].user_data = p->p_memstat_userdata; | |
2868 | memorystatus_jetsam_snapshot_list[i].fds = p->p_fd->fd_nfiles; | |
2869 | } | |
2870 | memorystatus_jetsam_snapshot_list[i].killed = kill_cause; | |
2871 | return; | |
2872 | } | |
2873 | } | |
316670eb | 2874 | } |
b0d623f7 | 2875 | |
39236c6e A |
2876 | void memorystatus_pages_update(unsigned int pages_avail) |
2877 | { | |
fe8ab488 A |
2878 | memorystatus_available_pages = pages_avail; |
2879 | ||
2880 | #if VM_PRESSURE_EVENTS | |
2881 | /* | |
2882 | * Since memorystatus_available_pages changes, we should | |
2883 | * re-evaluate the pressure levels on the system and | |
2884 | * check if we need to wake the pressure thread. | |
2885 | * We also update memorystatus_level in that routine. | |
2886 | */ | |
2887 | vm_pressure_response(); | |
2888 | ||
2889 | if (memorystatus_available_pages <= memorystatus_available_pages_pressure) { | |
2890 | ||
2891 | if (memorystatus_hwm_candidates || (memorystatus_available_pages <= memorystatus_available_pages_critical)) { | |
2892 | memorystatus_thread_wake(); | |
2893 | } | |
2894 | } | |
2895 | #else /* VM_PRESSURE_EVENTS */ | |
2896 | ||
39236c6e A |
2897 | boolean_t critical, delta; |
2898 | ||
316670eb A |
2899 | if (!memorystatus_delta) { |
2900 | return; | |
2901 | } | |
39236c6e A |
2902 | |
2903 | critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE; | |
2904 | delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta)) | |
2905 | || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE; | |
2906 | ||
2907 | if (critical || delta) { | |
39236c6e | 2908 | memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem); |
39236c6e | 2909 | memorystatus_thread_wake(); |
b0d623f7 | 2910 | } |
fe8ab488 | 2911 | #endif /* VM_PRESSURE_EVENTS */ |
316670eb A |
2912 | } |
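/*
 * Illustrative sketch (not part of this file): in the non-VM_PRESSURE_EVENTS
 * path above, the thread is only woken when the situation is critical or the
 * page count has moved by at least memorystatus_delta in either direction --
 * a simple hysteresis that avoids waking on tiny fluctuations.  A standalone
 * model of that check:
 */
#include <stdbool.h>

static bool
should_wake(unsigned int new_avail, unsigned int old_avail,
    unsigned int critical, unsigned int delta)
{
	bool is_critical = (new_avail < critical);
	/* Moved by >= delta, up or down, since the last recorded value. */
	bool moved = (new_avail >= old_avail + delta) ||
	    (old_avail >= new_avail + delta);

	return is_critical || moved;
}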
2913 | ||
2914 | static boolean_t | |
3e170ce0 | 2915 | memorystatus_init_jetsam_snapshot_entry_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry) |
316670eb | 2916 | { |
fe8ab488 A |
2917 | clock_sec_t tv_sec; |
2918 | clock_usec_t tv_usec; | |
2919 | ||
39236c6e | 2920 | memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t)); |
316670eb A |
2921 | |
2922 | entry->pid = p->p_pid; | |
2923 | strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1); | |
39236c6e | 2924 | entry->priority = p->p_memstat_effectivepriority; |
fe8ab488 | 2925 | memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages, &entry->max_pages_lifetime, &entry->purgeable_pages); |
39236c6e A |
2926 | entry->state = memorystatus_build_state(p); |
2927 | entry->user_data = p->p_memstat_userdata; | |
316670eb | 2928 | memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid)); |
fe8ab488 A |
2929 | entry->fds = p->p_fd->fd_nfiles; |
2930 | ||
2931 | absolutetime_to_microtime(get_task_cpu_time(p->task), &tv_sec, &tv_usec); | |
2932 | entry->cpu_time.tv_sec = tv_sec; | |
2933 | entry->cpu_time.tv_usec = tv_usec; | |
316670eb A |
2934 | |
2935 | return TRUE; | |
b0d623f7 A |
2936 | } |
2937 | ||
2938 | static void | |
3e170ce0 | 2939 | memorystatus_init_snapshot_vmstats(memorystatus_jetsam_snapshot_t *snapshot) |
b0d623f7 | 2940 | { |
39236c6e | 2941 | kern_return_t kr = KERN_SUCCESS; |
39236c6e A |
2942 | mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; |
2943 | vm_statistics64_data_t vm_stat; | |
2944 | ||
2945 | if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count)) != KERN_SUCCESS) { | |
3e170ce0 A |
2946 | printf("memorystatus_init_jetsam_snapshot_stats: host_statistics64 failed with %d\n", kr); |
2947 | memset(&snapshot->stats, 0, sizeof(snapshot->stats)); | |
39236c6e | 2948 | } else { |
3e170ce0 A |
2949 | snapshot->stats.free_pages = vm_stat.free_count; |
2950 | snapshot->stats.active_pages = vm_stat.active_count; | |
2951 | snapshot->stats.inactive_pages = vm_stat.inactive_count; | |
2952 | snapshot->stats.throttled_pages = vm_stat.throttled_count; | |
2953 | snapshot->stats.purgeable_pages = vm_stat.purgeable_count; | |
2954 | snapshot->stats.wired_pages = vm_stat.wire_count; | |
2955 | ||
2956 | snapshot->stats.speculative_pages = vm_stat.speculative_count; | |
2957 | snapshot->stats.filebacked_pages = vm_stat.external_page_count; | |
2958 | snapshot->stats.anonymous_pages = vm_stat.internal_page_count; | |
2959 | snapshot->stats.compressions = vm_stat.compressions; | |
2960 | snapshot->stats.decompressions = vm_stat.decompressions; | |
2961 | snapshot->stats.compressor_pages = vm_stat.compressor_page_count; | |
2962 | snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor; | |
2963 | } | |
2964 | } | |
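/*
 * Illustrative sketch (not part of this file): host_statistics64() is also
 * callable from user space on macOS, which is a convenient way to inspect
 * the same counters the snapshot records above.  A minimal program:
 */
#include <mach/mach.h>
#include <stdio.h>

int
main(void)
{
	vm_statistics64_data_t vm_stat;
	mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
	kern_return_t kr;

	kr = host_statistics64(mach_host_self(), HOST_VM_INFO64,
	    (host_info64_t)&vm_stat, &count);
	if (kr != KERN_SUCCESS) {
		fprintf(stderr, "host_statistics64 failed: %d\n", kr);
		return 1;
	}
	printf("free %u active %u wired %u compressor %u\n",
	    vm_stat.free_count, vm_stat.active_count,
	    vm_stat.wire_count, vm_stat.compressor_page_count);
	return 0;
}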
2965 | ||
2966 | /* | |
2967 | * Collect vm statistics at boot. | |
2968 | * Called only once (see kern_exec.c) | |
2969 | * Data can be consumed at any time. | |
2970 | */ | |
2971 | void | |
2972 | memorystatus_init_at_boot_snapshot(void) { | |
2973 | memorystatus_init_snapshot_vmstats(&memorystatus_at_boot_snapshot); | |
2974 | memorystatus_at_boot_snapshot.entry_count = 0; | |
2975 | memorystatus_at_boot_snapshot.notification_time = 0; /* updated when consumed */ | |
2976 | memorystatus_at_boot_snapshot.snapshot_time = mach_absolute_time(); | |
2977 | } | |
2978 | ||
2979 | static void | |
2980 | memorystatus_init_jetsam_snapshot_locked(memorystatus_jetsam_snapshot_t *od_snapshot, uint32_t ods_list_count ) | |
2981 | { | |
2982 | proc_t p, next_p; | |
2983 | unsigned int b = 0, i = 0; | |
2984 | ||
2985 | memorystatus_jetsam_snapshot_t *snapshot = NULL; | |
2986 | memorystatus_jetsam_snapshot_entry_t *snapshot_list = NULL; | |
2987 | unsigned int snapshot_max = 0; | |
2988 | ||
2989 | if (od_snapshot) { | |
2990 | /* | |
2991 | * This is an on_demand snapshot | |
2992 | */ | |
2993 | snapshot = od_snapshot; | |
2994 | snapshot_list = od_snapshot->entries; | |
2995 | snapshot_max = ods_list_count; | |
2996 | } else { | |
2997 | /* | |
2998 | * This is a jetsam event snapshot | |
2999 | */ | |
3000 | snapshot = memorystatus_jetsam_snapshot; | |
3001 | snapshot_list = memorystatus_jetsam_snapshot->entries; | |
3002 | snapshot_max = memorystatus_jetsam_snapshot_max; | |
39236c6e A |
3003 | } |
3004 | ||
3e170ce0 A |
3005 | memorystatus_init_snapshot_vmstats(snapshot); |
3006 | ||
39236c6e A |
3007 | next_p = memorystatus_get_first_proc_locked(&b, TRUE); |
3008 | while (next_p) { | |
3009 | p = next_p; | |
3010 | next_p = memorystatus_get_next_proc_locked(&b, p, TRUE); | |
3011 | ||
3e170ce0 | 3012 | if (FALSE == memorystatus_init_jetsam_snapshot_entry_locked(p, &snapshot_list[i])) { |
316670eb A |
3013 | continue; |
3014 | } | |
3015 | ||
3e170ce0 | 3016 | MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", |
b0d623f7 A |
3017 | p->p_pid, |
3018 | p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7], | |
3019 | p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]); | |
316670eb | 3020 | |
3e170ce0 | 3021 | if (++i == snapshot_max) { |
b0d623f7 A |
3022 | break; |
3023 | } | |
3024 | } | |
39236c6e | 3025 | |
3e170ce0 A |
3026 | snapshot->snapshot_time = mach_absolute_time(); |
3027 | snapshot->entry_count = i; | |
3028 | ||
3029 | if (!od_snapshot) { | |
3030 | /* update the system buffer count */ | |
3031 | memorystatus_jetsam_snapshot_count = i; | |
3032 | } | |
b0d623f7 A |
3033 | } |
3034 | ||
39236c6e | 3035 | #if DEVELOPMENT || DEBUG |
b0d623f7 | 3036 | |
39236c6e A |
3037 | static int |
3038 | memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) { | |
3039 | int ret; | |
3040 | memorystatus_jetsam_panic_options_t debug; | |
3041 | ||
3042 | if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) { | |
3043 | return EINVAL; | |
b0d623f7 | 3044 | } |
39236c6e A |
3045 | |
3046 | ret = copyin(buffer, &debug, buffer_size); | |
3047 | if (ret) { | |
3048 | return ret; | |
3049 | } | |
3050 | ||
3051 | /* Panic bits match kMemorystatusKilled* enum */ | |
3052 | memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask); | |
3053 | ||
3054 | /* Copyout new value */ | |
3055 | debug.data = memorystatus_jetsam_panic_debug; | |
3056 | ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t)); | |
3057 | ||
3058 | return ret; | |
b0d623f7 A |
3059 | } |
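/*
 * Illustrative note (not part of this file): the panic-bits update above is
 * the classic read-modify-write under a mask -- bits selected by "mask" are
 * taken from "data", and all other bits are preserved:
 *
 *	new = (old & ~mask) | (data & mask);
 *
 * Worked example with old = 0b1010, mask = 0b0110, data = 0b0100:
 * (0b1010 & 0b1001) | (0b0100 & 0b0110) = 0b1000 | 0b0100 = 0b1100.
 */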
3060 | ||
3e170ce0 A |
3061 | /* |
3062 | * Triggers a sort_order on a specified jetsam priority band. | |
3063 | * This is for testing only, used to force a path through the sort | |
3064 | * function. | |
3065 | */ | |
3066 | static int | |
3067 | memorystatus_cmd_test_jetsam_sort(int priority, int sort_order) { | |
3068 | ||
3069 | int error = 0; | |
3070 | ||
3071 | unsigned int bucket_index = 0; | |
3072 | ||
3073 | if (priority == -1) { | |
3074 | /* Use as shorthand for default priority */ | |
3075 | bucket_index = JETSAM_PRIORITY_DEFAULT; | |
3076 | } else { | |
3077 | bucket_index = (unsigned int)priority; | |
3078 | } | |
3079 | ||
3080 | error = memorystatus_sort_bucket(bucket_index, sort_order); | |
3081 | ||
3082 | return (error); | |
3083 | } | |
3084 | ||
39236c6e A |
3085 | #endif |
3086 | ||
3087 | /* | |
3088 | * Jetsam a specific process. | |
3089 | */ | |
3090 | static boolean_t | |
3091 | memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) { | |
3092 | boolean_t killed; | |
b0d623f7 | 3093 | proc_t p; |
39236c6e A |
3094 | |
3095 | /* TODO - add a victim queue and push this into the main jetsam thread */ | |
39236c6e A |
3096 | p = proc_find(victim_pid); |
3097 | if (!p) { | |
3098 | return FALSE; | |
3099 | } | |
3100 | ||
39236c6e A |
3101 | proc_list_lock(); |
3102 | ||
4bd07ac2 A |
3103 | if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || |
3104 | (p->p_listflag & P_LIST_EXITED) || | |
3105 | (p->p_memstat_state & P_MEMSTAT_ERROR)) { | |
3106 | proc_list_unlock(); | |
3107 | proc_rele(p); | |
3108 | return FALSE; | |
3109 | } | |
3110 | ||
3111 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; | |
3112 | ||
39236c6e | 3113 | if (memorystatus_jetsam_snapshot_count == 0) { |
3e170ce0 | 3114 | memorystatus_init_jetsam_snapshot_locked(NULL,0); |
39236c6e A |
3115 | } |
3116 | ||
3e170ce0 | 3117 | memorystatus_update_jetsam_snapshot_entry_locked(p, cause); |
39236c6e | 3118 | proc_list_unlock(); |
4bd07ac2 A |
3119 | |
3120 | printf("memorystatus: specifically killing pid %d [%s] (%s %d) - memorystatus_available_pages: %d\n", | |
3121 | victim_pid, (p->p_comm ? p->p_comm : "(unknown)"), | |
3122 | jetsam_kill_cause_name[cause], p->p_memstat_effectivepriority, memorystatus_available_pages); | |
3123 | ||
39236c6e A |
3124 | |
3125 | killed = memorystatus_do_kill(p, cause); | |
3126 | proc_rele(p); | |
3127 | ||
3128 | return killed; | |
3129 | } | |
3130 | ||
3131 | /* | |
3132 | * Jetsam the first process in the queue. | |
3133 | */ | |
3134 | static boolean_t | |
3e170ce0 | 3135 | memorystatus_kill_top_process(boolean_t any, boolean_t sort_flag, uint32_t cause, int32_t *priority, uint32_t *errors) |
39236c6e A |
3136 | { |
3137 | pid_t aPid; | |
3138 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
3139 | boolean_t new_snapshot = FALSE, killed = FALSE; | |
3e170ce0 | 3140 | int kill_count = 0; |
39236c6e | 3141 | unsigned int i = 0; |
3e170ce0 | 3142 | uint32_t aPid_ep; |
b0d623f7 | 3143 | |
6d2010ae A |
3144 | #ifndef CONFIG_FREEZE |
3145 | #pragma unused(any) | |
3146 | #endif | |
316670eb | 3147 | |
39236c6e A |
3148 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START, |
3149 | memorystatus_available_pages, 0, 0, 0, 0); | |
6d2010ae | 3150 | |
316670eb | 3151 | |
3e170ce0 A |
3152 | if (sort_flag == TRUE) { |
3153 | (void)memorystatus_sort_bucket(JETSAM_PRIORITY_FOREGROUND, JETSAM_SORT_DEFAULT); | |
3154 | } | |
3155 | ||
3156 | proc_list_lock(); | |
fe8ab488 | 3157 | |
39236c6e A |
3158 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
3159 | while (next_p) { | |
316670eb A |
3160 | #if DEVELOPMENT || DEBUG |
3161 | int activeProcess; | |
3162 | int procSuspendedForDiagnosis; | |
3163 | #endif /* DEVELOPMENT || DEBUG */ | |
39236c6e A |
3164 | |
3165 | p = next_p; | |
3166 | next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
3167 | ||
6d2010ae | 3168 | #if DEVELOPMENT || DEBUG |
39236c6e A |
3169 | activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND; |
3170 | procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED; | |
6d2010ae | 3171 | #endif /* DEVELOPMENT || DEBUG */ |
316670eb | 3172 | |
39236c6e | 3173 | aPid = p->p_pid; |
3e170ce0 | 3174 | aPid_ep = p->p_memstat_effectivepriority; |
316670eb | 3175 | |
39236c6e A |
3176 | if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { |
3177 | continue; | |
b0d623f7 | 3178 | } |
39236c6e | 3179 | |
6d2010ae | 3180 | #if DEVELOPMENT || DEBUG |
39236c6e A |
3181 | if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) { |
3182 | printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid); | |
3183 | continue; | |
3184 | } | |
6d2010ae | 3185 | #endif /* DEVELOPMENT || DEBUG */ |
316670eb | 3186 | |
fe8ab488 A |
3187 | if (cause == kMemorystatusKilledVnodes) |
3188 | { | |
3189 | /* | |
3190 | * If the system runs out of vnodes, we systematically jetsam | |
3191 | * processes in hopes of stumbling onto a vnode gain that helps | |
3192 | * the system recover. The process that happens to trigger | |
3193 | * this path has no known relationship to the vnode consumption. | |
3194 | * We attempt to safeguard that process, i.e., we do not jetsam it. | |
3195 | */ | |
3196 | ||
3197 | if (p == current_proc()) { | |
3198 | /* do not jetsam the current process */ | |
3199 | continue; | |
3200 | } | |
3201 | } | |
3202 | ||
6d2010ae | 3203 | #if CONFIG_FREEZE |
39236c6e A |
3204 | boolean_t skip; |
3205 | boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM)); | |
3206 | if (any || reclaim_proc) { | |
3207 | skip = FALSE; | |
3208 | } else { | |
3209 | skip = TRUE; | |
3210 | } | |
316670eb | 3211 | |
39236c6e A |
3212 | if (skip) { |
3213 | continue; | |
3214 | } else | |
6d2010ae | 3215 | #endif |
39236c6e | 3216 | { |
39236c6e A |
3217 | /* |
3218 | * Capture a snapshot if none exists and: | |
3219 | * - priority was not requested (this is something other than an ambient kill) | |
3220 | * - the priority was requested *and* the targeted process is not at idle priority | |
3221 | */ | |
3222 | if ((memorystatus_jetsam_snapshot_count == 0) && | |
fe8ab488 | 3223 | (memorystatus_idle_snapshot || ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE))))) { |
3e170ce0 | 3224 | memorystatus_init_jetsam_snapshot_locked(NULL,0); |
39236c6e A |
3225 | new_snapshot = TRUE; |
3226 | } | |
3227 | ||
3228 | /* | |
3229 | * Mark as terminated so that if exit1() indicates success, but the process (for example) | |
3230 | * is blocked in task_exception_notify(), it'll be skipped if encountered again - see | |
3231 | * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the | |
3232 | * acquisition of the proc lock. | |
3233 | */ | |
3234 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; | |
3235 | ||
6d2010ae | 3236 | #if DEVELOPMENT || DEBUG |
39236c6e A |
3237 | if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) { |
3238 | MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n", | |
3239 | aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level); | |
3e170ce0 | 3240 | memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledDiagnostic); |
39236c6e A |
3241 | p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED; |
3242 | if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) { | |
3243 | jetsam_diagnostic_suspended_one_active_proc = 1; | |
3244 | printf("jetsam: returning after suspending first active proc - %d\n", aPid); | |
3245 | } | |
3246 | ||
3247 | p = proc_ref_locked(p); | |
3248 | proc_list_unlock(); | |
3249 | if (p) { | |
316670eb | 3250 | task_suspend(p->task); |
3e170ce0 A |
3251 | if (priority) { |
3252 | *priority = aPid_ep; | |
3253 | } | |
316670eb | 3254 | proc_rele(p); |
39236c6e A |
3255 | killed = TRUE; |
3256 | } | |
3257 | ||
3258 | goto exit; | |
3259 | } else | |
6d2010ae | 3260 | #endif /* DEVELOPMENT || DEBUG */ |
39236c6e A |
3261 | { |
3262 | /* Shift queue, update stats */ | |
3e170ce0 A |
3263 | memorystatus_update_jetsam_snapshot_entry_locked(p, cause); |
3264 | ||
3265 | if (proc_ref_locked(p) == p) { | |
3266 | proc_list_unlock(); | |
3267 | printf("memorystatus: %s %d [%s] (%s %d) - memorystatus_available_pages: %d\n", | |
3268 | ((aPid_ep == JETSAM_PRIORITY_IDLE) ? | |
fe8ab488 A |
3269 | "idle exiting pid" : "jetsam killing pid"), |
3270 | aPid, (p->p_comm ? p->p_comm : "(unknown)"), | |
3e170ce0 A |
3271 | jetsam_kill_cause_name[cause], aPid_ep, memorystatus_available_pages); |
3272 | ||
39236c6e | 3273 | killed = memorystatus_do_kill(p, cause); |
3e170ce0 A |
3274 | |
3275 | /* Success? */ | |
3276 | if (killed) { | |
3277 | if (priority) { | |
3278 | *priority = aPid_ep; | |
3279 | } | |
3280 | proc_rele(p); | |
3281 | kill_count++; | |
3282 | goto exit; | |
3283 | } | |
39236c6e | 3284 | |
3e170ce0 A |
3285 | /* |
3286 | * Failure - first unwind the state, | |
3287 | * then fall through to restart the search. | |
3288 | */ | |
3289 | proc_list_lock(); | |
3290 | proc_rele_locked(p); | |
3291 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
3292 | p->p_memstat_state |= P_MEMSTAT_ERROR; | |
3293 | *errors += 1; | |
6d2010ae | 3294 | } |
39236c6e | 3295 | |
3e170ce0 A |
3296 | /* |
3297 | * Failure - restart the search. | |
3298 | * | |
3299 | * We might have raced with "p" exiting on another core, resulting in no | |
3300 | * ref on "p". Or, we may have failed to kill "p". | |
3301 | * | |
3302 | * Either way, we fall thru to here, leaving the proc in the | |
3303 | * P_MEMSTAT_TERMINATED state. | |
3304 | * | |
3305 | * And, we hold the proc_list_lock at this point. | |
3306 | */ | |
3307 | ||
39236c6e A |
3308 | i = 0; |
3309 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
6d2010ae | 3310 | } |
b0d623f7 | 3311 | } |
b0d623f7 | 3312 | } |
316670eb | 3313 | |
39236c6e | 3314 | proc_list_unlock(); |
316670eb | 3315 | |
39236c6e A |
3316 | exit: |
3317 | /* Clear snapshot if freshly captured and no target was found */ | |
3318 | if (new_snapshot && !killed) { | |
3319 | memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
316670eb A |
3320 | } |
3321 | ||
39236c6e | 3322 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END, |
3e170ce0 | 3323 | memorystatus_available_pages, killed ? aPid : 0, kill_count, 0, 0); |
b0d623f7 | 3324 | |
39236c6e | 3325 | return killed; |
316670eb A |
3326 | } |
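/*
 * Illustrative sketch (not part of this file): the kill loop above follows a
 * common shape -- mark the candidate and take a reference under the list
 * lock, drop the lock for the expensive operation, and on failure re-take
 * the lock, unwind the marker bits, and restart the scan.  A standalone
 * model of that lock/unlock discipline using a pthread mutex and
 * hypothetical names:
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

#define ST_TERMINATED 0x1u
#define ST_ERROR      0x2u

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for the kill; assumed to be slow, so no lock is held around it. */
static bool try_expensive_op(uint32_t *state) { (void)state; return false; }

static bool
process_one_candidate(uint32_t *state)
{
	pthread_mutex_lock(&list_lock);
	*state |= ST_TERMINATED;                /* mark while locked */
	pthread_mutex_unlock(&list_lock);       /* drop for the slow part */

	if (try_expensive_op(state)) {
		return true;                    /* success: stay marked */
	}

	pthread_mutex_lock(&list_lock);         /* failure: unwind state */
	*state &= ~ST_TERMINATED;
	*state |= ST_ERROR;
	pthread_mutex_unlock(&list_lock);
	return false;                           /* caller restarts the scan */
}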
3327 | ||
3e170ce0 A |
3328 | /* |
3329 | * Jetsam aggressively | |
3330 | */ | |
39236c6e | 3331 | static boolean_t |
3e170ce0 A |
3332 | memorystatus_kill_top_process_aggressive(boolean_t any, uint32_t cause, int aggr_count, int32_t priority_max, |
3333 | uint32_t *errors) | |
d1ecb069 | 3334 | { |
3e170ce0 | 3335 | pid_t aPid; |
39236c6e A |
3336 | proc_t p = PROC_NULL, next_p = PROC_NULL; |
3337 | boolean_t new_snapshot = FALSE, killed = FALSE; | |
3e170ce0 | 3338 | int kill_count = 0; |
39236c6e | 3339 | unsigned int i = 0; |
3e170ce0 A |
3340 | int32_t aPid_ep = 0; |
3341 | ||
3342 | #pragma unused(any) | |
3343 | ||
3344 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START, | |
3345 | memorystatus_available_pages, priority_max, 0, 0, 0); | |
3346 | ||
39236c6e | 3347 | proc_list_lock(); |
3e170ce0 | 3348 | |
39236c6e A |
3349 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
3350 | while (next_p) { | |
3e170ce0 A |
3351 | #if DEVELOPMENT || DEBUG |
3352 | int activeProcess; | |
3353 | int procSuspendedForDiagnosis; | |
3354 | #endif /* DEVELOPMENT || DEBUG */ | |
39236c6e | 3355 | |
3e170ce0 A |
3356 | if ((unsigned int)(next_p->p_memstat_effectivepriority) != i) { |
3357 | ||
3358 | /* | |
3359 | * We have raced with next_p running on another core, as it has | |
3360 | * moved to a different jetsam priority band. This means we have | |
3361 | * lost our place in line while traversing the jetsam list. We | |
3362 | * attempt to recover by rewinding to the beginning of the band | |
3363 | * we were already traversing. By doing this, we do not guarantee | |
3364 | * that no process escapes this aggressive march, but we can make | |
3365 | * skipping an entire range of processes less likely. (PR-21069019) | |
3366 | */ | |
3367 | ||
3368 | MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: rewinding %s moved from band %d --> %d\n", | |
3369 | aggr_count, next_p->p_comm, i, next_p->p_memstat_effectivepriority); | |
3370 | ||
3371 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
3372 | continue; | |
3373 | } | |
3374 | ||
3375 | p = next_p; | |
3376 | next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
3377 | ||
3378 | if (p->p_memstat_effectivepriority > priority_max) { | |
3379 | /* | |
3380 | * Bail out of this killing spree if we have | |
3381 | * reached beyond the priority_max jetsam band. | |
3382 | * That is, we kill up to and through the | |
3383 | * priority_max jetsam band. | |
3384 | */ | |
3385 | proc_list_unlock(); | |
3386 | goto exit; | |
3387 | } | |
3388 | ||
3389 | #if DEVELOPMENT || DEBUG | |
3390 | activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND; | |
3391 | procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED; | |
3392 | #endif /* DEVELOPMENT || DEBUG */ | |
3393 | ||
3394 | aPid = p->p_pid; | |
3395 | aPid_ep = p->p_memstat_effectivepriority; | |
3396 | ||
3397 | if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { | |
3398 | continue; | |
3399 | } | |
3400 | ||
3401 | #if DEVELOPMENT || DEBUG | |
3402 | if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) { | |
3403 | printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid); | |
3404 | continue; | |
3405 | } | |
3406 | #endif /* DEVELOPMENT || DEBUG */ | |
3407 | ||
3408 | /* | |
3409 | * Capture a snapshot if none exists. | |
3410 | */ | |
3411 | if (memorystatus_jetsam_snapshot_count == 0) { | |
3412 | memorystatus_init_jetsam_snapshot_locked(NULL,0); | |
3413 | new_snapshot = TRUE; | |
3414 | } | |
3415 | ||
3416 | /* | |
3417 | * Mark as terminated so that if exit1() indicates success, but the process (for example) | |
3418 | * is blocked in task_exception_notify(), it'll be skipped if encountered again - see | |
3419 | * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the | |
3420 | * acquisition of the proc lock. | |
3421 | */ | |
3422 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; | |
3423 | ||
3424 | /* Shift queue, update stats */ | |
3425 | memorystatus_update_jetsam_snapshot_entry_locked(p, cause); | |
3426 | ||
3427 | /* | |
3428 | * In order to kill the target process, we will drop the proc_list_lock. | |
3429 | * To guarantee that p and next_p don't disappear out from under the lock, | |
3430 | * we must take a ref on both. | |
3431 | * If we cannot get a reference, then it's likely we've raced with | |
3432 | * that process exiting on another core. | |
3433 | */ | |
3434 | if (proc_ref_locked(p) == p) { | |
3435 | if (next_p) { | |
3436 | while (next_p && (proc_ref_locked(next_p) != next_p)) { | |
3437 | proc_t temp_p; | |
3438 | ||
3439 | /* | |
3440 | * We must have raced with next_p exiting on another core. | |
3441 | * Recover by getting the next eligible process in the band. | |
3442 | */ | |
3443 | ||
3444 | MEMORYSTATUS_DEBUG(1, "memorystatus: aggressive%d: skipping %d [%s] (exiting?)\n", | |
3445 | aggr_count, next_p->p_pid, (next_p->p_comm ? next_p->p_comm : "(unknown)")); | |
3446 | ||
3447 | temp_p = next_p; | |
3448 | next_p = memorystatus_get_next_proc_locked(&i, temp_p, TRUE); | |
3449 | } | |
3450 | } | |
3451 | proc_list_unlock(); | |
3452 | ||
3453 | printf("memorystatus: aggressive%d: %s %d [%s] (%s %d) - memorystatus_available_pages: %d\n", | |
3454 | aggr_count, | |
3455 | ((aPid_ep == JETSAM_PRIORITY_IDLE) ? "idle exiting pid" : "jetsam killing pid"), | |
3456 | aPid, (p->p_comm ? p->p_comm : "(unknown)"), | |
3457 | jetsam_kill_cause_name[cause], aPid_ep, memorystatus_available_pages); | |
3458 | ||
3459 | killed = memorystatus_do_kill(p, cause); | |
3460 | ||
3461 | /* Success? */ | |
3462 | if (killed) { | |
3463 | proc_rele(p); | |
3464 | kill_count++; | |
3465 | p = NULL; | |
3466 | killed = FALSE; | |
3467 | ||
3468 | /* | |
3469 | * Continue the killing spree. | |
3470 | */ | |
3471 | proc_list_lock(); | |
3472 | if (next_p) { | |
3473 | proc_rele_locked(next_p); | |
3474 | } | |
3475 | continue; | |
3476 | } | |
3477 | ||
3478 | /* | |
3479 | * Failure - first unwind the state, | |
3480 | * then fall through to restart the search. | |
3481 | */ | |
3482 | proc_list_lock(); | |
3483 | proc_rele_locked(p); | |
3484 | if (next_p) { | |
3485 | proc_rele_locked(next_p); | |
3486 | } | |
3487 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
3488 | p->p_memstat_state |= P_MEMSTAT_ERROR; | |
3489 | *errors += 1; | |
3490 | p = NULL; | |
3491 | } | |
3492 | ||
3493 | /* | |
3494 | * Failure - restart the search at the beginning of | |
3495 | * the band we were already traversing. | |
3496 | * | |
3497 | * We might have raced with "p" exiting on another core, resulting in no | |
3498 | * ref on "p". Or, we may have failed to kill "p". | |
3499 | * | |
3500 | * Either way, we fall thru to here, leaving the proc in the | |
3501 | * P_MEMSTAT_TERMINATED or P_MEMSTAT_ERROR state. | |
3502 | * | |
3503 | * And, we hold the proc_list_lock at this point. | |
3504 | */ | |
3505 | ||
3506 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
3507 | } | |
3508 | ||
3509 | proc_list_unlock(); | |
3510 | ||
3511 | exit: | |
3512 | /* Clear snapshot if freshly captured and no target was found */ | |
3513 | if (new_snapshot && (kill_count == 0)) { | |
3514 | memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
3515 | } | |
3516 | ||
3517 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END, | |
3518 | memorystatus_available_pages, killed ? aPid : 0, kill_count, 0, 0); | |
3519 | ||
3520 | if (kill_count > 0) { | |
3521 | return(TRUE); | |
3522 | } else { | |
3523 | return(FALSE); | |
3524 | } | |
3526 | } | |
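/*
 * Illustrative sketch (not part of this file): the aggressive loop above
 * notices that a process moved bands mid-scan by comparing its effective
 * priority against the band the cursor believes it is walking, and recovers
 * by rewinding to the start of that band (see the PR-21069019 comment).
 * A standalone model over a plain array, with hypothetical names:
 */
#include <stddef.h>

typedef struct { int band; int visited; } item_t;

static void
scan_band(item_t *items, size_t n, int band)
{
	size_t idx = 0;

	while (idx < n) {
		if (items[idx].band != band) {
			/* Raced: the item is no longer in our band; rewind to
			 * the first item still in the band we were walking. */
			idx = 0;
			while (idx < n && items[idx].band != band) {
				idx++;
			}
			if (idx == n) {
				break;  /* the band drained out from under us */
			}
		}
		items[idx].visited = 1; /* stand-in for "consider this proc" */
		idx++;
	}
}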
3527 | ||
3528 | #if LEGACY_HIWATER | |
3529 | ||
3530 | static boolean_t | |
3531 | memorystatus_kill_hiwat_proc(uint32_t *errors) | |
3532 | { | |
3533 | pid_t aPid = 0; | |
3534 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
3535 | boolean_t new_snapshot = FALSE, killed = FALSE; | |
3536 | int kill_count = 0; | |
3537 | unsigned int i = 0; | |
3538 | uint32_t aPid_ep; | |
3539 | ||
3540 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START, | |
3541 | memorystatus_available_pages, 0, 0, 0, 0); | |
3542 | ||
3543 | proc_list_lock(); | |
3544 | ||
3545 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
3546 | while (next_p) { | |
3547 | uint32_t footprint; | |
3548 | boolean_t skip; | |
3549 | ||
3550 | p = next_p; | |
3551 | next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
3552 | ||
39236c6e | 3553 | aPid = p->p_pid; |
3e170ce0 | 3554 | aPid_ep = p->p_memstat_effectivepriority; |
316670eb | 3555 | |
39236c6e A |
3556 | if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) { |
3557 | continue; | |
3558 | } | |
3559 | ||
3560 | /* skip if no limit set */ | |
3561 | if (p->p_memstat_memlimit <= 0) { | |
3562 | continue; | |
d1ecb069 | 3563 | } |
3e170ce0 A |
3564 | |
3565 | #if 0 | |
3566 | /* | |
3567 | * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore. | |
3568 | * Background limits are described via the inactive limit slots. | |
3569 | * Their fatal/non-fatal setting will drive whether or not to be | |
3570 | * considered in this kill path. | |
3571 | */ | |
3572 | ||
39236c6e A |
3573 | /* skip if a currently inapplicable limit is encountered */ |
3574 | if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) { | |
3575 | continue; | |
3576 | } | |
3e170ce0 | 3577 | #endif |
39236c6e A |
3578 | |
3579 | footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024)); | |
3580 | skip = (((int32_t)footprint) <= p->p_memstat_memlimit); | |
3e170ce0 | 3581 | |
6d2010ae | 3582 | #if DEVELOPMENT || DEBUG |
39236c6e A |
3583 | if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) { |
3584 | if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) { | |
3585 | continue; | |
6d2010ae | 3586 | } |
39236c6e | 3587 | } |
6d2010ae | 3588 | #endif /* DEVELOPMENT || DEBUG */ |
316670eb | 3589 | |
6d2010ae | 3590 | #if CONFIG_FREEZE |
39236c6e A |
3591 | if (!skip) { |
3592 | if (p->p_memstat_state & P_MEMSTAT_LOCKED) { | |
3593 | skip = TRUE; | |
3594 | } else { | |
3595 | skip = FALSE; | |
3596 | } | |
3597 | } | |
6d2010ae | 3598 | #endif |
316670eb | 3599 | |
39236c6e A |
3600 | if (skip) { |
3601 | continue; | |
3602 | } else { | |
fe8ab488 A |
3603 | MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - footprint %d MB exceeds limit of %d MB\n", | |
3604 | (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending" : "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit); | |
39236c6e A |
3605 | |
3606 | if (memorystatus_jetsam_snapshot_count == 0) { | |
3e170ce0 | 3607 | memorystatus_init_jetsam_snapshot_locked(NULL,0); |
39236c6e A |
3608 | new_snapshot = TRUE; |
3609 | } | |
3610 | ||
3611 | p->p_memstat_state |= P_MEMSTAT_TERMINATED; | |
3612 | ||
6d2010ae | 3613 | #if DEVELOPMENT || DEBUG |
39236c6e A |
3614 | if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) { |
3615 | MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages); | |
3e170ce0 | 3616 | memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledDiagnostic); |
39236c6e A |
3617 | p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED; |
3618 | ||
3619 | p = proc_ref_locked(p); | |
3620 | proc_list_unlock(); | |
3621 | if (p) { | |
6d2010ae A |
3622 | task_suspend(p->task); |
3623 | proc_rele(p); | |
39236c6e A |
3624 | killed = TRUE; |
3625 | } | |
3626 | ||
3627 | goto exit; | |
3628 | } else | |
6d2010ae | 3629 | #endif /* DEVELOPMENT || DEBUG */ |
39236c6e | 3630 | { |
3e170ce0 | 3631 | memorystatus_update_jetsam_snapshot_entry_locked(p, kMemorystatusKilledHiwat); |
39236c6e | 3632 | |
3e170ce0 A |
3633 | if (proc_ref_locked(p) == p) { |
3634 | proc_list_unlock(); | |
3635 | ||
3636 | printf("memorystatus: jetsam killing pid %d [%s] (highwater %d) - memorystatus_available_pages: %d\n", | |
3637 | aPid, (p->p_comm ? p->p_comm : "(unknown)"), aPid_ep, memorystatus_available_pages); | |
3638 | ||
3639 | killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat); | |
39236c6e | 3640 | |
3e170ce0 A |
3641 | /* Success? */ |
3642 | if (killed) { | |
3643 | proc_rele(p); | |
3644 | kill_count++; | |
3645 | goto exit; | |
3646 | } | |
3647 | ||
3648 | /* | |
3649 | * Failure - first unwind the state, | |
3650 | * then fall through to restart the search. | |
3651 | */ | |
3652 | proc_list_lock(); | |
3653 | proc_rele_locked(p); | |
3654 | p->p_memstat_state &= ~P_MEMSTAT_TERMINATED; | |
3655 | p->p_memstat_state |= P_MEMSTAT_ERROR; | |
3656 | *errors += 1; | |
6d2010ae | 3657 | } |
6d2010ae | 3658 | |
3e170ce0 A |
3659 | /* |
3660 | * Failure - restart the search. | |
3661 | * | |
3662 | * We might have raced with "p" exiting on another core, resulting in no | |
3663 | * ref on "p". Or, we may have failed to kill "p". | |
3664 | * | |
3665 | * Either way, we fall thru to here, leaving the proc in the | |
3666 | * P_MEMSTAT_TERMINATED state. | |
3667 | * | |
3668 | * And, we hold the proc_list_lock at this point. | |
3669 | */ | |
3670 | ||
39236c6e A |
3671 | i = 0; |
3672 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); | |
3673 | } | |
6d2010ae A |
3674 | } |
3675 | } | |
316670eb | 3676 | |
39236c6e | 3677 | proc_list_unlock(); |
316670eb | 3678 | |
39236c6e A |
3679 | exit: |
3680 | /* Clear snapshot if freshly captured and no target was found */ | |
3681 | if (new_snapshot && !killed) { | |
3682 | memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
316670eb A |
3683 | } |
3684 | ||
39236c6e | 3685 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END, |
3e170ce0 | 3686 | memorystatus_available_pages, killed ? aPid : 0, kill_count, 0, 0); |
6d2010ae | 3687 | |
39236c6e | 3688 | return killed; |
316670eb | 3689 | } |
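/*
 * Illustrative sketch (not part of this file): the highwater scan above
 * compares a task's physical footprint, converted to MB, against its
 * per-process soft limit; processes with no limit set, or at or under
 * their limit, are skipped.  The check in isolation:
 */
#include <stdbool.h>
#include <stdint.h>

static bool
over_highwater(uint64_t footprint_bytes, int32_t memlimit_mb)
{
	uint32_t footprint_mb = (uint32_t)(footprint_bytes / (1024 * 1024));

	if (memlimit_mb <= 0) {
		return false;           /* no limit set: skip */
	}
	return ((int32_t)footprint_mb > memlimit_mb);
}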
2d21ac55 | 3690 | |
39236c6e | 3691 | #endif /* LEGACY_HIWATER */ |
316670eb | 3692 | |
39236c6e A |
3693 | static boolean_t |
3694 | memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) { | |
3695 | /* TODO: allow a general async path */ | |
fe8ab488 A |
3696 | if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage && cause != kMemorystatusKilledVMThrashing && |
3697 | cause != kMemorystatusKilledFCThrashing)) { | |
39236c6e | 3698 | return FALSE; |
316670eb | 3699 | } |
39236c6e | 3700 | |
fe8ab488 | 3701 | kill_under_pressure_cause = cause; |
39236c6e A |
3702 | memorystatus_thread_wake(); |
3703 | return TRUE; | |
3704 | } | |
2d21ac55 | 3705 | |
39236c6e A |
3706 | static boolean_t |
3707 | memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) { | |
3708 | boolean_t res; | |
3709 | uint32_t errors = 0; | |
3710 | ||
3711 | if (victim_pid == -1) { | |
3712 | /* No pid, so kill first process */ | |
3e170ce0 | 3713 | res = memorystatus_kill_top_process(TRUE, TRUE, cause, NULL, &errors); |
39236c6e A |
3714 | } else { |
3715 | res = memorystatus_kill_specific_process(victim_pid, cause); | |
3716 | } | |
3717 | ||
3718 | if (errors) { | |
3719 | memorystatus_clear_errors(); | |
3720 | } | |
3721 | ||
3722 | if (res == TRUE) { | |
3723 | /* Fire off snapshot notification */ | |
3724 | size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + | |
3725 | sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count; | |
3e170ce0 A |
3726 | uint64_t timestamp_now = mach_absolute_time(); |
3727 | memorystatus_jetsam_snapshot->notification_time = timestamp_now; | |
3728 | if (memorystatus_jetsam_snapshot_last_timestamp == 0 || | |
3729 | timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout) { | |
3730 | int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); | |
3731 | if (!ret) { | |
3732 | proc_list_lock(); | |
3733 | memorystatus_jetsam_snapshot_last_timestamp = timestamp_now; | |
3734 | proc_list_unlock(); | |
3735 | } | |
3736 | } | |
39236c6e A |
3737 | } |
3738 | ||
3739 | return res; | |
3740 | } | |
b0d623f7 | 3741 | |
39236c6e A |
3742 | boolean_t |
3743 | memorystatus_kill_on_VM_page_shortage(boolean_t async) { | |
3744 | if (async) { | |
3745 | return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage); | |
3746 | } else { | |
3747 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage); | |
3748 | } | |
3749 | } | |
2d21ac55 | 3750 | |
39236c6e A |
3751 | boolean_t |
3752 | memorystatus_kill_on_VM_thrashing(boolean_t async) { | |
3753 | if (async) { | |
3754 | return memorystatus_kill_process_async(-1, kMemorystatusKilledVMThrashing); | |
3755 | } else { | |
3756 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMThrashing); | |
2d21ac55 A |
3757 | } |
3758 | } | |
b0d623f7 | 3759 | |
fe8ab488 A |
3760 | boolean_t |
3761 | memorystatus_kill_on_FC_thrashing(boolean_t async) { | |
3762 | if (async) { | |
3763 | return memorystatus_kill_process_async(-1, kMemorystatusKilledFCThrashing); | |
3764 | } else { | |
3765 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledFCThrashing); | |
3766 | } | |
3767 | } | |
3768 | ||
39236c6e A |
3769 | boolean_t |
3770 | memorystatus_kill_on_vnode_limit(void) { | |
3771 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes); | |
3772 | } | |
3773 | ||
316670eb A |
3774 | #endif /* CONFIG_JETSAM */ |
3775 | ||
6d2010ae A |
3776 | #if CONFIG_FREEZE |
3777 | ||
3778 | __private_extern__ void | |
316670eb | 3779 | memorystatus_freeze_init(void) |
6d2010ae | 3780 | { |
316670eb A |
3781 | kern_return_t result; |
3782 | thread_t thread; | |
3e170ce0 A |
3783 | |
3784 | freezer_lck_grp_attr = lck_grp_attr_alloc_init(); | |
3785 | freezer_lck_grp = lck_grp_alloc_init("freezer", freezer_lck_grp_attr); | |
3786 | ||
3787 | lck_mtx_init(&freezer_mutex, freezer_lck_grp, NULL); | |
39236c6e | 3788 | |
316670eb A |
3789 | result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread); |
3790 | if (result == KERN_SUCCESS) { | |
3791 | thread_deallocate(thread); | |
3792 | } else { | |
3793 | panic("Could not create memorystatus_freeze_thread"); | |
3794 | } | |
6d2010ae A |
3795 | } |
3796 | ||
3e170ce0 A |
3797 | /* |
3798 | * Synchronously freeze the passed proc. Called with a reference to the proc held. | |
3799 | * | |
3800 | * Returns EINVAL or the value returned by task_freeze(). | |
3801 | */ | |
3802 | int | |
3803 | memorystatus_freeze_process_sync(proc_t p) | |
3804 | { | |
3805 | int ret = EINVAL; | |
3806 | pid_t aPid = 0; | |
3807 | boolean_t memorystatus_freeze_swap_low = FALSE; | |
3808 | ||
3809 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, | |
3810 | memorystatus_available_pages, 0, 0, 0, 0); | |
3811 | ||
3812 | lck_mtx_lock(&freezer_mutex); | |
3813 | ||
3814 | if (p == NULL) { | |
3815 | goto exit; | |
3816 | } | |
3817 | ||
3818 | if (memorystatus_freeze_enabled == FALSE) { | |
3819 | goto exit; | |
3820 | } | |
3821 | ||
3822 | if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { | |
3823 | goto exit; | |
3824 | } | |
3825 | ||
3826 | if (memorystatus_freeze_update_throttle()) { | |
3827 | printf("memorystatus_freeze_process_sync: in throttle, ignorning freeze\n"); | |
3828 | memorystatus_freeze_throttle_count++; | |
3829 | goto exit; | |
3830 | } | |
3831 | ||
3832 | proc_list_lock(); | |
3833 | ||
3834 | if (p != NULL) { | |
3835 | uint32_t purgeable, wired, clean, dirty, state; | |
3836 | uint32_t max_pages, pages, i; | |
3837 | boolean_t shared; | |
3838 | ||
3839 | aPid = p->p_pid; | |
3840 | state = p->p_memstat_state; | |
3841 | ||
3842 | /* Ensure the process is eligible for freezing */ | |
3843 | if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) { | |
3844 | proc_list_unlock(); | |
3845 | goto exit; | |
3846 | } | |
3847 | ||
3848 | /* Only freeze processes meeting our minimum resident page criteria */ | |
3849 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); | |
3850 | if (pages < memorystatus_freeze_pages_min) { | |
3851 | proc_list_unlock(); | |
3852 | goto exit; | |
3853 | } | |
3854 | ||
3855 | if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { | |
3856 | ||
3857 | unsigned int avail_swap_space = 0; /* in pages. */ | |
3858 | ||
3859 | if (DEFAULT_FREEZER_IS_ACTIVE) { | |
3860 | /* | |
3861 | * Freezer backed by default pager and swap file(s). | |
3862 | */ | |
3863 | avail_swap_space = default_pager_swap_pages_free(); | |
3864 | } else { | |
3865 | /* | |
3866 | * Freezer backed by the compressor and swap file(s) | |
3867 | * which will hold compressed data. | |
3868 | */ | |
3869 | avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64; | |
3870 | } | |
3871 | ||
3872 | max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max); | |
3873 | ||
3874 | if (max_pages < memorystatus_freeze_pages_min) { | |
3875 | proc_list_unlock(); | |
3876 | goto exit; | |
3877 | } | |
3878 | } else { | |
3879 | /* | |
3880 | * We only have the compressor without any swap. | |
3881 | */ | |
3882 | max_pages = UINT32_MAX - 1; | |
3883 | } | |
3884 | ||
3885 | /* Mark as locked temporarily to avoid kill */ | |
3886 | p->p_memstat_state |= P_MEMSTAT_LOCKED; | |
3887 | proc_list_unlock(); | |
3888 | ||
3889 | ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); | |
3890 | ||
3891 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - " | |
3892 | "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", | |
3893 | (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"), | |
3894 | memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free()); | |
3895 | ||
3896 | proc_list_lock(); | |
3897 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
3898 | ||
3899 | if (ret == KERN_SUCCESS) { | |
3900 | memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; | |
3901 | ||
3902 | memorystatus_frozen_count++; | |
3903 | ||
3904 | p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0: P_MEMSTAT_NORECLAIM)); | |
3905 | ||
3906 | if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { | |
3907 | /* Update stats */ | |
3908 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
3909 | throttle_intervals[i].pageouts += dirty; | |
3910 | } | |
3911 | } | |
3912 | ||
3913 | memorystatus_freeze_pageouts += dirty; | |
3914 | memorystatus_freeze_count++; | |
3915 | ||
3916 | proc_list_unlock(); | |
3917 | ||
3918 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); | |
3919 | } else { | |
3920 | proc_list_unlock(); | |
3921 | } | |
3922 | } | |
3923 | ||
3924 | exit: | |
3925 | lck_mtx_unlock(&freezer_mutex); | |
3926 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, | |
3927 | memorystatus_available_pages, aPid, 0, 0, 0); | |
3928 | ||
3929 | return ret; | |
3930 | } | |
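/*
 * Illustrative sketch (not part of this file): when the freezer is backed by
 * swap, the pages that may be moved out for one process are capped by both
 * the free swap space and the per-freeze maximum, and the freeze is
 * abandoned if that cap falls below the per-freeze minimum.  The sizing
 * logic alone, with hypothetical names:
 */
#include <stdbool.h>
#include <stdint.h>

static bool
freeze_budget(uint32_t avail_swap_pages, uint32_t freeze_pages_max,
    uint32_t freeze_pages_min, uint32_t *max_pages_out)
{
	uint32_t max_pages = (avail_swap_pages < freeze_pages_max)
	    ? avail_swap_pages : freeze_pages_max;      /* MIN() */

	if (max_pages < freeze_pages_min) {
		return false;   /* not worth freezing; bail out */
	}
	*max_pages_out = max_pages;
	return true;
}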
3931 | ||
316670eb | 3932 | static int |
39236c6e | 3933 | memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low) |
6d2010ae | 3934 | { |
39236c6e A |
3935 | pid_t aPid = 0; |
3936 | int ret = -1; | |
3937 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
3938 | unsigned int i = 0; | |
6d2010ae | 3939 | |
39236c6e A |
3940 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, |
3941 | memorystatus_available_pages, 0, 0, 0, 0); | |
3942 | ||
3943 | proc_list_lock(); | |
6d2010ae | 3944 | |
39236c6e A |
3945 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
3946 | while (next_p) { | |
3947 | kern_return_t kr; | |
3948 | uint32_t purgeable, wired, clean, dirty; | |
3949 | boolean_t shared; | |
3950 | uint32_t pages; | |
3951 | uint32_t max_pages = 0; | |
316670eb A |
3952 | uint32_t state; |
3953 | ||
39236c6e A |
3954 | p = next_p; |
3955 | next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
6d2010ae | 3956 | |
39236c6e A |
3957 | aPid = p->p_pid; |
3958 | state = p->p_memstat_state; | |
6d2010ae | 3959 | |
316670eb | 3960 | /* Ensure the process is eligible for freezing */ |
39236c6e | 3961 | if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) { |
316670eb A |
3962 | continue; // with lock held |
3963 | } | |
316670eb | 3964 | |
39236c6e | 3965 | /* Only freeze processes meeting our minimum resident page criteria */ |
fe8ab488 | 3966 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); |
39236c6e A |
3967 | if (pages < memorystatus_freeze_pages_min) { |
3968 | continue; // with lock held | |
3969 | } | |
6d2010ae | 3970 | |
fe8ab488 | 3971 | if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { |
3e170ce0 A |
3972 | |
3973 | /* Ensure there's enough free space to freeze this process. */ | |
3974 | ||
3975 | unsigned int avail_swap_space = 0; /* in pages. */ | |
3976 | ||
3977 | if (DEFAULT_FREEZER_IS_ACTIVE) { | |
3978 | /* | |
3979 | * Freezer backed by default pager and swap file(s). | |
3980 | */ | |
3981 | avail_swap_space = default_pager_swap_pages_free(); | |
3982 | } else { | |
3983 | /* | |
3984 | * Freezer backed by the compressor and swap file(s) | |
3985 | * which will hold compressed data. | |
3986 | */ | |
3987 | avail_swap_space = vm_swap_get_free_space() / PAGE_SIZE_64; | |
3988 | } | |
3989 | ||
3990 | max_pages = MIN(avail_swap_space, memorystatus_freeze_pages_max); | |
3991 | ||
316670eb A |
3992 | if (max_pages < memorystatus_freeze_pages_min) { |
3993 | *memorystatus_freeze_swap_low = TRUE; | |
39236c6e A |
3994 | proc_list_unlock(); |
3995 | goto exit; | |
316670eb | 3996 | } |
39236c6e | 3997 | } else { |
3e170ce0 A |
3998 | /* |
3999 | * We only have the compressor pool. | |
4000 | */ | |
39236c6e A |
4001 | max_pages = UINT32_MAX - 1; |
4002 | } | |
4003 | ||
4004 | /* Mark as locked temporarily to avoid kill */ | |
4005 | p->p_memstat_state |= P_MEMSTAT_LOCKED; | |
4006 | ||
4007 | p = proc_ref_locked(p); | |
4008 | proc_list_unlock(); | |
4009 | if (!p) { | |
4010 | goto exit; | |
4011 | } | |
4012 | ||
4013 | kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); | |
4014 | ||
4015 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - " | |
4016 | "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", | |
4017 | (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"), | |
4018 | memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free()); | |
4019 | ||
4020 | proc_list_lock(); | |
4021 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
4022 | ||
4023 | /* Success? */ | |
4024 | if (KERN_SUCCESS == kr) { | |
4025 | memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; | |
316670eb | 4026 | |
39236c6e | 4027 | memorystatus_frozen_count++; |
316670eb | 4028 | |
39236c6e A |
4029 | p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0: P_MEMSTAT_NORECLAIM)); |
4030 | ||
3e170ce0 A |
4031 | if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { |
4032 | /* Update stats */ | |
4033 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
4034 | throttle_intervals[i].pageouts += dirty; | |
4035 | } | |
39236c6e | 4036 | } |
3e170ce0 | 4037 | |
39236c6e A |
4038 | memorystatus_freeze_pageouts += dirty; |
4039 | memorystatus_freeze_count++; | |
4040 | ||
4041 | proc_list_unlock(); | |
6d2010ae | 4042 | |
39236c6e | 4043 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); |
6d2010ae | 4044 | |
3e170ce0 A |
4045 | /* Return KERN_SUCCESS */ | |
4046 | ret = kr; | |
6d2010ae | 4047 | |
39236c6e A |
4048 | } else { |
4049 | proc_list_unlock(); | |
316670eb | 4050 | } |
39236c6e A |
4051 | |
4052 | proc_rele(p); | |
4053 | goto exit; | |
6d2010ae | 4054 | } |
316670eb | 4055 | |
39236c6e A |
4056 | proc_list_unlock(); |
4057 | ||
4058 | exit: | |
4059 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, | |
4060 | memorystatus_available_pages, aPid, 0, 0, 0); | |
316670eb | 4061 | |
39236c6e | 4062 | return ret; |
6d2010ae A |
4063 | } |
4064 | ||
316670eb A |
4065 | static inline boolean_t |
4066 | memorystatus_can_freeze_processes(void) | |
6d2010ae | 4067 | { |
316670eb | 4068 | boolean_t ret; |
6d2010ae | 4069 | |
39236c6e | 4070 | proc_list_lock(); |
316670eb A |
4071 | |
4072 | if (memorystatus_suspended_count) { | |
4073 | uint32_t average_resident_pages, estimated_processes; | |
4074 | ||
4075 | /* Estimate the number of suspended processes we can fit */ | |
39236c6e | 4076 | average_resident_pages = memorystatus_suspended_footprint_total / memorystatus_suspended_count; |
316670eb A |
4077 | estimated_processes = memorystatus_suspended_count + |
4078 | ((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages); | |
4079 | ||
4080 | /* If it's predicted that no freeze will occur, lower the threshold temporarily */ | |
4081 | if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) { | |
4082 | memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_LOW; | |
6d2010ae | 4083 | } else { |
39236c6e | 4084 | memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT; |
6d2010ae | 4085 | } |
6d2010ae | 4086 | |
316670eb A |
4087 | MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n", |
4088 | memorystatus_suspended_count, average_resident_pages, estimated_processes); | |
6d2010ae | 4089 | |
316670eb A |
4090 | if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) { |
4091 | ret = TRUE; | |
4092 | } else { | |
4093 | ret = FALSE; | |
6d2010ae | 4094 | } |
316670eb A |
4095 | } else { |
4096 | ret = FALSE; | |
6d2010ae | 4097 | } |
316670eb | 4098 | |
39236c6e | 4099 | proc_list_unlock(); |
6d2010ae | 4100 | |
316670eb | 4101 | return ret; |
6d2010ae A |
4102 | } |
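/*
 * Illustrative note (not part of this file): the capacity estimate above,
 * worked through with assumed numbers.  Suppose 4 suspended processes with
 * a combined footprint of 40000 pages, 30000 available pages, and a
 * critical floor of 6000 pages:
 *
 *	average_resident_pages = 40000 / 4 = 10000
 *	estimated_processes    = 4 + (30000 - 6000) / 10000
 *	                       = 4 + 2 = 6
 *
 * If that estimate is at or below the default threshold, the suspended-
 * process threshold is temporarily lowered so freezing can still proceed.
 */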
4103 | ||
316670eb A |
4104 | static boolean_t |
4105 | memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low) | |
6d2010ae | 4106 | { |
3e170ce0 A |
4107 | boolean_t can_freeze = TRUE; |
4108 | ||
316670eb A |
4109 | /* Only freeze if we're sufficiently low on memory; this holds off freeze right |
4110 | after boot, and is generally a no-op once we've reached steady state. */ | |
4111 | if (memorystatus_available_pages > memorystatus_freeze_threshold) { | |
4112 | return FALSE; | |
4113 | } | |
4114 | ||
4115 | /* Check minimum suspended process threshold. */ | |
4116 | if (!memorystatus_can_freeze_processes()) { | |
4117 | return FALSE; | |
4118 | } | |
6d2010ae | 4119 | |
3e170ce0 A |
4120 | if (COMPRESSED_PAGER_IS_SWAPLESS || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) { |
4121 | /* | |
4122 | * In-core compressor used for freezing WITHOUT on-disk swap support. | |
4123 | */ | |
4124 | ||
4125 | if (vm_compressor_low_on_space()) { | |
4126 | if (memorystatus_freeze_swap_low) { | |
4127 | *memorystatus_freeze_swap_low = TRUE; | |
4128 | } | |
4129 | ||
4130 | can_freeze = FALSE; | |
4131 | ||
4132 | } else { | |
4133 | if (memorystatus_freeze_swap_low) { | |
4134 | *memorystatus_freeze_swap_low = FALSE; | |
4135 | } | |
4136 | ||
4137 | can_freeze = TRUE; | |
4138 | } | |
4139 | } else { | |
4140 | /* | |
4141 | * Freezing WITH on-disk swap support. | |
4142 | */ | |
4143 | ||
4144 | if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { | |
4145 | /* | |
4146 | * In-core compressor fronts the swap. | |
4147 | */ | |
4148 | if (vm_swap_low_on_space()) { | |
4149 | if (memorystatus_freeze_swap_low) { | |
4150 | *memorystatus_freeze_swap_low = TRUE; | |
4151 | } | |
4152 | ||
4153 | can_freeze = FALSE; | |
4154 | } | |
4155 | ||
4156 | } else if (DEFAULT_FREEZER_IS_ACTIVE) { | |
4157 | /* | |
4158 | * Legacy freeze mode with no compressor support. | |
4159 | */ | |
4160 | if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) { | |
4161 | if (memorystatus_freeze_swap_low) { | |
4162 | *memorystatus_freeze_swap_low = TRUE; | |
4163 | } | |
4164 | ||
4165 | can_freeze = FALSE; | |
4166 | } | |
4167 | } else { | |
4168 | panic("Not a valid freeze configuration.\n"); | |
316670eb | 4169 | } |
6d2010ae A |
4170 | } |
4171 | ||
3e170ce0 | 4172 | return can_freeze; |
6d2010ae A |
4173 | } |
4174 | ||
4175 | static void | |
316670eb | 4176 | memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval) |
6d2010ae | 4177 | { |
3e170ce0 | 4178 | unsigned int freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE); |
6d2010ae A |
4179 | if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) { |
4180 | if (!interval->max_pageouts) { | |
3e170ce0 | 4181 | interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / (24 * 60))); |
6d2010ae | 4182 | } else { |
316670eb | 4183 | printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins); |
6d2010ae A |
4184 | } |
4185 | interval->ts.tv_sec = interval->mins * 60; | |
4186 | interval->ts.tv_nsec = 0; | |
4187 | ADD_MACH_TIMESPEC(&interval->ts, ts); | |
316670eb | 4188 | /* Since we update the throttle stats pre-freeze, adjust for overshoot here */ |
6d2010ae A |
4189 | if (interval->pageouts > interval->max_pageouts) { |
4190 | interval->pageouts -= interval->max_pageouts; | |
4191 | } else { | |
4192 | interval->pageouts = 0; | |
4193 | } | |
4194 | interval->throttle = FALSE; | |
4195 | } else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) { | |
316670eb | 4196 | printf("memorystatus_freeze_update_throttle_interval: %d minute pageout limit exceeded; enabling throttle\n", interval->mins); |
6d2010ae A |
4197 | interval->throttle = TRUE; |
4198 | } | |
316670eb A |
4199 | |
4200 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n", | |
6d2010ae A |
4201 | interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60, |
4202 | interval->throttle ? "on" : "off"); | |
6d2010ae A |
4203 | } |
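/*
 * Budget arithmetic sketch for the interval refresh above (hypothetical
 * values): with memorystatus_freeze_daily_mb_max = 1024 and PAGE_SIZE = 4096,
 * freeze_daily_pageouts_max = 1024 * (1048576 / 4096) = 262144 pages per day.
 * A 60-minute interval with burst_multiple = 2 then gets
 * max_pageouts = 2 * ((60 * 262144) / (24 * 60)) = 21844 pages (integer
 * division), i.e. twice its proportional share of the daily budget.
 */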
4204 | ||
4205 | static boolean_t | |
316670eb | 4206 | memorystatus_freeze_update_throttle(void) |
6d2010ae A |
4207 | { |
4208 | clock_sec_t sec; | |
4209 | clock_nsec_t nsec; | |
4210 | mach_timespec_t ts; | |
4211 | uint32_t i; | |
4212 | boolean_t throttled = FALSE; | |
4213 | ||
4214 | #if DEVELOPMENT || DEBUG | |
316670eb | 4215 | if (!memorystatus_freeze_throttle_enabled) |
6d2010ae A |
4216 | return FALSE; |
4217 | #endif | |
4218 | ||
4219 | clock_get_system_nanotime(&sec, &nsec); | |
4220 | ts.tv_sec = sec; | |
4221 | ts.tv_nsec = nsec; | |
4222 | ||
316670eb | 4223 | /* Check freeze pageouts over multiple intervals and throttle if we've exceeded our budget. |
6d2010ae | 4224 | * |
316670eb | 4225 | * This ensures that periods of inactivity can't be used as 'credit' towards freeze if the device has |
6d2010ae A |
4226 | * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in |
4227 | * order to allow for bursts of activity. | |
4228 | */ | |
4229 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
316670eb | 4230 | memorystatus_freeze_update_throttle_interval(&ts, &throttle_intervals[i]); |
6d2010ae A |
4231 | if (throttle_intervals[i].throttle == TRUE) |
4232 | throttled = TRUE; | |
4233 | } | |
4234 | ||
4235 | return throttled; | |
4236 | } | |
4237 | ||
4238 | static void | |
316670eb | 4239 | memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) |
6d2010ae | 4240 | { |
316670eb | 4241 | static boolean_t memorystatus_freeze_swap_low = FALSE; |
3e170ce0 A |
4242 | |
4243 | lck_mtx_lock(&freezer_mutex); | |
316670eb A |
4244 | if (memorystatus_freeze_enabled) { |
4245 | if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { | |
3e170ce0 A |
4246 | /* Only freeze if we've not exceeded our pageout budgets.*/ |
4247 | if (!memorystatus_freeze_update_throttle()) { | |
39236c6e | 4248 | memorystatus_freeze_top_process(&memorystatus_freeze_swap_low); |
316670eb A |
4249 | } else { |
4250 | printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n"); | |
4251 | memorystatus_freeze_throttle_count++; /* Throttled, update stats */ | |
4252 | } | |
4253 | } | |
4254 | } | |
3e170ce0 | 4255 | lck_mtx_unlock(&freezer_mutex); |
6d2010ae | 4256 | |
316670eb A |
4257 | assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT); |
4258 | thread_block((thread_continue_t) memorystatus_freeze_thread); | |
4259 | } | |
4260 | ||
4261 | #endif /* CONFIG_FREEZE */ | |
6d2010ae | 4262 | |
fe8ab488 | 4263 | #if VM_PRESSURE_EVENTS |
6d2010ae | 4264 | |
fe8ab488 | 4265 | #if CONFIG_MEMORYSTATUS |
316670eb | 4266 | |
fe8ab488 A |
4267 | static int |
4268 | memorystatus_send_note(int event_code, void *data, size_t data_length) { | |
4269 | int ret; | |
4270 | struct kev_msg ev_msg; | |
316670eb | 4271 | |
fe8ab488 A |
4272 | ev_msg.vendor_code = KEV_VENDOR_APPLE; |
4273 | ev_msg.kev_class = KEV_SYSTEM_CLASS; | |
4274 | ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS; | |
4275 | ||
4276 | ev_msg.event_code = event_code; | |
4277 | ||
4278 | ev_msg.dv[0].data_length = data_length; | |
4279 | ev_msg.dv[0].data_ptr = data; | |
4280 | ev_msg.dv[1].data_length = 0; | |
4281 | ||
4282 | ret = kev_post_msg(&ev_msg); | |
4283 | if (ret) { | |
4284 | printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); | |
316670eb | 4285 | } |
39236c6e | 4286 | |
fe8ab488 | 4287 | return ret; |
316670eb A |
4288 | } |
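/*
 * Consumption sketch (hypothetical user-space code, not part of this file):
 * these notes are ordinary kernel events, so a listener would open a kernel
 * event socket and filter on the memorystatus subclass:
 *
 *   int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *   struct kev_request req = {
 *       .vendor_code  = KEV_VENDOR_APPLE,
 *       .kev_class    = KEV_SYSTEM_CLASS,
 *       .kev_subclass = KEV_MEMORYSTATUS_SUBCLASS,
 *   };
 *   ioctl(fd, SIOCSKEVFILT, &req);
 *   // ...then recv() struct kern_event_msg payloads from fd.
 */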
4289 | ||
fe8ab488 A |
4290 | boolean_t |
4291 | memorystatus_warn_process(pid_t pid, boolean_t critical) { | |
316670eb | 4292 | |
fe8ab488 | 4293 | boolean_t ret = FALSE; |
3e170ce0 | 4294 | boolean_t found_knote = FALSE; |
fe8ab488 | 4295 | struct knote *kn = NULL; |
316670eb | 4296 | |
fe8ab488 A |
4297 | /* |
4298 | * See comment in sysctl_memorystatus_vm_pressure_send. | |
4299 | */ | |
39236c6e | 4300 | |
fe8ab488 | 4301 | memorystatus_klist_lock(); |
3e170ce0 A |
4302 | |
4303 | SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { | |
4304 | proc_t knote_proc = kn->kn_kq->kq_p; | |
4305 | pid_t knote_pid = knote_proc->p_pid; | |
4306 | ||
4307 | if (knote_pid == pid) { | |
4308 | /* | |
4309 | * By setting the "fflags" here, we are forcing | |
4310 | * a process to deal with the case where it's | |
4311 | * bumping up into its memory limits. If we don't | |
4312 | * do this here, we will end up depending on the | |
4313 | * system pressure snapshot evaluation in | |
4314 | * filt_memorystatus(). | |
4315 | */ | |
39236c6e | 4316 | |
3e170ce0 A |
4317 | if (critical) { |
4318 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { | |
4319 | kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; | |
4320 | } else if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { | |
4321 | kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
4322 | } | |
4323 | } else { | |
4324 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { | |
4325 | kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
4326 | } | |
4327 | } | |
4328 | ||
4329 | found_knote = TRUE; | |
39236c6e | 4330 | } |
3e170ce0 A |
4331 | } |
4332 | ||
4333 | if (found_knote) { | |
4334 | KNOTE(&memorystatus_klist, 0); | |
4335 | ret = TRUE; | |
fe8ab488 A |
4336 | } else { |
4337 | if (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0) { | |
4338 | ret = TRUE; | |
6d2010ae A |
4339 | } |
4340 | } | |
3e170ce0 | 4341 | |
fe8ab488 | 4342 | memorystatus_klist_unlock(); |
6d2010ae | 4343 | |
fe8ab488 | 4344 | return ret; |
316670eb A |
4345 | } |
4346 | ||
3e170ce0 A |
4347 | /* |
4348 | * Can only be set by the current task on itself. | |
4349 | */ | |
4350 | int | |
4351 | memorystatus_low_mem_privileged_listener(uint32_t op_flags) | |
4352 | { | |
4353 | boolean_t set_privilege = FALSE; | |
4354 | /* | |
4355 | * Need an entitlement check here? | |
4356 | */ | |
4357 | if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE) { | |
4358 | set_privilege = TRUE; | |
4359 | } else if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE) { | |
4360 | set_privilege = FALSE; | |
4361 | } else { | |
4362 | return EINVAL; | |
4363 | } | |
4364 | ||
4365 | return (task_low_mem_privileged_listener(current_task(), set_privilege, NULL)); | |
4366 | } | |
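/*
 * Caller sketch (hypothetical; assumes the usual memorystatus_control()
 * dispatch, which lives outside this excerpt): a process would mark itself
 * as a privileged low-memory listener with
 *
 *   memorystatus_control(MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE,
 *                        0, 0, NULL, 0);
 *
 * with the command value arriving here as op_flags. Per the comment above,
 * it can only act on the calling task.
 */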
4367 | ||
39236c6e | 4368 | int |
316670eb | 4369 | memorystatus_send_pressure_note(pid_t pid) { |
39236c6e A |
4370 | MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid); |
4371 | return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid)); | |
6d2010ae A |
4372 | } |
4373 | ||
fe8ab488 A |
4374 | void |
4375 | memorystatus_send_low_swap_note(void) { | |
4376 | ||
4377 | struct knote *kn = NULL; | |
3e170ce0 | 4378 | |
fe8ab488 A |
4379 | memorystatus_klist_lock(); |
4380 | SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { | |
3e170ce0 A |
4381 | /* We call is_knote_registered_modify_task_pressure_bits to check whether the sfflags for the | |
4382 | * current note contain NOTE_MEMORYSTATUS_LOW_SWAP. Once we find one note in the memorystatus_klist | |
4383 | * that has the NOTE_MEMORYSTATUS_LOW_SWAP flag set in its sfflags, we call KNOTE with | |
4384 | * kMemorystatusLowSwap as the hint, to process and update all knotes on the memorystatus_klist accordingly. */ | |
fe8ab488 | 4385 | if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) { |
3e170ce0 A |
4386 | KNOTE(&memorystatus_klist, kMemorystatusLowSwap); |
4387 | break; | |
fe8ab488 A |
4388 | } |
4389 | } | |
3e170ce0 | 4390 | |
fe8ab488 A |
4391 | memorystatus_klist_unlock(); |
4392 | } | |
4393 | ||
39236c6e A |
4394 | boolean_t |
4395 | memorystatus_bg_pressure_eligible(proc_t p) { | |
4396 | boolean_t eligible = FALSE; | |
4397 | ||
4398 | proc_list_lock(); | |
4399 | ||
4400 | MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state); | |
4401 | ||
4402 | /* Foreground processes have already been dealt with at this point, so just test for eligibility */ | |
4403 | if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) { | |
4404 | eligible = TRUE; | |
4405 | } | |
4406 | ||
4407 | proc_list_unlock(); | |
4408 | ||
4409 | return eligible; | |
4410 | } | |
4411 | ||
4412 | boolean_t | |
4413 | memorystatus_is_foreground_locked(proc_t p) { | |
4414 | return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) || | |
4415 | (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT)); | |
4416 | } | |
fe8ab488 | 4417 | #endif /* CONFIG_MEMORYSTATUS */ |
39236c6e A |
4418 | |
4419 | /* | |
4420 | * Trigger levels to test the mechanism. | |
4421 | * Can be used via a sysctl. | |
4422 | */ | |
4423 | #define TEST_LOW_MEMORY_TRIGGER_ONE 1 | |
4424 | #define TEST_LOW_MEMORY_TRIGGER_ALL 2 | |
4425 | #define TEST_PURGEABLE_TRIGGER_ONE 3 | |
4426 | #define TEST_PURGEABLE_TRIGGER_ALL 4 | |
4427 | #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE 5 | |
4428 | #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL 6 | |
4429 | ||
4430 | boolean_t memorystatus_manual_testing_on = FALSE; | |
4431 | vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal; | |
4432 | ||
4433 | extern struct knote * | |
fe8ab488 | 4434 | vm_pressure_select_optimal_candidate_to_notify(struct klist *, int, boolean_t); |
39236c6e A |
4435 | |
4436 | extern | |
fe8ab488 | 4437 | kern_return_t vm_pressure_notification_without_levels(boolean_t); |
39236c6e A |
4438 | |
4439 | extern void vm_pressure_klist_lock(void); | |
4440 | extern void vm_pressure_klist_unlock(void); | |
4441 | ||
4442 | extern void vm_reset_active_list(void); | |
4443 | ||
4444 | extern void delay(int); | |
4445 | ||
4446 | #define INTER_NOTIFICATION_DELAY (250000) /* .25 second */ | |
4447 | ||
4448 | void memorystatus_on_pageout_scan_end(void) { | |
4449 | /* No-op */ | |
4450 | } | |
4451 | ||
4452 | /* | |
4453 | * kn_max - knote | |
4454 | * | |
4455 | * knote_pressure_level - to check if the knote is registered for this notification level. | |
4456 | * | |
4457 | * task - task whose bits we'll be modifying | |
4458 | * | |
4459 | * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again. | |
4460 | * | |
4461 | * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately. | |
4462 | * | |
4463 | */ | |
39236c6e A |
4464 | |
4465 | boolean_t | |
4466 | is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set) | |
4467 | { | |
4468 | if (kn_max->kn_sfflags & knote_pressure_level) { | |
4469 | ||
4470 | if (task_has_been_notified(task, pressure_level_to_clear) == TRUE) { | |
4471 | ||
4472 | task_clear_has_been_notified(task, pressure_level_to_clear); | |
4473 | } | |
4474 | ||
4475 | task_mark_has_been_notified(task, pressure_level_to_set); | |
4476 | return TRUE; | |
4477 | } | |
4478 | ||
4479 | return FALSE; | |
4480 | } | |
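/*
 * Example of the clear/set pairing, taken from the WARN path further below:
 *
 *   is_knote_registered_modify_task_pressure_bits(kn_max,
 *       NOTE_MEMORYSTATUS_PRESSURE_WARN, task,
 *       kVMPressureCritical, kVMPressureWarning);
 *
 * marks the task as having been told about WARN, and clears its CRITICAL
 * bit so that a later climb back to critical re-notifies the task instead
 * of being suppressed.
 */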
4481 | ||
fe8ab488 A |
4482 | extern kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process); |
4483 | ||
4484 | #define VM_PRESSURE_DECREASED_SMOOTHING_PERIOD 5000 /* milliseconds */ | |
39236c6e A |
4485 | |
4486 | kern_return_t | |
fe8ab488 | 4487 | memorystatus_update_vm_pressure(boolean_t target_foreground_process) |
39236c6e A |
4488 | { |
4489 | struct knote *kn_max = NULL; | |
3e170ce0 | 4490 | struct knote *kn_cur = NULL, *kn_temp = NULL; /* for safe list traversal */ |
39236c6e A |
4491 | pid_t target_pid = -1; |
4492 | struct klist dispatch_klist = { NULL }; | |
4493 | proc_t target_proc = PROC_NULL; | |
39236c6e A |
4494 | struct task *task = NULL; |
4495 | boolean_t found_candidate = FALSE; | |
4496 | ||
fe8ab488 A |
4497 | static vm_pressure_level_t level_snapshot = kVMPressureNormal; |
4498 | static vm_pressure_level_t prev_level_snapshot = kVMPressureNormal; | |
4499 | boolean_t smoothing_window_started = FALSE; | |
4500 | struct timeval smoothing_window_start_tstamp = {0, 0}; | |
4501 | struct timeval curr_tstamp = {0, 0}; | |
4502 | int elapsed_msecs = 0; | |
4503 | ||
4504 | #if !CONFIG_JETSAM | |
4505 | #define MAX_IDLE_KILLS 100 /* limit the number of idle kills allowed */ | |
4506 | ||
4507 | int idle_kill_counter = 0; | |
4508 | ||
4509 | /* | |
4510 | * On desktop we take this opportunity to free up memory pressure | |
4511 | * by immediately killing idle exitable processes. We use a delay | |
4512 | * to avoid overkill. And we impose a max counter as a fail safe | |
4513 | * in case daemons re-launch too fast. | |
4514 | */ | |
4515 | while ((memorystatus_vm_pressure_level != kVMPressureNormal) && (idle_kill_counter < MAX_IDLE_KILLS)) { | |
4516 | if (memorystatus_idle_exit_from_VM() == FALSE) { | |
4517 | /* No idle exitable processes left to kill */ | |
4518 | break; | |
4519 | } | |
4520 | idle_kill_counter++; | |
3e170ce0 A |
4521 | |
4522 | if (memorystatus_manual_testing_on == TRUE) { | |
4523 | /* | |
4524 | * Skip the delay when testing | |
4525 | * the pressure notification scheme. | |
4526 | */ | |
4527 | } else { | |
4528 | delay(1000000); /* 1 second */ | |
4529 | } | |
fe8ab488 A |
4530 | } |
4531 | #endif /* !CONFIG_JETSAM */ | |
4532 | ||
39236c6e A |
4533 | while (1) { |
4534 | ||
4535 | /* | |
4536 | * There is a race window here. But it's not clear | |
4537 | * how much we benefit from having extra synchronization. | |
4538 | */ | |
4539 | level_snapshot = memorystatus_vm_pressure_level; | |
4540 | ||
fe8ab488 A |
4541 | if (prev_level_snapshot > level_snapshot) { |
4542 | /* | |
4543 | * Pressure decreased? Let's take a little breather | |
4544 | * and see if this condition stays. | |
4545 | */ | |
4546 | if (smoothing_window_started == FALSE) { | |
4547 | ||
4548 | smoothing_window_started = TRUE; | |
4549 | microuptime(&smoothing_window_start_tstamp); | |
4550 | } | |
4551 | ||
4552 | microuptime(&curr_tstamp); | |
4553 | timevalsub(&curr_tstamp, &smoothing_window_start_tstamp); | |
4554 | elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000; | |
4555 | ||
4556 | if (elapsed_msecs < VM_PRESSURE_DECREASED_SMOOTHING_PERIOD) { | |
4557 | ||
4558 | delay(INTER_NOTIFICATION_DELAY); | |
4559 | continue; | |
4560 | } | |
4561 | } | |
4562 | ||
4563 | prev_level_snapshot = level_snapshot; | |
4564 | smoothing_window_started = FALSE; | |
4565 | ||
39236c6e | 4566 | memorystatus_klist_lock(); |
fe8ab488 | 4567 | kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot, target_foreground_process); |
39236c6e A |
4568 | |
4569 | if (kn_max == NULL) { | |
4570 | memorystatus_klist_unlock(); | |
4571 | ||
4572 | /* | |
4573 | * No more level-based clients to notify. | |
4574 | * Try the non-level based notification clients. | |
4575 | * | |
4576 | * However, these non-level clients don't understand | |
4577 | * the "return-to-normal" notification. | |
4578 | * | |
4579 | * So don't consider them for those notifications. Just | |
4580 | * return instead. | |
4581 | * | |
4582 | */ | |
4583 | ||
4584 | if (level_snapshot != kVMPressureNormal) { | |
4585 | goto try_dispatch_vm_clients; | |
4586 | } else { | |
4587 | return KERN_FAILURE; | |
4588 | } | |
4589 | } | |
4590 | ||
4591 | target_proc = kn_max->kn_kq->kq_p; | |
4592 | ||
4593 | proc_list_lock(); | |
4594 | if (target_proc != proc_ref_locked(target_proc)) { | |
4595 | target_proc = PROC_NULL; | |
4596 | proc_list_unlock(); | |
4597 | memorystatus_klist_unlock(); | |
4598 | continue; | |
4599 | } | |
4600 | proc_list_unlock(); | |
39236c6e A |
4601 | |
4602 | target_pid = target_proc->p_pid; | |
4603 | ||
4604 | task = (struct task *)(target_proc->task); | |
4605 | ||
4606 | if (level_snapshot != kVMPressureNormal) { | |
4607 | ||
4608 | if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) { | |
4609 | ||
4610 | if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, kVMPressureCritical, kVMPressureWarning) == TRUE) { | |
4611 | found_candidate = TRUE; | |
4612 | } | |
4613 | } else { | |
4614 | if (level_snapshot == kVMPressureCritical) { | |
4615 | ||
4616 | if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, kVMPressureWarning, kVMPressureCritical) == TRUE) { | |
4617 | found_candidate = TRUE; | |
4618 | } | |
4619 | } | |
4620 | } | |
4621 | } else { | |
4622 | if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
4623 | ||
4624 | task_clear_has_been_notified(task, kVMPressureWarning); | |
4625 | task_clear_has_been_notified(task, kVMPressureCritical); | |
4626 | ||
4627 | found_candidate = TRUE; | |
6d2010ae A |
4628 | } |
4629 | } | |
39236c6e A |
4630 | |
4631 | if (found_candidate == FALSE) { | |
3e170ce0 A |
4632 | proc_rele(target_proc); |
4633 | memorystatus_klist_unlock(); | |
39236c6e A |
4634 | continue; |
4635 | } | |
4636 | ||
3e170ce0 A |
4637 | SLIST_FOREACH_SAFE(kn_cur, &memorystatus_klist, kn_selnext, kn_temp) { |
4638 | proc_t knote_proc = kn_cur->kn_kq->kq_p; | |
4639 | pid_t knote_pid = knote_proc->p_pid; | |
4640 | if (knote_pid == target_pid) { | |
4641 | KNOTE_DETACH(&memorystatus_klist, kn_cur); | |
4642 | KNOTE_ATTACH(&dispatch_klist, kn_cur); | |
4643 | } | |
4644 | } | |
39236c6e A |
4645 | |
4646 | KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure); | |
4647 | ||
3e170ce0 A |
4648 | SLIST_FOREACH_SAFE(kn_cur, &dispatch_klist, kn_selnext, kn_temp) { |
4649 | KNOTE_DETACH(&dispatch_klist, kn_cur); | |
4650 | KNOTE_ATTACH(&memorystatus_klist, kn_cur); | |
4651 | } | |
4652 | ||
39236c6e A |
4653 | memorystatus_klist_unlock(); |
4654 | ||
4655 | microuptime(&target_proc->vm_pressure_last_notify_tstamp); | |
4656 | proc_rele(target_proc); | |
4657 | ||
fe8ab488 | 4658 | if (memorystatus_manual_testing_on == TRUE && target_foreground_process == TRUE) { |
39236c6e A |
4659 | break; |
4660 | } | |
4661 | ||
4662 | try_dispatch_vm_clients: | |
fe8ab488 A |
4663 | if (kn_max == NULL && level_snapshot != kVMPressureNormal) { |
4664 | /* | |
4665 | * We will exit this loop when we are done with | |
4666 | * notification clients (level and non-level based). | |
39236c6e | 4667 | */ |
fe8ab488 | 4668 | if ((vm_pressure_notify_dispatch_vm_clients(target_foreground_process) == KERN_FAILURE) && (kn_max == NULL)) { |
39236c6e A |
4669 | /* |
4670 | * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications | |
4671 | * AND | |
4672 | * we have failed to find any eligible clients for the non-level based notifications too. | |
4673 | * So, we are done. | |
4674 | */ | |
4675 | ||
4676 | return KERN_FAILURE; | |
4677 | } | |
4678 | } | |
4679 | ||
fe8ab488 A |
4680 | /* |
4681 | * LD: This block of code below used to be invoked in the older memory notification scheme on embedded every time | |
4682 | * a process was sent a memory pressure notification. The "memorystatus_klist" list was used to hold these | |
4683 | * privileged listeners. But now we have moved to the newer scheme and are trying to move away from the extra | |
4684 | * notifications. So the code remains here in case we break compatibility and need to send out notifications to the | |
4685 | * privileged apps. | |
4686 | */ | |
4687 | #if 0 | |
4688 | #endif /* 0 */ | |
4689 | ||
4690 | if (memorystatus_manual_testing_on == TRUE) { | |
4691 | /* | |
4692 | * Testing out the pressure notification scheme. | |
4693 | * No need for delays etc. | |
4694 | */ | |
4695 | } else { | |
4696 | ||
4697 | uint32_t sleep_interval = INTER_NOTIFICATION_DELAY; | |
4698 | #if CONFIG_JETSAM | |
4699 | unsigned int page_delta = 0; | |
4700 | unsigned int skip_delay_page_threshold = 0; | |
4701 | ||
4702 | assert(memorystatus_available_pages_pressure >= memorystatus_available_pages_critical_base); | |
4703 | ||
4704 | page_delta = (memorystatus_available_pages_pressure - memorystatus_available_pages_critical_base) / 2; | |
4705 | skip_delay_page_threshold = memorystatus_available_pages_pressure - page_delta; | |
4706 | ||
4707 | if (memorystatus_available_pages <= skip_delay_page_threshold) { | |
4708 | /* | |
4709 | * We are nearing the critical mark fast and can't afford to wait between | |
4710 | * notifications. | |
4711 | */ | |
4712 | sleep_interval = 0; | |
4713 | } | |
4714 | #endif /* CONFIG_JETSAM */ | |
4715 | ||
4716 | if (sleep_interval) { | |
4717 | delay(sleep_interval); | |
4718 | } | |
39236c6e | 4719 | } |
6d2010ae | 4720 | } |
39236c6e A |
4721 | |
4722 | return KERN_SUCCESS; | |
6d2010ae A |
4723 | } |
4724 | ||
39236c6e A |
4725 | vm_pressure_level_t |
4726 | convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t); | |
4727 | ||
4728 | vm_pressure_level_t | |
4729 | convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level) | |
4730 | { | |
4731 | vm_pressure_level_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; | |
4732 | ||
4733 | switch (internal_pressure_level) { | |
4734 | ||
4735 | case kVMPressureNormal: | |
4736 | { | |
4737 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; | |
4738 | break; | |
4739 | } | |
4740 | ||
4741 | case kVMPressureWarning: | |
4742 | case kVMPressureUrgent: | |
4743 | { | |
4744 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
4745 | break; | |
4746 | } | |
4747 | ||
4748 | case kVMPressureCritical: | |
4749 | { | |
4750 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; | |
4751 | break; | |
4752 | } | |
4753 | ||
4754 | default: | |
4755 | break; | |
4756 | } | |
316670eb | 4757 | |
39236c6e A |
4758 | return dispatch_level; |
4759 | } | |
6d2010ae | 4760 | |
b0d623f7 | 4761 | static int |
39236c6e | 4762 | sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS |
b0d623f7 | 4763 | { |
39236c6e | 4764 | #pragma unused(arg1, arg2, oidp) |
39236c6e A |
4765 | vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level); |
4766 | ||
4767 | return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level)); | |
4768 | } | |
4769 | ||
fe8ab488 A |
4770 | #if DEBUG || DEVELOPMENT |
4771 | ||
39236c6e A |
4772 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED, |
4773 | 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); | |
4774 | ||
fe8ab488 A |
4775 | #else /* DEBUG || DEVELOPMENT */ |
4776 | ||
4777 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED, | |
4778 | 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); | |
4779 | ||
4780 | #endif /* DEBUG || DEVELOPMENT */ | |
b0d623f7 | 4781 | |
39236c6e A |
4782 | extern int memorystatus_purge_on_warning; |
4783 | extern int memorystatus_purge_on_critical; | |
4784 | ||
4785 | static int | |
4786 | sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS | |
4787 | { | |
4788 | #pragma unused(arg1, arg2) | |
b0d623f7 | 4789 | |
39236c6e A |
4790 | int level = 0; |
4791 | int error = 0; | |
4792 | int pressure_level = 0; | |
4793 | int trigger_request = 0; | |
4794 | int force_purge; | |
4795 | ||
4796 | error = sysctl_handle_int(oidp, &level, 0, req); | |
4797 | if (error || !req->newptr) { | |
4798 | return (error); | |
4799 | } | |
4800 | ||
4801 | memorystatus_manual_testing_on = TRUE; | |
4802 | ||
4803 | trigger_request = (level >> 16) & 0xFFFF; | |
4804 | pressure_level = (level & 0xFFFF); | |
4805 | ||
4806 | if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE || | |
4807 | trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) { | |
4808 | return EINVAL; | |
4809 | } | |
4810 | switch (pressure_level) { | |
4811 | case NOTE_MEMORYSTATUS_PRESSURE_NORMAL: | |
4812 | case NOTE_MEMORYSTATUS_PRESSURE_WARN: | |
4813 | case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL: | |
4814 | break; | |
4815 | default: | |
b0d623f7 A |
4816 | return EINVAL; |
4817 | } | |
b0d623f7 | 4818 | |
39236c6e A |
4819 | /* |
4820 | * The pressure level is being set from user-space. | |
4821 | * And user-space uses the constants in sys/event.h | |
4822 | * So we translate those events to our internal levels here. | |
4823 | */ | |
4824 | if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
4825 | ||
4826 | memorystatus_manual_testing_level = kVMPressureNormal; | |
4827 | force_purge = 0; | |
4828 | ||
4829 | } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) { | |
4830 | ||
4831 | memorystatus_manual_testing_level = kVMPressureWarning; | |
4832 | force_purge = memorystatus_purge_on_warning; | |
4833 | ||
4834 | } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { | |
4835 | ||
4836 | memorystatus_manual_testing_level = kVMPressureCritical; | |
4837 | force_purge = memorystatus_purge_on_critical; | |
b0d623f7 A |
4838 | } |
4839 | ||
39236c6e | 4840 | memorystatus_vm_pressure_level = memorystatus_manual_testing_level; |
316670eb | 4841 | |
39236c6e A |
4842 | /* purge according to the new pressure level */ |
4843 | switch (trigger_request) { | |
4844 | case TEST_PURGEABLE_TRIGGER_ONE: | |
4845 | case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE: | |
4846 | if (force_purge == 0) { | |
4847 | /* no purging requested */ | |
4848 | break; | |
4849 | } | |
4850 | vm_purgeable_object_purge_one_unlocked(force_purge); | |
4851 | break; | |
4852 | case TEST_PURGEABLE_TRIGGER_ALL: | |
4853 | case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL: | |
4854 | if (force_purge == 0) { | |
4855 | /* no purging requested */ | |
4856 | break; | |
4857 | } | |
4858 | while (vm_purgeable_object_purge_one_unlocked(force_purge)); | |
4859 | break; | |
4860 | } | |
4861 | ||
4862 | if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) || | |
4863 | (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) { | |
4864 | ||
4865 | memorystatus_update_vm_pressure(TRUE); | |
4866 | } | |
4867 | ||
4868 | if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) || | |
4869 | (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) { | |
4870 | ||
4871 | while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) { | |
4872 | continue; | |
4873 | } | |
4874 | } | |
4875 | ||
4876 | if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
4877 | memorystatus_manual_testing_on = FALSE; | |
4878 | ||
4879 | vm_pressure_klist_lock(); | |
4880 | vm_reset_active_list(); | |
4881 | vm_pressure_klist_unlock(); | |
4882 | } else { | |
4883 | ||
4884 | vm_pressure_klist_lock(); | |
fe8ab488 | 4885 | vm_pressure_notification_without_levels(FALSE); |
39236c6e A |
4886 | vm_pressure_klist_unlock(); |
4887 | } | |
4888 | ||
4889 | return 0; | |
b0d623f7 A |
4890 | } |
4891 | ||
39236c6e A |
4892 | SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, |
4893 | 0, 0, &sysctl_memorypressure_manual_trigger, "I", ""); | |
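/*
 * Usage sketch (hypothetical user-space code): the trigger request is packed
 * into the high 16 bits and the sys/event.h pressure constant into the low
 * 16 bits, matching the decoding above:
 *
 *   int val = (1 << 16) | NOTE_MEMORYSTATUS_PRESSURE_WARN;  // 1 == TEST_LOW_MEMORY_TRIGGER_ONE
 *   sysctlbyname("kern.memorypressure_manual_trigger", NULL, NULL,
 *                &val, sizeof(val));
 *
 * The TEST_* constants are kernel-internal, so user space passes the raw
 * numbers.
 */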
4894 | ||
4895 | ||
4896 | extern int memorystatus_purge_on_warning; | |
4897 | extern int memorystatus_purge_on_urgent; | |
4898 | extern int memorystatus_purge_on_critical; | |
4899 | ||
4900 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_warning, 0, ""); | |
4901 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_urgent, 0, ""); | |
4902 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, ""); | |
4903 | ||
4904 | ||
fe8ab488 | 4905 | #endif /* VM_PRESSURE_EVENTS */ |
39236c6e A |
4906 | |
4907 | /* Return both allocated and actual size, since there's a race between allocation and list compilation */ | |
b0d623f7 | 4908 | static int |
39236c6e | 4909 | memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only) |
b0d623f7 | 4910 | { |
316670eb | 4911 | uint32_t list_count, i = 0; |
39236c6e A |
4912 | memorystatus_priority_entry_t *list_entry; |
4913 | proc_t p; | |
4914 | ||
316670eb | 4915 | list_count = memorystatus_list_count; |
39236c6e A |
4916 | *list_size = sizeof(memorystatus_priority_entry_t) * list_count; |
4917 | ||
4918 | /* Just a size check? */ | |
4919 | if (size_only) { | |
4920 | return 0; | |
4921 | } | |
4922 | ||
4923 | /* Otherwise, validate the size of the buffer */ | |
4924 | if (*buffer_size < *list_size) { | |
4925 | return EINVAL; | |
4926 | } | |
4927 | ||
4928 | *list_ptr = (memorystatus_priority_entry_t*)kalloc(*list_size); | |
4929 | if (!*list_ptr) { | |
316670eb A |
4930 | return ENOMEM; |
4931 | } | |
4932 | ||
39236c6e A |
4933 | memset(*list_ptr, 0, *list_size); |
4934 | ||
4935 | *buffer_size = *list_size; | |
4936 | *list_size = 0; | |
4937 | ||
4938 | list_entry = *list_ptr; | |
4939 | ||
4940 | proc_list_lock(); | |
4941 | ||
4942 | p = memorystatus_get_first_proc_locked(&i, TRUE); | |
4943 | while (p && (*list_size < *buffer_size)) { | |
4944 | list_entry->pid = p->p_pid; | |
4945 | list_entry->priority = p->p_memstat_effectivepriority; | |
4946 | list_entry->user_data = p->p_memstat_userdata; | |
4947 | #if LEGACY_HIWATER | |
3e170ce0 A |
4948 | |
4949 | /* | |
4950 | * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore. | |
4951 | * Background limits are described via the inactive limit slots. | |
4952 | * So, here, the cached limit should always be valid. | |
4953 | */ | |
4954 | ||
4955 | if (p->p_memstat_memlimit <= 0) { | |
4956 | task_get_phys_footprint_limit(p->task, &list_entry->limit); | |
4957 | } else { | |
4958 | list_entry->limit = p->p_memstat_memlimit; | |
4959 | } | |
39236c6e A |
4960 | #else |
4961 | task_get_phys_footprint_limit(p->task, &list_entry->limit); | |
4962 | #endif | |
4963 | list_entry->state = memorystatus_build_state(p); | |
4964 | list_entry++; | |
4965 | ||
4966 | *list_size += sizeof(memorystatus_priority_entry_t); | |
4967 | ||
4968 | p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
316670eb A |
4969 | } |
4970 | ||
39236c6e | 4971 | proc_list_unlock(); |
316670eb | 4972 | |
39236c6e | 4973 | MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size); |
316670eb | 4974 | |
39236c6e A |
4975 | return 0; |
4976 | } | |
b0d623f7 | 4977 | |
39236c6e A |
4978 | static int |
4979 | memorystatus_cmd_get_priority_list(user_addr_t buffer, size_t buffer_size, int32_t *retval) { | |
4980 | int error = EINVAL; | |
4981 | boolean_t size_only; | |
4982 | memorystatus_priority_entry_t *list = NULL; | |
4983 | size_t list_size; | |
316670eb | 4984 | |
39236c6e A |
4985 | size_only = ((buffer == USER_ADDR_NULL) ? TRUE: FALSE); |
4986 | ||
4987 | error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only); | |
4988 | if (error) { | |
4989 | goto out; | |
4990 | } | |
4991 | ||
4992 | if (!size_only) { | |
4993 | error = copyout(list, buffer, list_size); | |
4994 | } | |
4995 | ||
4996 | if (error == 0) { | |
4997 | *retval = list_size; | |
4998 | } | |
4999 | out: | |
5000 | ||
5001 | if (list) { | |
5002 | kfree(list, buffer_size); | |
5003 | } | |
5004 | ||
5005 | return error; | |
316670eb | 5006 | } |
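/*
 * Two-call pattern sketch (hypothetical user-space code; assumes the standard
 * memorystatus_control() entry point, which is outside this excerpt): pass a
 * NULL buffer to learn the required size, then fetch the list:
 *
 *   int size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST,
 *                                   0, 0, NULL, 0);
 *   memorystatus_priority_entry_t *buf = malloc(size);
 *   memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST,
 *                        0, 0, buf, size);
 */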
b0d623f7 | 5007 | |
39236c6e A |
5008 | #if CONFIG_JETSAM |
5009 | ||
5010 | static void | |
5011 | memorystatus_clear_errors(void) | |
5012 | { | |
5013 | proc_t p; | |
5014 | unsigned int i = 0; | |
5015 | ||
5016 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
5017 | ||
5018 | proc_list_lock(); | |
5019 | ||
5020 | p = memorystatus_get_first_proc_locked(&i, TRUE); | |
5021 | while (p) { | |
5022 | if (p->p_memstat_state & P_MEMSTAT_ERROR) { | |
5023 | p->p_memstat_state &= ~P_MEMSTAT_ERROR; | |
5024 | } | |
5025 | p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
5026 | } | |
5027 | ||
5028 | proc_list_unlock(); | |
5029 | ||
5030 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
5031 | } | |
b0d623f7 | 5032 | |
316670eb | 5033 | static void |
39236c6e | 5034 | memorystatus_update_levels_locked(boolean_t critical_only) { |
fe8ab488 | 5035 | |
39236c6e | 5036 | memorystatus_available_pages_critical = memorystatus_available_pages_critical_base; |
fe8ab488 A |
5037 | |
5038 | /* | |
5039 | * If there's an entry in the first bucket, we have idle processes. | |
5040 | */ | |
5041 | memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
5042 | if (first_bucket->count) { | |
5043 | memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset; | |
5044 | ||
5045 | if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure ) { | |
5046 | /* | |
5047 | * The critical threshold must never exceed the pressure threshold | |
5048 | */ | |
5049 | memorystatus_available_pages_critical = memorystatus_available_pages_pressure; | |
39236c6e A |
5050 | } |
5051 | } | |
fe8ab488 | 5052 | |
316670eb A |
5053 | #if DEBUG || DEVELOPMENT |
5054 | if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) { | |
5055 | memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic; | |
fe8ab488 A |
5056 | |
5057 | if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure ) { | |
5058 | /* | |
5059 | * The critical threshold must never exceed the pressure threshold | |
5060 | */ | |
5061 | memorystatus_available_pages_critical = memorystatus_available_pages_pressure; | |
5062 | } | |
39236c6e A |
5063 | } |
5064 | #endif | |
5065 | ||
5066 | if (critical_only) { | |
5067 | return; | |
5068 | } | |
5069 | ||
316670eb | 5070 | #if VM_PRESSURE_EVENTS |
39236c6e A |
5071 | memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta; |
5072 | #if DEBUG || DEVELOPMENT | |
5073 | if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) { | |
316670eb | 5074 | memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic; |
316670eb A |
5075 | } |
5076 | #endif | |
39236c6e A |
5077 | #endif |
5078 | } | |
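/*
 * Threshold arithmetic sketch (hypothetical percentages): with
 * delta_percentage = 5 and pressure_threshold_percentage = 15, the pressure
 * level sits at (15 / 5) * memorystatus_delta = 3 deltas worth of pages. The
 * critical level starts at its base, gains the idle offset only while the
 * idle band is non-empty, and is always clamped so it never exceeds the
 * pressure level.
 */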
5079 | ||
3e170ce0 A |
5080 | /* |
5081 | * Get the at_boot snapshot | |
5082 | */ | |
39236c6e | 5083 | static int |
3e170ce0 | 5084 | memorystatus_get_at_boot_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { |
39236c6e | 5085 | size_t input_size = *snapshot_size; |
3e170ce0 A |
5086 | |
5087 | /* | |
5088 | * The at_boot snapshot has no entry list. | |
5089 | */ | |
5090 | *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t); | |
5091 | ||
5092 | if (size_only) { | |
5093 | return 0; | |
5094 | } | |
5095 | ||
5096 | /* | |
5097 | * Validate the size of the snapshot buffer | |
5098 | */ | |
5099 | if (input_size < *snapshot_size) { | |
5100 | return EINVAL; | |
5101 | } | |
5102 | ||
5103 | /* | |
5104 | * Update the notification_time only | |
5105 | */ | |
5106 | memorystatus_at_boot_snapshot.notification_time = mach_absolute_time(); | |
5107 | *snapshot = &memorystatus_at_boot_snapshot; | |
5108 | ||
5109 | MEMORYSTATUS_DEBUG(7, "memorystatus_get_at_boot_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%d)\n", | |
5110 | (long)input_size, (long)*snapshot_size, 0); | |
5111 | return 0; | |
5112 | } | |
5113 | ||
5114 | static int | |
5115 | memorystatus_get_on_demand_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { | |
5116 | size_t input_size = *snapshot_size; | |
5117 | uint32_t ods_list_count = memorystatus_list_count; | |
5118 | memorystatus_jetsam_snapshot_t *ods = NULL; /* The on_demand snapshot buffer */ | |
5119 | ||
5120 | *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (ods_list_count)); | |
5121 | ||
5122 | if (size_only) { | |
5123 | return 0; | |
5124 | } | |
5125 | ||
5126 | /* | |
5127 | * Validate the size of the snapshot buffer. | |
5128 | * This is inherently racy. May want to revisit | |
5129 | * this error condition and trim the output when | |
5130 | * it doesn't fit. | |
5131 | */ | |
5132 | if (input_size < *snapshot_size) { | |
5133 | return EINVAL; | |
5134 | } | |
5135 | ||
5136 | /* | |
5137 | * Allocate and initialize a snapshot buffer. | |
5138 | */ | |
5139 | ods = (memorystatus_jetsam_snapshot_t *)kalloc(*snapshot_size); | |
5140 | if (!ods) { | |
5141 | return (ENOMEM); | |
5142 | } | |
5143 | ||
5144 | memset(ods, 0, *snapshot_size); | |
5145 | ||
5146 | proc_list_lock(); | |
5147 | memorystatus_init_jetsam_snapshot_locked(ods, ods_list_count); | |
5148 | proc_list_unlock(); | |
5149 | ||
5150 | /* | |
5151 | * Return the kernel allocated, on_demand buffer. | |
5152 | * The caller of this routine will copy the data out | |
5153 | * to user space and then free the kernel allocated | |
5154 | * buffer. | |
5155 | */ | |
5156 | *snapshot = ods; | |
5157 | ||
5158 | MEMORYSTATUS_DEBUG(7, "memorystatus_get_on_demand_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n", | |
5159 | (long)input_size, (long)*snapshot_size, (long)ods_list_count); | |
316670eb | 5160 | |
3e170ce0 A |
5161 | return 0; |
5162 | } | |
5163 | ||
5164 | static int | |
5165 | memorystatus_get_jetsam_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { | |
5166 | size_t input_size = *snapshot_size; | |
5167 | ||
39236c6e A |
5168 | if (memorystatus_jetsam_snapshot_count > 0) { |
5169 | *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count)); | |
5170 | } else { | |
5171 | *snapshot_size = 0; | |
5172 | } | |
5173 | ||
5174 | if (size_only) { | |
5175 | return 0; | |
316670eb | 5176 | } |
39236c6e A |
5177 | |
5178 | if (input_size < *snapshot_size) { | |
5179 | return EINVAL; | |
5180 | } | |
5181 | ||
5182 | *snapshot = memorystatus_jetsam_snapshot; | |
3e170ce0 A |
5183 | |
5184 | MEMORYSTATUS_DEBUG(7, "memorystatus_get_jetsam_snapshot: returned inputsize (%ld), snapshot_size(%ld), listcount(%ld)\n", | |
5185 | (long)input_size, (long)*snapshot_size, (long)memorystatus_jetsam_snapshot_count); | |
5186 | ||
39236c6e | 5187 | return 0; |
316670eb A |
5188 | } |
5189 | ||
fe8ab488 | 5190 | |
316670eb | 5191 | static int |
3e170ce0 | 5192 | memorystatus_cmd_get_jetsam_snapshot(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval) { |
39236c6e A |
5193 | int error = EINVAL; |
5194 | boolean_t size_only; | |
3e170ce0 A |
5195 | boolean_t is_default_snapshot = FALSE; |
5196 | boolean_t is_on_demand_snapshot = FALSE; | |
5197 | boolean_t is_at_boot_snapshot = FALSE; | |
39236c6e | 5198 | memorystatus_jetsam_snapshot_t *snapshot; |
3e170ce0 | 5199 | |
39236c6e | 5200 | size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE); |
3e170ce0 A |
5201 | |
5202 | if (flags == 0) { | |
5203 | /* Default */ | |
5204 | is_default_snapshot = TRUE; | |
5205 | error = memorystatus_get_jetsam_snapshot(&snapshot, &buffer_size, size_only); | |
5206 | } else { | |
5207 | if (flags & ~(MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT)) { | |
5208 | /* | |
5209 | * Unsupported bit set in flag. | |
5210 | */ | |
5211 | return EINVAL; | |
5212 | } | |
5213 | ||
5214 | if ((flags & (MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT)) == | |
5215 | (MEMORYSTATUS_SNAPSHOT_ON_DEMAND | MEMORYSTATUS_SNAPSHOT_AT_BOOT)) { | |
5216 | /* | |
5217 | * Can't have both set at the same time. | |
5218 | */ | |
5219 | return EINVAL; | |
5220 | } | |
5221 | ||
5222 | if (flags & MEMORYSTATUS_SNAPSHOT_ON_DEMAND) { | |
5223 | is_on_demand_snapshot = TRUE; | |
5224 | /* | |
5225 | * When not requesting the size only, the following call will allocate | |
5226 | * an on_demand snapshot buffer, which is freed below. | |
5227 | */ | |
5228 | error = memorystatus_get_on_demand_snapshot(&snapshot, &buffer_size, size_only); | |
5229 | ||
5230 | } else if (flags & MEMORYSTATUS_SNAPSHOT_AT_BOOT) { | |
5231 | is_at_boot_snapshot = TRUE; | |
5232 | error = memorystatus_get_at_boot_snapshot(&snapshot, &buffer_size, size_only); | |
5233 | } else { | |
5234 | /* | |
5235 | * Invalid flag setting. | |
5236 | */ | |
5237 | return EINVAL; | |
5238 | } | |
5239 | } | |
5240 | ||
39236c6e A |
5241 | if (error) { |
5242 | goto out; | |
5243 | } | |
316670eb | 5244 | |
3e170ce0 A |
5245 | /* |
5246 | * Copy the data out to user space and clear the snapshot buffer. | |
5247 | * If working with the jetsam snapshot, | |
5248 | * clearing the buffer means resetting the count. | |
5249 | * If working with an on_demand snapshot, | |
5250 | * clearing the buffer means freeing it. | |
5251 | * If working with the at_boot snapshot, | |
5252 | * there is nothing to clear or update. | |
5253 | */ | |
39236c6e A |
5254 | if (!size_only) { |
5255 | if ((error = copyout(snapshot, buffer, buffer_size)) == 0) { | |
3e170ce0 A |
5256 | if (is_default_snapshot) { |
5257 | /* | |
5258 | * The jetsam snapshot is never freed, its count is simply reset. | |
5259 | */ | |
5260 | snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
5261 | ||
5262 | proc_list_lock(); | |
5263 | memorystatus_jetsam_snapshot_last_timestamp = 0; | |
5264 | proc_list_unlock(); | |
5265 | } | |
5266 | } | |
5267 | ||
5268 | if (is_on_demand_snapshot) { | |
5269 | /* | |
5270 | * The on_demand snapshot is always freed, | |
5271 | * even if the copyout failed. | |
5272 | */ | |
5273 | if(snapshot) { | |
5274 | kfree(snapshot, buffer_size); | |
5275 | } | |
39236c6e A |
5276 | } |
5277 | } | |
316670eb | 5278 | |
39236c6e A |
5279 | if (error == 0) { |
5280 | *retval = buffer_size; | |
5281 | } | |
5282 | out: | |
5283 | return error; | |
5284 | } | |
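/*
 * Flag usage summary (grounded in the checks above): flags == 0 reads the
 * live jetsam snapshot and resets its count on a successful copyout;
 * MEMORYSTATUS_SNAPSHOT_ON_DEMAND builds (and later frees) a fresh copy of
 * the current process list; MEMORYSTATUS_SNAPSHOT_AT_BOOT returns just the
 * header captured at boot. The two flag bits are mutually exclusive. Callers
 * would use the same size-probe-then-fetch pattern as the priority list.
 */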
316670eb | 5285 | |
fe8ab488 A |
5286 | /* |
5287 | * Routine: memorystatus_cmd_grp_set_properties | |
5288 | * Purpose: Update properties for a group of processes. | |
5289 | * | |
5290 | * Supported Properties: | |
5291 | * [priority] | |
5292 | * Move each process out of its effective priority | |
5293 | * band and into a new priority band. | |
5294 | * Maintains relative order from lowest to highest priority. | |
5295 | * Within a single band, maintains relative order from head to tail. | |
5296 | * | |
5297 | * eg: before [effectivepriority | pid] | |
5298 | * [18 | p101 ] | |
5299 | * [17 | p55, p67, p19 ] | |
5300 | * [12 | p103 p10 ] | |
5301 | * [ 7 | p25 ] | |
5302 | * [ 0 | p71, p82, ] | |
5303 | * | |
5304 | * after [ new band | pid] | |
5305 | * [ xxx | p71, p82, p25, p103, p10, p55, p67, p19, p101] | |
5306 | * | |
5307 | * Returns: 0 on success, else non-zero. | |
5308 | * | |
5309 | * Caveat: We know there is a race window regarding recycled pids. | |
5310 | * A process could be killed before the kernel can act on it here. | |
5311 | * If a pid cannot be found in any of the jetsam priority bands, | |
5312 | * then we simply ignore it. No harm. | |
5313 | * But, if the pid has been recycled then it could be an issue. | |
5314 | * In that scenario, we might move an unsuspecting process to the new | |
5315 | * priority band. It's not clear how the kernel can safeguard | |
5316 | * against this, but it would be an extremely rare case anyway. | |
5317 | * The caller of this api might avoid such race conditions by | |
5318 | * ensuring that the processes passed in the pid list are suspended. | |
5319 | */ | |
5320 | ||
5321 | ||
5322 | /* This internal structure can expand when we add support for more properties */ | |
5323 | typedef struct memorystatus_internal_properties | |
5324 | { | |
5325 | proc_t proc; | |
5326 | int32_t priority; /* see memorytstatus_priority_entry_t : priority */ | |
5327 | } memorystatus_internal_properties_t; | |
5328 | ||
5329 | ||
5330 | static int | |
5331 | memorystatus_cmd_grp_set_properties(int32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { | |
5332 | ||
5333 | #pragma unused (flags) | |
5334 | ||
5335 | /* | |
5336 | * We only handle setting priority | |
5337 | * per process | |
5338 | */ | |
5339 | ||
5340 | int error = 0; | |
5341 | memorystatus_priority_entry_t *entries = NULL; | |
5342 | uint32_t entry_count = 0; | |
5343 | ||
5344 | /* This will be the ordered proc list */ | |
5345 | memorystatus_internal_properties_t *table = NULL; | |
5346 | size_t table_size = 0; | |
5347 | uint32_t table_count = 0; | |
5348 | ||
5349 | uint32_t i = 0; | |
5350 | uint32_t bucket_index = 0; | |
5351 | boolean_t head_insert; | |
5352 | int32_t new_priority; | |
5353 | ||
5354 | proc_t p; | |
5355 | ||
5356 | /* Verify inputs */ | |
5357 | if ((buffer == USER_ADDR_NULL) || (buffer_size == 0) || ((buffer_size % sizeof(memorystatus_priority_entry_t)) != 0)) { | |
5358 | error = EINVAL; | |
5359 | goto out; | |
5360 | } | |
5361 | ||
5362 | entry_count = (buffer_size / sizeof(memorystatus_priority_entry_t)); | |
5363 | if ((entries = (memorystatus_priority_entry_t *)kalloc(buffer_size)) == NULL) { | |
5364 | error = ENOMEM; | |
5365 | goto out; | |
5366 | } | |
5367 | ||
5368 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, entry_count, 0, 0, 0, 0); | |
5369 | ||
5370 | if ((error = copyin(buffer, entries, buffer_size)) != 0) { | |
5371 | goto out; | |
5372 | } | |
5373 | ||
5374 | /* Verify sanity of input priorities */ | |
5375 | for (i=0; i < entry_count; i++) { | |
5376 | if (entries[i].priority == -1) { | |
5377 | /* Use as shorthand for default priority */ | |
5378 | entries[i].priority = JETSAM_PRIORITY_DEFAULT; | |
5379 | } else if (entries[i].priority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
5380 | /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; | |
5381 | * if requested, adjust to JETSAM_PRIORITY_IDLE. */ | |
5382 | entries[i].priority = JETSAM_PRIORITY_IDLE; | |
5383 | } else if (entries[i].priority == JETSAM_PRIORITY_IDLE_HEAD) { | |
5384 | /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle | |
5385 | * queue */ | |
5386 | /* Deal with this later */ | |
5387 | } else if ((entries[i].priority < 0) || (entries[i].priority >= MEMSTAT_BUCKET_COUNT)) { | |
5388 | /* Sanity check */ | |
5389 | error = EINVAL; | |
5390 | goto out; | |
5391 | } | |
5392 | } | |
5393 | ||
5394 | table_size = sizeof(memorystatus_internal_properties_t) * entry_count; | |
5395 | if ( (table = (memorystatus_internal_properties_t *)kalloc(table_size)) == NULL) { | |
5396 | error = ENOMEM; | |
5397 | goto out; | |
5398 | } | |
5399 | memset(table, 0, table_size); | |
5400 | ||
5401 | ||
5402 | /* | |
5403 | * For each jetsam bucket entry, spin through the input property list. | |
5404 | * When a matching pid is found, populate an adjacent table with the | |
5405 | * appropriate proc pointer and new property values. | |
5406 | * This traversal automatically preserves order from lowest | |
5407 | * to highest priority. | |
5408 | */ | |
5409 | ||
5410 | bucket_index=0; | |
5411 | ||
5412 | proc_list_lock(); | |
5413 | ||
5414 | /* Create the ordered table */ | |
5415 | p = memorystatus_get_first_proc_locked(&bucket_index, TRUE); | |
5416 | while (p && (table_count < entry_count)) { | |
5417 | for (i=0; i < entry_count; i++ ) { | |
5418 | if (p->p_pid == entries[i].pid) { | |
5419 | /* Build the table data */ | |
5420 | table[table_count].proc = p; | |
5421 | table[table_count].priority = entries[i].priority; | |
5422 | table_count++; | |
5423 | break; | |
5424 | } | |
5425 | } | |
5426 | p = memorystatus_get_next_proc_locked(&bucket_index, p, TRUE); | |
5427 | } | |
5428 | ||
5429 | /* We now have an ordered list of procs ready to move */ | |
5430 | for (i=0; i < table_count; i++) { | |
5431 | p = table[i].proc; | |
5432 | assert(p != NULL); | |
5433 | ||
5434 | /* Allow head inserts -- but relative order is now lost */ | |
5435 | if (table[i].priority == JETSAM_PRIORITY_IDLE_HEAD) { | |
5436 | new_priority = JETSAM_PRIORITY_IDLE; | |
5437 | head_insert = true; | |
5438 | } else { | |
5439 | new_priority = table[i].priority; | |
5440 | head_insert = false; | |
5441 | } | |
5442 | ||
5443 | /* Not allowed */ | |
5444 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { | |
5445 | continue; | |
5446 | } | |
5447 | ||
5448 | /* | |
5449 | * Take appropriate steps if moving proc out of the | |
5450 | * JETSAM_PRIORITY_IDLE_DEFERRED band. | |
5451 | */ | |
5452 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
5453 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
5454 | } | |
5455 | ||
5456 | memorystatus_update_priority_locked(p, new_priority, head_insert); | |
5457 | } | |
5458 | ||
5459 | proc_list_unlock(); | |
5460 | ||
5461 | /* | |
5462 | * if (table_count != entry_count) | |
5463 | * then some pids were not found in a jetsam band. | |
5464 | * harmless but interesting... | |
5465 | */ | |
5466 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, entry_count, table_count, 0, 0, 0); | |
5467 | ||
5468 | out: | |
5469 | if (entries) | |
5470 | kfree(entries, buffer_size); | |
5471 | if (table) | |
5472 | kfree(table, table_size); | |
5473 | ||
5474 | return (error); | |
5475 | } | |
5476 | ||
5477 | ||
5478 | /* | |
3e170ce0 A |
5479 | * This routine is used to update a process's jetsam priority position and stored user_data. |
5480 | * It is not used for the setting of memory limits, which is why the last 6 args to the | |
5481 | * memorystatus_update() call are 0 or FALSE. | |
fe8ab488 A |
5482 | */ |
5483 | ||
39236c6e A |
5484 | static int |
5485 | memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { | |
3e170ce0 A |
5486 | int error = 0; |
5487 | memorystatus_priority_properties_t mpp_entry; | |
5488 | ||
39236c6e | 5489 | /* Validate inputs */ |
3e170ce0 | 5490 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_priority_properties_t))) { |
39236c6e A |
5491 | return EINVAL; |
5492 | } | |
5493 | ||
3e170ce0 A |
5494 | error = copyin(buffer, &mpp_entry, buffer_size); |
5495 | ||
5496 | if (error == 0) { | |
39236c6e A |
5497 | proc_t p; |
5498 | ||
39236c6e A |
5499 | p = proc_find(pid); |
5500 | if (!p) { | |
3e170ce0 | 5501 | return ESRCH; |
39236c6e A |
5502 | } |
5503 | ||
5504 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { | |
39236c6e | 5505 | proc_rele(p); |
3e170ce0 | 5506 | return EPERM; |
39236c6e | 5507 | } |
fe8ab488 | 5508 | |
3e170ce0 | 5509 | error = memorystatus_update(p, mpp_entry.priority, mpp_entry.user_data, FALSE, FALSE, 0, 0, FALSE, FALSE, FALSE); |
39236c6e A |
5510 | proc_rele(p); |
5511 | } | |
5512 | ||
3e170ce0 A |
5513 | return(error); |
5514 | } | |
5515 | ||
5516 | static int | |
5517 | memorystatus_cmd_set_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { | |
5518 | int error = 0; | |
5519 | memorystatus_memlimit_properties_t mmp_entry; | |
5520 | ||
5521 | /* Validate inputs */ | |
5522 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_memlimit_properties_t))) { | |
5523 | return EINVAL; | |
5524 | } | |
5525 | ||
5526 | error = copyin(buffer, &mmp_entry, buffer_size); | |
5527 | ||
5528 | if (error == 0) { | |
5529 | error = memorystatus_set_memlimit_properties(pid, &mmp_entry); | |
5530 | } | |
5531 | ||
5532 | return(error); | |
5533 | } | |
5534 | ||
5535 | /* | |
5536 | * When getting the memlimit settings, we can't simply call task_get_phys_footprint_limit(). | |
5537 | * That gets the proc's cached memlimit and there is no guarantee that the active/inactive | |
5538 | * limits will be the same in the no-limit case. Instead we convert limits <= 0 using | |
5539 | * task_convert_phys_footprint_limit(). It computes the same limit value that would be written | |
5540 | * to the task's ledgers via task_set_phys_footprint_limit(). | |
5541 | */ | |
5542 | static int | |
5543 | memorystatus_cmd_get_memlimit_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { | |
5544 | int error = 0; | |
5545 | memorystatus_memlimit_properties_t mmp_entry; | |
5546 | ||
5547 | /* Validate inputs */ | |
5548 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size != sizeof(memorystatus_memlimit_properties_t))) { | |
5549 | return EINVAL; | |
5550 | } | |
5551 | ||
5552 | memset (&mmp_entry, 0, sizeof(memorystatus_memlimit_properties_t)); | |
5553 | ||
5554 | proc_t p = proc_find(pid); | |
5555 | if (!p) { | |
5556 | return ESRCH; | |
5557 | } | |
5558 | ||
5559 | /* | |
5560 | * Get the active limit and attributes. | |
5561 | * No locks taken since we hold a reference to the proc. | |
5562 | */ | |
5563 | ||
5564 | if (p->p_memstat_memlimit_active > 0 ) { | |
5565 | mmp_entry.memlimit_active = p->p_memstat_memlimit_active; | |
5566 | } else { | |
5567 | task_convert_phys_footprint_limit(-1, &mmp_entry.memlimit_active); | |
5568 | } | |
5569 | ||
5570 | if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_ACTIVE_FATAL) { | |
5571 | mmp_entry.memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
5572 | } | |
5573 | ||
5574 | /* | |
5575 | * Get the inactive limit and attributes | |
5576 | */ | |
5577 | if (p->p_memstat_memlimit_inactive <= 0) { | |
5578 | task_convert_phys_footprint_limit(-1, &mmp_entry.memlimit_inactive); | |
5579 | } else { | |
5580 | mmp_entry.memlimit_inactive = p->p_memstat_memlimit_inactive; | |
5581 | } | |
5582 | if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_INACTIVE_FATAL) { | |
5583 | mmp_entry.memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL; | |
5584 | } | |
5585 | proc_rele(p); | |
5586 | ||
5587 | error = copyout(&mmp_entry, buffer, buffer_size); | |
5588 | ||
5589 | return(error); | |
b0d623f7 A |
5590 | } |


static int
memorystatus_cmd_get_pressure_status(int32_t *retval) {
	int error;

	/* Need privilege for check */
	error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
	if (error) {
		return (error);
	}

	/* Inherently racy, so it's not worth taking a lock here */
	*retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;

	return error;
}

int
memorystatus_get_pressure_status_kdp(void) {
	return (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;
}
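
/*
 * Illustrative sketch (not part of this file): polling pressure state from
 * userspace. The caller needs the PRIV_VM_PRESSURE privilege checked above;
 * on success the syscall wrapper is assumed to return the retval set by the
 * handler -- 1 when the level is above kVMPressureNormal, 0 otherwise.
 *
 *	int pressured = memorystatus_control(MEMORYSTATUS_CMD_GET_PRESSURE_STATUS,
 *	                                     0, 0, NULL, 0);
 *	if (pressured == 1) {
 *		// shed caches, defer work, etc.
 *	}
 */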

/*
 * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM.
 *
 * This call is inflexible -- it does not distinguish between active/inactive or fatal/non-fatal
 * limits. So, with the 2-level HWM scheme, preserving previous behavior maps as follows:
 * - treat the limit passed in as both an active and inactive limit.
 * - treat the is_fatal_limit flag as though it applies to both active and inactive limits.
 *
 * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK
 * - is_fatal_limit is FALSE, meaning the active and inactive limits are non-fatal/soft
 * - so the mapping is (active/non-fatal, inactive/non-fatal)
 *
 * When invoked via MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT
 * - is_fatal_limit is TRUE, meaning the process's active and inactive limits are fatal/hard
 * - so the mapping is (active/fatal, inactive/fatal)
 */

static int
memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit) {
	int error = 0;
	memorystatus_memlimit_properties_t entry;

	entry.memlimit_active = high_water_mark;
	entry.memlimit_active_attr = 0;
	entry.memlimit_inactive = high_water_mark;
	entry.memlimit_inactive_attr = 0;

	if (is_fatal_limit == TRUE) {
		entry.memlimit_active_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
		entry.memlimit_inactive_attr |= MEMORYSTATUS_MEMLIMIT_ATTR_FATAL;
	}

	error = memorystatus_set_memlimit_properties(pid, &entry);
	return (error);
}
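
/*
 * Illustrative sketch (not part of this file): the two legacy commands that
 * funnel into the helper above. The limit travels in the flags argument
 * (in MB) and no buffer is used; 100 is a made-up value.
 *
 *	// maps to (active/non-fatal, inactive/non-fatal)
 *	memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK, pid, 100, NULL, 0);
 *
 *	// maps to (active/fatal, inactive/fatal)
 *	memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT, pid, 100, NULL, 0);
 */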

static int
memorystatus_set_memlimit_properties(pid_t pid, memorystatus_memlimit_properties_t *entry) {

	int32_t   memlimit_active;
	boolean_t memlimit_active_is_fatal;
	int32_t   memlimit_inactive;
	boolean_t memlimit_inactive_is_fatal;
	uint32_t  valid_attrs = 0;
	int       error = 0;

	proc_t p = proc_find(pid);
	if (!p) {
		return ESRCH;
	}

	/*
	 * Check for valid attribute flags.
	 */
	valid_attrs |= (MEMORYSTATUS_MEMLIMIT_ATTR_FATAL);
	if ((entry->memlimit_active_attr & (~valid_attrs)) != 0) {
		proc_rele(p);
		return EINVAL;
	}
	if ((entry->memlimit_inactive_attr & (~valid_attrs)) != 0) {
		proc_rele(p);
		return EINVAL;
	}

	/*
	 * Setup the active memlimit properties
	 */
	memlimit_active = entry->memlimit_active;
	if (entry->memlimit_active_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) {
		memlimit_active_is_fatal = TRUE;
	} else {
		memlimit_active_is_fatal = FALSE;
	}

	/*
	 * Setup the inactive memlimit properties
	 */
	memlimit_inactive = entry->memlimit_inactive;
	if (entry->memlimit_inactive_attr & MEMORYSTATUS_MEMLIMIT_ATTR_FATAL) {
		memlimit_inactive_is_fatal = TRUE;
	} else {
		memlimit_inactive_is_fatal = FALSE;
	}

	/*
	 * Setting a limit of <= 0 implies that the process has no
	 * high-water-mark and has no per-task-limit.  That means
	 * the system-wide task limit is in place, and that limit
	 * is always fatal.
	 */

	if (memlimit_active <= 0) {
		/*
		 * Enforce the fatal system-wide task limit while the process is active.
		 */
		memlimit_active = -1;
		memlimit_active_is_fatal = TRUE;
	}

	if (memlimit_inactive <= 0) {
		/*
		 * Enforce the fatal system-wide task limit while the process is inactive.
		 */
		memlimit_inactive = -1;
		memlimit_inactive_is_fatal = TRUE;
	}

	proc_list_lock();

	/*
	 * Store the active limit variants in the proc.
	 */
	SET_ACTIVE_LIMITS_LOCKED(p, memlimit_active, memlimit_active_is_fatal);

	/*
	 * Store the inactive limit variants in the proc.
	 */
	SET_INACTIVE_LIMITS_LOCKED(p, memlimit_inactive, memlimit_inactive_is_fatal);

	/*
	 * Enforce the appropriate limit variant by updating the cached values
	 * and writing the ledger.
	 * Limit choice is based on process active/inactive state.
	 */

	if (memorystatus_highwater_enabled) {
		boolean_t trigger_exception;
		/*
		 * No need to consider P_MEMSTAT_MEMLIMIT_BACKGROUND anymore.
		 * Background limits are described via the inactive limit slots.
		 */

		if (proc_jetsam_state_is_active_locked(p) == TRUE) {
			CACHE_ACTIVE_LIMITS_LOCKED(p, trigger_exception);
		} else {
			CACHE_INACTIVE_LIMITS_LOCKED(p, trigger_exception);
		}

		/* Enforce the limit by writing to the ledgers */
		assert(trigger_exception == TRUE);
		error = (task_set_phys_footprint_limit_internal(p->task, ((p->p_memstat_memlimit > 0) ? p->p_memstat_memlimit : -1), NULL, trigger_exception) == 0) ? 0 : EINVAL;

		MEMORYSTATUS_DEBUG(3, "memorystatus_set_memlimit_properties: new limit on pid %d (%dMB %s) current priority (%d) dirty_state?=0x%x %s\n",
				   p->p_pid, (p->p_memstat_memlimit > 0 ? p->p_memstat_memlimit : -1),
				   (p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT ? "F " : "NF"), p->p_memstat_effectivepriority, p->p_memstat_dirty,
				   (p->p_memstat_dirty ? ((p->p_memstat_dirty & P_DIRTY) ? "isdirty" : "isclean") : ""));
	}

	proc_list_unlock();
	proc_rele(p);

	return error;
}

/*
 * Returns the jetsam priority (effective or requested) of the process
 * associated with this task.
 */
int
proc_get_memstat_priority(proc_t p, boolean_t effective_priority)
{
	if (p) {
		if (effective_priority) {
			return p->p_memstat_effectivepriority;
		} else {
			return p->p_memstat_requestedpriority;
		}
	}
	return 0;
}

/*
 * Description:
 *	Evaluates active vs. inactive process state.
 *	Processes that opt into dirty tracking are evaluated
 *	based on clean vs. dirty state.
 *	dirty ==> active
 *	clean ==> inactive
 *
 *	Processes that do not opt into dirty tracking are
 *	evaluated based on priority level.
 *	Foreground or above ==> active
 *	Below Foreground    ==> inactive
 *
 * Return: TRUE if active
 *	   FALSE if inactive
 */

static boolean_t
proc_jetsam_state_is_active_locked(proc_t p) {

	if (p->p_memstat_dirty & P_DIRTY_TRACK) {
		/*
		 * process has opted into dirty tracking
		 * active state is based on dirty vs. clean
		 */
		if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
			/*
			 * process is dirty
			 * implies active state
			 */
			return TRUE;
		} else {
			/*
			 * process is clean
			 * implies inactive state
			 */
			return FALSE;
		}
	} else if (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND) {
		/*
		 * process is Foreground or higher
		 * implies active state
		 */
		return TRUE;
	} else {
		/*
		 * process found below Foreground
		 * implies inactive state
		 */
		return FALSE;
	}
}
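
/*
 * Illustrative sketch (not part of this file): how a daemon opts into the
 * dirty tracking consumed above. This assumes the private libproc interface
 * (proc_track_dirty/proc_set_dirty and the PROC_DIRTY_* flags); exact names
 * and availability vary by platform and SDK.
 *
 *	#include <libproc.h>
 *
 *	// opt in: from now on, clean implies inactive to memorystatus
 *	proc_track_dirty(getpid(), PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT);
 *
 *	proc_set_dirty(getpid(), true);		// doing work: evaluated as active
 *	// ... handle request ...
 *	proc_set_dirty(getpid(), false);	// idle again: evaluated as inactive
 */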

#endif /* CONFIG_JETSAM */

int
memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) {
	int error = EINVAL;

#if !CONFIG_JETSAM
	#pragma unused(ret)
#endif

	/* Root only for now */
	if (!kauth_cred_issuser(kauth_cred_get())) {
		error = EPERM;
		goto out;
	}

	/* Sanity check */
	if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

	switch (args->command) {
	case MEMORYSTATUS_CMD_GET_PRIORITY_LIST:
		error = memorystatus_cmd_get_priority_list(args->buffer, args->buffersize, ret);
		break;
#if CONFIG_JETSAM
	case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES:
		error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret);
		break;
	case MEMORYSTATUS_CMD_SET_MEMLIMIT_PROPERTIES:
		error = memorystatus_cmd_set_memlimit_properties(args->pid, args->buffer, args->buffersize, ret);
		break;
	case MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES:
		error = memorystatus_cmd_get_memlimit_properties(args->pid, args->buffer, args->buffersize, ret);
		break;
	case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES:
		error = memorystatus_cmd_grp_set_properties((int32_t)args->flags, args->buffer, args->buffersize, ret);
		break;
	case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT:
		error = memorystatus_cmd_get_jetsam_snapshot((int32_t)args->flags, args->buffer, args->buffersize, ret);
		break;
	case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS:
		error = memorystatus_cmd_get_pressure_status(ret);
		break;
	case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK:
		/*
		 * This call does not distinguish between active and inactive limits.
		 * The default behavior in the 2-level HWM world is to set both.
		 * A non-fatal limit is also assumed for both.
		 */
		error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE);
		break;
	case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT:
		/*
		 * This call does not distinguish between active and inactive limits.
		 * The default behavior in the 2-level HWM world is to set both.
		 * A fatal limit is also assumed for both.
		 */
		error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE);
		break;
	/* Test commands */
#if DEVELOPMENT || DEBUG
	case MEMORYSTATUS_CMD_TEST_JETSAM:
		error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 0 : EINVAL;
		break;
	case MEMORYSTATUS_CMD_TEST_JETSAM_SORT:
		error = memorystatus_cmd_test_jetsam_sort(args->pid, (int32_t)args->flags);
		break;
	case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS:
		error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize);
		break;
#endif /* DEVELOPMENT || DEBUG */
#endif /* CONFIG_JETSAM */
	case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE:
	case MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE:
		error = memorystatus_low_mem_privileged_listener(args->command);
		break;
	default:
		break;
	}

out:
	return error;
}


static int
filt_memorystatusattach(struct knote *kn)
{
	kn->kn_flags |= EV_CLEAR;
	return memorystatus_knote_register(kn);
}

static void
filt_memorystatusdetach(struct knote *kn)
{
	memorystatus_knote_unregister(kn);
}

static int
filt_memorystatus(struct knote *kn, long hint)
{
	if (hint) {
		switch (hint) {
		case kMemorystatusNoPressure:
			if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
				kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
			}
			break;
		case kMemorystatusPressure:
			if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) {
				if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
					kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
				}
			} else if (memorystatus_vm_pressure_level == kVMPressureCritical) {
				if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
					kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
				}
			}
			break;
		case kMemorystatusLowSwap:
			if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) {
				kn->kn_fflags = NOTE_MEMORYSTATUS_LOW_SWAP;
			}
			break;
		default:
			break;
		}
	}

	return (kn->kn_fflags != 0);
}
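
/*
 * Illustrative sketch (not part of this file): a userspace client attaching a
 * memorystatus knote through kqueue. EVFILT_MEMORYSTATUS is a private filter,
 * so this builds only against internal headers. Registration fails with
 * ENOTSUP unless at least one supported NOTE_MEMORYSTATUS_* flag is requested
 * (see memorystatus_knote_register below).
 *
 *	int kq = kqueue();
 *	struct kevent ke;
 *	EV_SET(&ke, 0, EVFILT_MEMORYSTATUS, EV_ADD,
 *	       NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN, 0, NULL);
 *	kevent(kq, &ke, 1, NULL, 0, NULL);	// register
 *
 *	struct kevent out;
 *	if (kevent(kq, NULL, 0, &out, 1, NULL) == 1) {
 *		// out.fflags carries the delivered NOTE_MEMORYSTATUS_* level
 *	}
 */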

static void
memorystatus_klist_lock(void) {
	lck_mtx_lock(&memorystatus_klist_mutex);
}

static void
memorystatus_klist_unlock(void) {
	lck_mtx_unlock(&memorystatus_klist_mutex);
}

void
memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) {
	lck_mtx_init(&memorystatus_klist_mutex, grp, attr);
	klist_init(&memorystatus_klist);
}

int
memorystatus_knote_register(struct knote *kn) {
	int error = 0;

	memorystatus_klist_lock();

	if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP)) {
		KNOTE_ATTACH(&memorystatus_klist, kn);
	} else {
		error = ENOTSUP;
	}

	memorystatus_klist_unlock();

	return error;
}

void
memorystatus_knote_unregister(struct knote *kn) {
	memorystatus_klist_lock();
	KNOTE_DETACH(&memorystatus_klist, kn);
	memorystatus_klist_unlock();
}


#if 0
#if CONFIG_JETSAM && VM_PRESSURE_EVENTS
static boolean_t
memorystatus_issue_pressure_kevent(boolean_t pressured) {
	memorystatus_klist_lock();
	KNOTE(&memorystatus_klist, pressured ? kMemorystatusPressure : kMemorystatusNoPressure);
	memorystatus_klist_unlock();
	return TRUE;
}
#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
#endif /* 0 */

#if CONFIG_JETSAM
/* Coalition support */

/* sorting info for a particular priority bucket */
typedef struct memstat_sort_info {
	coalition_t	msi_coal;
	uint64_t	msi_page_count;
	pid_t		msi_pid;
	int		msi_ntasks;
} memstat_sort_info_t;

/*
 * qsort compare function: orders from smallest page count to largest page count.
 *
 * return < 0 for a < b
 *          0 for a == b
 *        > 0 for a > b
 *
 * Compare explicitly rather than returning the truncated difference:
 * casting the uint64_t difference to int can wrap and yield the wrong sign.
 */
static int memstat_asc_cmp(const void *a, const void *b)
{
	const memstat_sort_info_t *msA = (const memstat_sort_info_t *)a;
	const memstat_sort_info_t *msB = (const memstat_sort_info_t *)b;

	if (msA->msi_page_count < msB->msi_page_count) {
		return -1;
	} else if (msA->msi_page_count > msB->msi_page_count) {
		return 1;
	}
	return 0;
}

/*
 * Return the number of pids rearranged during this sort.
 */
static int
memorystatus_sort_by_largest_coalition_locked(unsigned int bucket_index, int coal_sort_order)
{
#define MAX_SORT_PIDS		80
#define MAX_COAL_LEADERS	10

	unsigned int b = bucket_index;
	int nleaders = 0;
	int ntasks = 0;
	proc_t p = NULL;
	coalition_t coal = COALITION_NULL;
	int pids_moved = 0;
	int total_pids_moved = 0;
	int i;

	/*
	 * The system is typically under memory pressure when in this
	 * path, hence, we want to avoid dynamic memory allocation.
	 */
	memstat_sort_info_t leaders[MAX_COAL_LEADERS];
	pid_t pid_list[MAX_SORT_PIDS];

	if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
		return(0);
	}

	/*
	 * Clear the array that holds coalition leader information
	 */
	for (i = 0; i < MAX_COAL_LEADERS; i++) {
		leaders[i].msi_coal = COALITION_NULL;
		leaders[i].msi_page_count = 0;	/* will hold total coalition page count */
		leaders[i].msi_pid = 0;		/* will hold coalition leader pid */
		leaders[i].msi_ntasks = 0;	/* will hold the number of tasks in a coalition */
	}

	p = memorystatus_get_first_proc_locked(&b, FALSE);
	while (p) {
		if (coalition_is_leader(p->task, COALITION_TYPE_JETSAM, &coal)) {
			if (nleaders < MAX_COAL_LEADERS) {
				int coal_ntasks = 0;
				uint64_t coal_page_count = coalition_get_page_count(coal, &coal_ntasks);
				leaders[nleaders].msi_coal = coal;
				leaders[nleaders].msi_page_count = coal_page_count;
				leaders[nleaders].msi_pid = p->p_pid;	/* the coalition leader */
				leaders[nleaders].msi_ntasks = coal_ntasks;
				nleaders++;
			} else {
				/*
				 * We've hit MAX_COAL_LEADERS meaning we can handle no more coalitions.
				 * Abandoned coalitions will linger at the tail of the priority band
				 * when this sort session ends.
				 * TODO: should this be an assert?
				 */
				printf("%s: WARNING: more than %d leaders in priority band [%d]\n",
				       __FUNCTION__, MAX_COAL_LEADERS, bucket_index);
				break;
			}
		}
		p = memorystatus_get_next_proc_locked(&b, p, FALSE);
	}

	if (nleaders == 0) {
		/* Nothing to sort */
		return(0);
	}

	/*
	 * Sort the coalition leader array, from smallest coalition page count
	 * to largest coalition page count. When inserted in the priority bucket,
	 * the smallest coalition is handled first, resulting in it being the last to be jetsammed.
	 */
	if (nleaders > 1) {
		qsort(leaders, nleaders, sizeof(memstat_sort_info_t), memstat_asc_cmp);
	}

#if 0
	for (i = 0; i < nleaders; i++) {
		printf("%s: coal_leader[%d of %d] pid[%d] pages[%llu] ntasks[%d]\n",
		       __FUNCTION__, i, nleaders, leaders[i].msi_pid, leaders[i].msi_page_count,
		       leaders[i].msi_ntasks);
	}
#endif

	/*
	 * During coalition sorting, processes in a priority band are rearranged
	 * by being re-inserted at the head of the queue. So, when handling a
	 * list, the first process that gets moved to the head of the queue
	 * ultimately gets pushed toward the queue tail, and hence, jetsams last.
	 *
	 * So, for example, the coalition leader is expected to jetsam last,
	 * after its coalition members. Therefore, the coalition leader is
	 * inserted at the head of the queue first.
	 *
	 * After processing a coalition, the jetsam order is as follows:
	 *   undefs(jetsam first), extensions, xpc services, leader(jetsam last)
	 */

	/*
	 * Coalition members are rearranged in the priority bucket here,
	 * based on their coalition role.
	 */
	total_pids_moved = 0;
	for (i = 0; i < nleaders; i++) {

		/* a bit of bookkeeping */
		pids_moved = 0;

		/* Coalition leaders are jetsammed last, so move into place first */
		pid_list[0] = leaders[i].msi_pid;
		pids_moved += memorystatus_move_list_locked(bucket_index, pid_list, 1);

		/* xpc services should jetsam after extensions */
		ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_XPC,
						coal_sort_order, pid_list, MAX_SORT_PIDS);

		if (ntasks > 0) {
			pids_moved += memorystatus_move_list_locked(bucket_index, pid_list,
								    (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
		}

		/* extensions should jetsam after unmarked processes */
		ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_EXT,
						coal_sort_order, pid_list, MAX_SORT_PIDS);

		if (ntasks > 0) {
			pids_moved += memorystatus_move_list_locked(bucket_index, pid_list,
								    (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
		}

		/* undefined coalition members should be the first to jetsam */
		ntasks = coalition_get_pid_list(leaders[i].msi_coal, COALITION_ROLEMASK_UNDEF,
						coal_sort_order, pid_list, MAX_SORT_PIDS);

		if (ntasks > 0) {
			pids_moved += memorystatus_move_list_locked(bucket_index, pid_list,
								    (ntasks <= MAX_SORT_PIDS ? ntasks : MAX_SORT_PIDS));
		}

#if 0
		if (pids_moved == leaders[i].msi_ntasks) {
			/*
			 * All the pids in the coalition were found in this band.
			 */
			printf("%s: pids_moved[%d] equal total coalition ntasks[%d] \n", __FUNCTION__,
			       pids_moved, leaders[i].msi_ntasks);
		} else if (pids_moved > leaders[i].msi_ntasks) {
			/*
			 * Apparently new coalition members showed up during the sort?
			 */
			printf("%s: pids_moved[%d] were greater than expected coalition ntasks[%d] \n", __FUNCTION__,
			       pids_moved, leaders[i].msi_ntasks);
		} else {
			/*
			 * Apparently not all the pids in the coalition were found in this band?
			 */
			printf("%s: pids_moved[%d] were less than expected coalition ntasks[%d] \n", __FUNCTION__,
			       pids_moved, leaders[i].msi_ntasks);
		}
#endif

		total_pids_moved += pids_moved;

	} /* end for */

	return(total_pids_moved);
}


/*
 * Traverse a list of pids, searching for each within the priority band provided.
 * If a pid is found, move it to the front of the priority band.
 * Never searches outside the priority band provided.
 *
 * Input:
 *	bucket_index - jetsam priority band.
 *	pid_list - pointer to a list of pids.
 *	list_sz - number of pids in the list.
 *
 * Pid list ordering is important in that
 * pid_list[n] is expected to jetsam ahead of pid_list[n+1].
 * The sort_order is set by the coalition default.
 *
 * Return:
 *	the number of pids found and hence moved within the priority band.
 */
static int
memorystatus_move_list_locked(unsigned int bucket_index, pid_t *pid_list, int list_sz)
{
	memstat_bucket_t *current_bucket;
	int i;
	int found_pids = 0;

	if ((pid_list == NULL) || (list_sz <= 0)) {
		return(0);
	}

	if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
		return(0);
	}

	current_bucket = &memstat_bucket[bucket_index];
	for (i = 0; i < list_sz; i++) {
		unsigned int b = bucket_index;
		proc_t p = NULL;
		proc_t aProc = NULL;
		pid_t  aPid;
		int list_index;

		list_index = ((list_sz - 1) - i);
		aPid = pid_list[list_index];

		/* never search beyond bucket_index provided */
		p = memorystatus_get_first_proc_locked(&b, FALSE);
		while (p) {
			if (p->p_pid == aPid) {
				aProc = p;
				break;
			}
			p = memorystatus_get_next_proc_locked(&b, p, FALSE);
		}

		if (aProc == NULL) {
			/* pid not found in this band, just skip it */
			continue;
		} else {
			TAILQ_REMOVE(&current_bucket->list, aProc, p_memstat_list);
			TAILQ_INSERT_HEAD(&current_bucket->list, aProc, p_memstat_list);
			found_pids++;
		}
	}
	return(found_pids);
}
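
/*
 * Worked example (not part of this file): why the loop above consumes
 * pid_list from the tail. TAILQ_INSERT_HEAD reverses processing order, so
 * handling pid_list[list_sz - 1] first leaves pid_list[0] at the head of the
 * band, i.e. first to be jetsammed. With pid_list = {A, B, C}:
 *
 *	insert C at head  ->  band: C ...
 *	insert B at head  ->  band: B C ...
 *	insert A at head  ->  band: A B C ...	// A jetsams first
 */
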
#endif /* CONFIG_JETSAM */