]>
Commit | Line | Data |
---|---|---|
2d21ac55 A |
1 | /* |
2 | * Copyright (c) 2006 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | * | |
28 | */ | |
2d21ac55 | 29 | |
2d21ac55 | 30 | #include <kern/sched_prim.h> |
6d2010ae | 31 | #include <kern/kalloc.h> |
316670eb | 32 | #include <kern/assert.h> |
6d2010ae | 33 | #include <kern/debug.h> |
fe8ab488 | 34 | #include <kern/locks.h> |
2d21ac55 A |
35 | #include <kern/task.h> |
36 | #include <kern/thread.h> | |
316670eb | 37 | #include <kern/host.h> |
2d21ac55 | 38 | #include <libkern/libkern.h> |
316670eb | 39 | #include <mach/mach_time.h> |
b0d623f7 | 40 | #include <mach/task.h> |
316670eb | 41 | #include <mach/host_priv.h> |
39236c6e A |
42 | #include <mach/mach_host.h> |
43 | #include <pexpert/pexpert.h> | |
316670eb | 44 | #include <sys/kern_event.h> |
b0d623f7 | 45 | #include <sys/proc.h> |
39236c6e | 46 | #include <sys/proc_info.h> |
b0d623f7 A |
47 | #include <sys/signal.h> |
48 | #include <sys/signalvar.h> | |
2d21ac55 | 49 | #include <sys/sysctl.h> |
316670eb | 50 | #include <sys/sysproto.h> |
b0d623f7 | 51 | #include <sys/wait.h> |
6d2010ae | 52 | #include <sys/tree.h> |
316670eb | 53 | #include <sys/priv.h> |
39236c6e A |
54 | #include <vm/vm_pageout.h> |
55 | #include <vm/vm_protos.h> | |
6d2010ae A |
56 | |
57 | #if CONFIG_FREEZE | |
6d2010ae | 58 | #include <vm/vm_map.h> |
39236c6e | 59 | #endif /* CONFIG_FREEZE */ |
6d2010ae | 60 | |
316670eb | 61 | #include <sys/kern_memorystatus.h> |
6d2010ae | 62 | |
fe8ab488 A |
63 | #if CONFIG_JETSAM |
/* For logging clarity: human-readable names, indexed by jetsam kill cause. */
static const char *jetsam_kill_cause_name[] = {
	"",                  /* cause 0 is unused                   */
	"jettisoned",        /* kMemorystatusKilled                 */
	"highwater",         /* kMemorystatusKilledHiwat            */
	"vnode-limit",       /* kMemorystatusKilledVnodes           */
	"vm-pageshortage",   /* kMemorystatusKilledVMPageShortage   */
	"vm-thrashing",      /* kMemorystatusKilledVMThrashing      */
	"fc-thrashing",      /* kMemorystatusKilledFCThrashing      */
	"per-process-limit", /* kMemorystatusKilledPerProcessLimit  */
	"diagnostic",        /* kMemorystatusKilledDiagnostic       */
	"idle-exit",         /* kMemorystatusKilledIdleExit         */
};
77 | ||
78 | /* Does cause indicate vm or fc thrashing? */ | |
79 | static boolean_t | |
80 | is_thrashing(unsigned cause) | |
81 | { | |
82 | switch (cause) { | |
83 | case kMemorystatusKilledVMThrashing: | |
84 | case kMemorystatusKilledFCThrashing: | |
85 | return TRUE; | |
86 | default: | |
87 | return FALSE; | |
88 | } | |
89 | } | |
90 | ||
91 | /* Callback into vm_compressor.c to signal that thrashing has been mitigated. */ | |
92 | extern void vm_thrashing_jetsam_done(void); | |
93 | #endif | |
94 | ||
316670eb A |
/*
 * Very verbose debug printf()s. Compiled in only when the build defines
 * MEMORYSTATUS_DEBUG_LOG; otherwise the macro expands to nothing.
 */
#if MEMORYSTATUS_DEBUG_LOG
#define MEMORYSTATUS_DEBUG(cond, format, ...)          \
do {                                                   \
	if (cond) { printf(format, ##__VA_ARGS__); }   \
} while(0)
#else
#define MEMORYSTATUS_DEBUG(cond, format, ...)
#endif
6d2010ae | 106 | |
39236c6e A |
107 | /* General tunables */ |
108 | ||
109 | unsigned long delta_percentage = 5; | |
110 | unsigned long critical_threshold_percentage = 5; | |
111 | unsigned long idle_offset_percentage = 5; | |
112 | unsigned long pressure_threshold_percentage = 15; | |
113 | unsigned long freeze_threshold_percentage = 50; | |
114 | ||
316670eb | 115 | /* General memorystatus stuff */ |
6d2010ae | 116 | |
39236c6e A |
117 | struct klist memorystatus_klist; |
118 | static lck_mtx_t memorystatus_klist_mutex; | |
6d2010ae | 119 | |
39236c6e A |
120 | static void memorystatus_klist_lock(void); |
121 | static void memorystatus_klist_unlock(void); | |
6d2010ae | 122 | |
39236c6e A |
123 | static uint64_t memorystatus_idle_delay_time = 0; |
124 | ||
125 | /* | |
126 | * Memorystatus kevents | |
127 | */ | |
128 | ||
129 | static int filt_memorystatusattach(struct knote *kn); | |
130 | static void filt_memorystatusdetach(struct knote *kn); | |
131 | static int filt_memorystatus(struct knote *kn, long hint); | |
132 | ||
133 | struct filterops memorystatus_filtops = { | |
134 | .f_attach = filt_memorystatusattach, | |
135 | .f_detach = filt_memorystatusdetach, | |
136 | .f_event = filt_memorystatus, | |
137 | }; | |
138 | ||
139 | enum { | |
fe8ab488 A |
140 | kMemorystatusNoPressure = 0x1, |
141 | kMemorystatusPressure = 0x2, | |
142 | kMemorystatusLowSwap = 0x4 | |
39236c6e A |
143 | }; |
144 | ||
145 | /* Idle guard handling */ | |
146 | ||
147 | static int32_t memorystatus_scheduled_idle_demotions = 0; | |
148 | ||
149 | static thread_call_t memorystatus_idle_demotion_call; | |
150 | ||
151 | static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2); | |
152 | static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state); | |
153 | static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clean_state); | |
154 | static void memorystatus_reschedule_idle_demotion_locked(void); | |
6d2010ae | 155 | |
fe8ab488 A |
156 | static void memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert); |
157 | ||
158 | boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t); | |
159 | void memorystatus_send_low_swap_note(void); | |
39236c6e A |
160 | |
161 | int memorystatus_wakeup = 0; | |
162 | ||
163 | unsigned int memorystatus_level = 0; | |
6d2010ae | 164 | |
316670eb | 165 | static int memorystatus_list_count = 0; |
6d2010ae | 166 | |
39236c6e | 167 | #define MEMSTAT_BUCKET_COUNT (JETSAM_PRIORITY_MAX + 1) |
6d2010ae | 168 | |
39236c6e A |
169 | typedef struct memstat_bucket { |
170 | TAILQ_HEAD(, proc) list; | |
171 | int count; | |
172 | } memstat_bucket_t; | |
6d2010ae | 173 | |
39236c6e A |
174 | memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT]; |
175 | ||
176 | uint64_t memstat_idle_demotion_deadline = 0; | |
6d2010ae | 177 | |
316670eb | 178 | static unsigned int memorystatus_dirty_count = 0; |
6d2010ae | 179 | |
39236c6e A |
180 | |
181 | int | |
182 | memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret) | |
183 | { | |
184 | user_addr_t level = 0; | |
185 | ||
186 | level = args->level; | |
187 | ||
188 | if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) { | |
189 | return EFAULT; | |
190 | } | |
191 | ||
192 | return 0; | |
193 | } | |
194 | ||
195 | static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search); | |
196 | static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search); | |
197 | ||
198 | static void memorystatus_thread(void *param __unused, wait_result_t wr __unused); | |
6d2010ae | 199 | |
316670eb A |
200 | /* Jetsam */ |
201 | ||
202 | #if CONFIG_JETSAM | |
203 | ||
fe8ab488 A |
204 | int proc_get_memstat_priority(proc_t, boolean_t); |
205 | ||
39236c6e A |
206 | /* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */ |
207 | #define LEGACY_HIWATER 1 | |
208 | ||
fe8ab488 | 209 | static boolean_t memorystatus_idle_snapshot = 0; |
39236c6e | 210 | |
fe8ab488 | 211 | static int memorystatus_highwater_enabled = 1; |
316670eb | 212 | |
316670eb A |
213 | unsigned int memorystatus_delta = 0; |
214 | ||
39236c6e | 215 | static unsigned int memorystatus_available_pages_critical_base = 0; |
fe8ab488 | 216 | //static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1; |
39236c6e | 217 | static unsigned int memorystatus_available_pages_critical_idle_offset = 0; |
316670eb | 218 | |
39236c6e A |
219 | #if DEVELOPMENT || DEBUG |
220 | static unsigned int memorystatus_jetsam_panic_debug = 0; | |
316670eb | 221 | |
39236c6e A |
222 | static unsigned int memorystatus_jetsam_policy = kPolicyDefault; |
223 | static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0; | |
224 | #endif | |
316670eb | 225 | |
fe8ab488 A |
226 | static unsigned int memorystatus_thread_wasted_wakeup = 0; |
227 | ||
228 | static uint32_t kill_under_pressure_cause = 0; | |
316670eb | 229 | |
39236c6e A |
230 | static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot; |
231 | #define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries | |
316670eb | 232 | |
39236c6e A |
233 | static unsigned int memorystatus_jetsam_snapshot_count = 0; |
234 | static unsigned int memorystatus_jetsam_snapshot_max = 0; | |
316670eb | 235 | |
39236c6e | 236 | static void memorystatus_clear_errors(void); |
fe8ab488 | 237 | static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages); |
39236c6e A |
238 | static uint32_t memorystatus_build_state(proc_t p); |
239 | static void memorystatus_update_levels_locked(boolean_t critical_only); | |
fe8ab488 | 240 | //static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured); |
39236c6e A |
241 | |
242 | static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause); | |
243 | static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors); | |
244 | #if LEGACY_HIWATER | |
245 | static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors); | |
246 | #endif | |
247 | ||
248 | static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause); | |
249 | static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause); | |
316670eb | 250 | |
39236c6e | 251 | #endif /* CONFIG_JETSAM */ |
6d2010ae | 252 | |
316670eb | 253 | /* VM pressure */ |
6d2010ae | 254 | |
fe8ab488 A |
255 | extern unsigned int vm_page_free_count; |
256 | extern unsigned int vm_page_active_count; | |
257 | extern unsigned int vm_page_inactive_count; | |
258 | extern unsigned int vm_page_throttled_count; | |
259 | extern unsigned int vm_page_purgeable_count; | |
260 | extern unsigned int vm_page_wire_count; | |
261 | ||
316670eb | 262 | #if VM_PRESSURE_EVENTS |
6d2010ae | 263 | |
39236c6e | 264 | #include "vm_pressure.h" |
6d2010ae | 265 | |
fe8ab488 | 266 | extern boolean_t memorystatus_warn_process(pid_t pid, boolean_t critical); |
316670eb | 267 | |
39236c6e | 268 | vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal; |
316670eb | 269 | |
fe8ab488 A |
270 | #if CONFIG_MEMORYSTATUS |
271 | unsigned int memorystatus_available_pages = (unsigned int)-1; | |
272 | unsigned int memorystatus_available_pages_pressure = 0; | |
273 | unsigned int memorystatus_available_pages_critical = 0; | |
274 | unsigned int memorystatus_frozen_count = 0; | |
275 | unsigned int memorystatus_suspended_count = 0; | |
276 | ||
277 | /* | |
278 | * We use this flag to signal if we have any HWM offenders | |
279 | * on the system. This way we can reduce the number of wakeups | |
280 | * of the memorystatus_thread when the system is between the | |
281 | * "pressure" and "critical" threshold. | |
282 | * | |
283 | * The (re-)setting of this variable is done without any locks | |
284 | * or synchronization simply because it is not possible (currently) | |
285 | * to keep track of HWM offenders that drop down below their memory | |
286 | * limit and/or exit. So, we choose to burn a couple of wasted wakeups | |
287 | * by allowing the unguarded modification of this variable. | |
288 | */ | |
289 | boolean_t memorystatus_hwm_candidates = 0; | |
290 | ||
291 | static int memorystatus_send_note(int event_code, void *data, size_t data_length); | |
292 | #endif /* CONFIG_MEMORYSTATUS */ | |
293 | ||
316670eb A |
294 | #endif /* VM_PRESSURE_EVENTS */ |
295 | ||
316670eb A |
296 | /* Freeze */ |
297 | ||
298 | #if CONFIG_FREEZE | |
299 | ||
316670eb A |
300 | boolean_t memorystatus_freeze_enabled = FALSE; |
301 | int memorystatus_freeze_wakeup = 0; | |
302 | ||
303 | static inline boolean_t memorystatus_can_freeze_processes(void); | |
304 | static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low); | |
305 | ||
306 | static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused); | |
307 | ||
308 | /* Thresholds */ | |
309 | static unsigned int memorystatus_freeze_threshold = 0; | |
310 | ||
fe8ab488 A |
311 | static unsigned int memorystatus_freeze_pages_min = 0; |
312 | static unsigned int memorystatus_freeze_pages_max = 0; | |
316670eb A |
313 | |
314 | static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT; | |
315 | ||
316 | /* Stats */ | |
317 | static uint64_t memorystatus_freeze_count = 0; | |
318 | static uint64_t memorystatus_freeze_pageouts = 0; | |
6d2010ae A |
319 | |
320 | /* Throttling */ | |
316670eb A |
321 | static throttle_interval_t throttle_intervals[] = { |
322 | { 60, 8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */ | |
6d2010ae A |
323 | { 24 * 60, 1, 0, 0, { 0, 0 }, FALSE }, /* 24 hour long interval, no burst */ |
324 | }; | |
325 | ||
316670eb | 326 | static uint64_t memorystatus_freeze_throttle_count = 0; |
6d2010ae | 327 | |
39236c6e | 328 | static unsigned int memorystatus_suspended_footprint_total = 0; |
6d2010ae | 329 | |
39236c6e | 330 | #endif /* CONFIG_FREEZE */ |
6d2010ae | 331 | |
316670eb | 332 | /* Debug */ |
6d2010ae | 333 | |
fe8ab488 A |
334 | extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *); |
335 | ||
6d2010ae | 336 | #if DEVELOPMENT || DEBUG |
6d2010ae | 337 | |
39236c6e A |
338 | #if CONFIG_JETSAM |
339 | ||
340 | /* Debug aid to aid determination of limit */ | |
341 | ||
342 | static int | |
343 | sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS | |
344 | { | |
345 | #pragma unused(oidp, arg2) | |
346 | proc_t p; | |
347 | unsigned int b = 0; | |
348 | int error, enable = 0; | |
349 | int32_t memlimit; | |
350 | ||
351 | error = SYSCTL_OUT(req, arg1, sizeof(int)); | |
352 | if (error || !req->newptr) { | |
353 | return (error); | |
354 | } | |
355 | ||
356 | error = SYSCTL_IN(req, &enable, sizeof(int)); | |
357 | if (error || !req->newptr) { | |
358 | return (error); | |
359 | } | |
360 | ||
361 | if (!(enable == 0 || enable == 1)) { | |
362 | return EINVAL; | |
363 | } | |
364 | ||
365 | proc_list_lock(); | |
366 | ||
367 | p = memorystatus_get_first_proc_locked(&b, TRUE); | |
368 | while (p) { | |
369 | if (enable) { | |
370 | if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) { | |
371 | memlimit = -1; | |
372 | } else { | |
373 | memlimit = p->p_memstat_memlimit; | |
374 | } | |
375 | } else { | |
376 | memlimit = -1; | |
377 | } | |
378 | task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); | |
379 | ||
fe8ab488 A |
380 | if (memlimit == -1) { |
381 | p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; | |
382 | } else { | |
383 | if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) { | |
384 | p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; | |
385 | } | |
386 | } | |
387 | ||
39236c6e A |
388 | p = memorystatus_get_next_proc_locked(&b, p, TRUE); |
389 | } | |
390 | ||
391 | memorystatus_highwater_enabled = enable; | |
392 | ||
393 | proc_list_unlock(); | |
394 | ||
395 | return 0; | |
396 | } | |
397 | ||
fe8ab488 A |
398 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, ""); |
399 | ||
39236c6e A |
400 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", ""); |
401 | ||
402 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, ""); | |
403 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, ""); | |
404 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, ""); | |
39236c6e | 405 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, ""); |
316670eb A |
406 | |
407 | /* Diagnostic code */ | |
39236c6e | 408 | |
316670eb A |
409 | enum { |
410 | kJetsamDiagnosticModeNone = 0, | |
411 | kJetsamDiagnosticModeAll = 1, | |
412 | kJetsamDiagnosticModeStopAtFirstActive = 2, | |
413 | kJetsamDiagnosticModeCount | |
414 | } jetsam_diagnostic_mode = kJetsamDiagnosticModeNone; | |
415 | ||
416 | static int jetsam_diagnostic_suspended_one_active_proc = 0; | |
417 | ||
418 | static int | |
419 | sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS | |
420 | { | |
421 | #pragma unused(arg1, arg2) | |
422 | ||
423 | const char *diagnosticStrings[] = { | |
424 | "jetsam: diagnostic mode: resetting critical level.", | |
425 | "jetsam: diagnostic mode: will examine all processes", | |
426 | "jetsam: diagnostic mode: will stop at first active process" | |
427 | }; | |
428 | ||
429 | int error, val = jetsam_diagnostic_mode; | |
430 | boolean_t changed = FALSE; | |
431 | ||
432 | error = sysctl_handle_int(oidp, &val, 0, req); | |
433 | if (error || !req->newptr) | |
434 | return (error); | |
435 | if ((val < 0) || (val >= kJetsamDiagnosticModeCount)) { | |
436 | printf("jetsam: diagnostic mode: invalid value - %d\n", val); | |
437 | return EINVAL; | |
438 | } | |
439 | ||
39236c6e | 440 | proc_list_lock(); |
316670eb A |
441 | |
442 | if ((unsigned int) val != jetsam_diagnostic_mode) { | |
443 | jetsam_diagnostic_mode = val; | |
444 | ||
445 | memorystatus_jetsam_policy &= ~kPolicyDiagnoseActive; | |
446 | ||
447 | switch (jetsam_diagnostic_mode) { | |
448 | case kJetsamDiagnosticModeNone: | |
449 | /* Already cleared */ | |
450 | break; | |
451 | case kJetsamDiagnosticModeAll: | |
452 | memorystatus_jetsam_policy |= kPolicyDiagnoseAll; | |
453 | break; | |
454 | case kJetsamDiagnosticModeStopAtFirstActive: | |
455 | memorystatus_jetsam_policy |= kPolicyDiagnoseFirst; | |
456 | break; | |
457 | default: | |
458 | /* Already validated */ | |
459 | break; | |
460 | } | |
461 | ||
39236c6e | 462 | memorystatus_update_levels_locked(FALSE); |
316670eb A |
463 | changed = TRUE; |
464 | } | |
465 | ||
39236c6e | 466 | proc_list_unlock(); |
316670eb A |
467 | |
468 | if (changed) { | |
469 | printf("%s\n", diagnosticStrings[val]); | |
470 | } | |
471 | ||
472 | return (0); | |
473 | } | |
474 | ||
39236c6e | 475 | SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY, |
316670eb A |
476 | &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode"); |
477 | ||
39236c6e | 478 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, ""); |
316670eb A |
479 | |
480 | #if VM_PRESSURE_EVENTS | |
481 | ||
39236c6e | 482 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, ""); |
316670eb | 483 | |
316670eb | 484 | |
fe8ab488 A |
485 | /* |
486 | * This routine is used for targeted notifications | |
487 | * regardless of system memory pressure. | |
488 | * "memnote" is the current user. | |
489 | */ | |
316670eb A |
490 | |
491 | static int | |
492 | sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS | |
493 | { | |
494 | #pragma unused(arg1, arg2) | |
495 | ||
fe8ab488 A |
496 | int error = 0, pid = 0; |
497 | int ret = 0; | |
498 | struct knote *kn = NULL; | |
316670eb A |
499 | |
500 | error = sysctl_handle_int(oidp, &pid, 0, req); | |
501 | if (error || !req->newptr) | |
502 | return (error); | |
503 | ||
fe8ab488 A |
504 | /* |
505 | * We inspect 3 lists here for targeted notifications: | |
506 | * - memorystatus_klist | |
507 | * - vm_pressure_klist | |
508 | * - vm_pressure_dormant_klist | |
509 | * | |
510 | * The vm_pressure_* lists are tied to the old VM_PRESSURE | |
511 | * notification mechanism. We intend to stop using that | |
512 | * mechanism and, in turn, get rid of the 2 lists and | |
513 | * vm_dispatch_pressure_note_to_pid() too. | |
514 | */ | |
515 | ||
516 | memorystatus_klist_lock(); | |
517 | kn = vm_find_knote_from_pid(pid, &memorystatus_klist); | |
518 | if (kn) { | |
519 | /* | |
520 | * Forcibly send this pid a "warning" memory pressure notification. | |
521 | */ | |
522 | kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
523 | KNOTE(&memorystatus_klist, kMemorystatusPressure); | |
524 | ret = 0; | |
525 | } else { | |
526 | ret = vm_dispatch_pressure_note_to_pid(pid, FALSE); | |
527 | } | |
528 | memorystatus_klist_unlock(); | |
529 | ||
530 | return ret; | |
316670eb A |
531 | } |
532 | ||
533 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, | |
534 | 0, 0, &sysctl_memorystatus_vm_pressure_send, "I", ""); | |
535 | ||
536 | #endif /* VM_PRESSURE_EVENTS */ | |
537 | ||
538 | #endif /* CONFIG_JETSAM */ | |
539 | ||
540 | #if CONFIG_FREEZE | |
541 | ||
39236c6e | 542 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, ""); |
316670eb | 543 | |
39236c6e A |
544 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, ""); |
545 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, ""); | |
316670eb | 546 | |
39236c6e A |
547 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_count, ""); |
548 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, ""); | |
549 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_count, ""); | |
550 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, ""); | |
316670eb A |
551 | |
552 | boolean_t memorystatus_freeze_throttle_enabled = TRUE; | |
39236c6e | 553 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, ""); |
316670eb A |
554 | |
555 | /* | |
fe8ab488 | 556 | * Manual trigger of freeze and thaw for dev / debug kernels only. |
316670eb A |
557 | */ |
558 | static int | |
559 | sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS | |
560 | { | |
561 | #pragma unused(arg1, arg2) | |
562 | ||
563 | int error, pid = 0; | |
564 | proc_t p; | |
565 | ||
fe8ab488 A |
566 | if (memorystatus_freeze_enabled == FALSE) { |
567 | return ENOTSUP; | |
568 | } | |
569 | ||
316670eb A |
570 | error = sysctl_handle_int(oidp, &pid, 0, req); |
571 | if (error || !req->newptr) | |
572 | return (error); | |
573 | ||
574 | p = proc_find(pid); | |
575 | if (p != NULL) { | |
576 | uint32_t purgeable, wired, clean, dirty; | |
577 | boolean_t shared; | |
39236c6e A |
578 | uint32_t max_pages = 0; |
579 | ||
fe8ab488 | 580 | if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { |
39236c6e A |
581 | max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); |
582 | } else { | |
583 | max_pages = UINT32_MAX - 1; | |
584 | } | |
585 | error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); | |
316670eb | 586 | proc_rele(p); |
316670eb | 587 | |
39236c6e A |
588 | if (error) |
589 | error = EIO; | |
590 | return error; | |
591 | } | |
316670eb A |
592 | return EINVAL; |
593 | } | |
594 | ||
595 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, | |
596 | 0, 0, &sysctl_memorystatus_freeze, "I", ""); | |
597 | ||
598 | static int | |
599 | sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS | |
600 | { | |
601 | #pragma unused(arg1, arg2) | |
602 | ||
603 | int error, pid = 0; | |
604 | proc_t p; | |
605 | ||
fe8ab488 A |
606 | if (memorystatus_freeze_enabled == FALSE) { |
607 | return ENOTSUP; | |
608 | } | |
609 | ||
316670eb A |
610 | error = sysctl_handle_int(oidp, &pid, 0, req); |
611 | if (error || !req->newptr) | |
612 | return (error); | |
613 | ||
614 | p = proc_find(pid); | |
615 | if (p != NULL) { | |
39236c6e | 616 | error = task_thaw(p->task); |
316670eb | 617 | proc_rele(p); |
39236c6e A |
618 | |
619 | if (error) | |
620 | error = EIO; | |
621 | return error; | |
316670eb A |
622 | } |
623 | ||
624 | return EINVAL; | |
625 | } | |
626 | ||
627 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, | |
628 | 0, 0, &sysctl_memorystatus_available_pages_thaw, "I", ""); | |
6d2010ae | 629 | |
6d2010ae | 630 | #endif /* CONFIG_FREEZE */ |
2d21ac55 | 631 | |
fe8ab488 A |
632 | #endif /* DEVELOPMENT || DEBUG */ |
633 | ||
39236c6e A |
634 | extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation, |
635 | void *parameter, | |
636 | integer_t priority, | |
637 | thread_t *new_thread); | |
638 | ||
fe8ab488 A |
639 | #if CONFIG_JETSAM |
640 | /* | |
641 | * Sort processes by size for a single jetsam bucket. | |
642 | */ | |
643 | ||
644 | static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index) | |
645 | { | |
646 | proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL; | |
647 | uint32_t pages = 0, max_pages = 0; | |
648 | memstat_bucket_t *current_bucket; | |
649 | ||
650 | if (bucket_index >= MEMSTAT_BUCKET_COUNT) { | |
651 | return; | |
652 | } | |
653 | ||
654 | current_bucket = &memstat_bucket[bucket_index]; | |
655 | ||
656 | p = TAILQ_FIRST(¤t_bucket->list); | |
657 | ||
658 | if (p) { | |
659 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); | |
660 | max_pages = pages; | |
661 | insert_after_proc = NULL; | |
662 | ||
663 | p = TAILQ_NEXT(p, p_memstat_list); | |
664 | ||
665 | restart: | |
666 | while (p) { | |
667 | ||
668 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); | |
669 | ||
670 | if (pages > max_pages) { | |
671 | max_pages = pages; | |
672 | max_proc = p; | |
673 | } | |
674 | ||
675 | p = TAILQ_NEXT(p, p_memstat_list); | |
676 | } | |
677 | ||
678 | if (max_proc) { | |
679 | ||
680 | TAILQ_REMOVE(¤t_bucket->list, max_proc, p_memstat_list); | |
681 | ||
682 | if (insert_after_proc == NULL) { | |
683 | TAILQ_INSERT_HEAD(¤t_bucket->list, max_proc, p_memstat_list); | |
684 | } else { | |
685 | TAILQ_INSERT_AFTER(¤t_bucket->list, insert_after_proc, max_proc, p_memstat_list); | |
686 | } | |
687 | ||
688 | insert_after_proc = max_proc; | |
689 | ||
690 | /* Reset parameters for the new search. */ | |
691 | p = TAILQ_NEXT(max_proc, p_memstat_list); | |
692 | if (p) { | |
693 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); | |
694 | max_pages = pages; | |
695 | } | |
696 | max_proc = NULL; | |
697 | ||
698 | goto restart; | |
699 | } | |
700 | } | |
701 | } | |
702 | ||
703 | #endif /* CONFIG_JETSAM */ | |
704 | ||
39236c6e A |
705 | static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) { |
706 | memstat_bucket_t *current_bucket; | |
707 | proc_t next_p; | |
708 | ||
709 | if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) { | |
710 | return NULL; | |
711 | } | |
712 | ||
713 | current_bucket = &memstat_bucket[*bucket_index]; | |
714 | next_p = TAILQ_FIRST(¤t_bucket->list); | |
715 | if (!next_p && search) { | |
716 | while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) { | |
717 | current_bucket = &memstat_bucket[*bucket_index]; | |
718 | next_p = TAILQ_FIRST(¤t_bucket->list); | |
719 | } | |
720 | } | |
721 | ||
722 | return next_p; | |
723 | } | |
724 | ||
725 | static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) { | |
726 | memstat_bucket_t *current_bucket; | |
727 | proc_t next_p; | |
728 | ||
729 | if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) { | |
730 | return NULL; | |
731 | } | |
732 | ||
733 | next_p = TAILQ_NEXT(p, p_memstat_list); | |
734 | while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) { | |
735 | current_bucket = &memstat_bucket[*bucket_index]; | |
736 | next_p = TAILQ_FIRST(¤t_bucket->list); | |
737 | } | |
738 | ||
739 | return next_p; | |
740 | } | |
316670eb A |
741 | |
/*
 * One-time initialization of the memorystatus subsystem.
 *
 * Sets up the priority buckets, converts the idle-exit deferral window to
 * absolute time, reads boot-arg/device-tree overrides for the jetsam
 * thresholds, allocates the jetsam snapshot buffer (CONFIG_JETSAM), and
 * finally launches the memorystatus worker thread.  Runs before there is
 * any concurrency on these structures, so no locking is required here.
 */
__private_extern__ void
memorystatus_init(void)
{
	thread_t thread = THREAD_NULL;
	kern_return_t result;
	int i;

#if CONFIG_FREEZE
	memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
	memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
#endif

	/* Convert the deferral window (seconds) into mach absolute-time units once. */
	nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);

	/* Init buckets */
	for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
		TAILQ_INIT(&memstat_bucket[i].list);
		memstat_bucket[i].count = 0;
	}

	memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL);

	/* Apply overrides */
	PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage));
	assert(delta_percentage < 100);
	PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage));
	assert(critical_threshold_percentage < 100);
	PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage));
	assert(idle_offset_percentage < 100);
	PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage));
	assert(pressure_threshold_percentage < 100);
	PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage));
	assert(freeze_threshold_percentage < 100);

#if CONFIG_JETSAM
	/*
	 * Page thresholds derived from the percentages above.
	 * NOTE(review): critical_threshold_percentage / delta_percentage is
	 * integer division, so the critical base is quantized to whole
	 * multiples of memorystatus_delta — presumably intentional; confirm.
	 */
	memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
	memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
	memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;

	/* Snapshot buffer is sized for the system-wide process limit. */
	memorystatus_jetsam_snapshot_max = maxproc;
	memorystatus_jetsam_snapshot =
		(memorystatus_jetsam_snapshot_t*)kalloc(sizeof(memorystatus_jetsam_snapshot_t) +
		sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max);
	if (!memorystatus_jetsam_snapshot) {
		panic("Could not allocate memorystatus_jetsam_snapshot");
	}

	/* No contention at this point */
	memorystatus_update_levels_locked(FALSE);
#endif

#if CONFIG_FREEZE
	memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta;
#endif

	/* Start the worker thread; boot cannot proceed sensibly without it. */
	result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread);
	if (result == KERN_SUCCESS) {
		thread_deallocate(thread);
	} else {
		panic("Could not create memorystatus_thread");
	}
}
316670eb | 804 | |
39236c6e A |
805 | /* Centralised for the purposes of allowing panic-on-jetsam */ |
806 | extern void | |
807 | vm_wake_compactor_swapper(void); | |
316670eb | 808 | |
fe8ab488 A |
809 | /* |
810 | * The jetsam no frills kill call | |
811 | * Return: 0 on success | |
812 | * error code on failure (EINVAL...) | |
813 | */ | |
814 | static int | |
815 | jetsam_do_kill(proc_t p, int jetsam_flags) { | |
816 | int error = 0; | |
817 | error = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags); | |
818 | return(error); | |
819 | } | |
820 | ||
821 | /* | |
822 | * Wrapper for processes exiting with memorystatus details | |
823 | */ | |
39236c6e A |
/*
 * Kill a process on behalf of jetsam, recording the kill cause.
 *
 * Maps the memorystatus cause code onto P_JETSAM_* exit flags, emits
 * trace points around the kill, and optionally wakes the VM compactor
 * so freed pages can be reclaimed.  Returns TRUE iff the underlying
 * exit call succeeded.
 */
static boolean_t
memorystatus_do_kill(proc_t p, uint32_t cause) {

	int error = 0;
	__unused pid_t victim_pid = p->p_pid;

	KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START,
	    victim_pid, cause, vm_page_free_count, 0, 0);

#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
	/* Debug aid: panic instead of killing when this cause's bit is armed. */
	if (memorystatus_jetsam_panic_debug & (1 << cause)) {
		panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause);
	}
#else
#pragma unused(cause)
#endif
	/* Translate the cause into per-proc exit flags; unknown causes keep the base flag. */
	int jetsam_flags = P_LTERM_JETSAM;
	switch (cause) {
		case kMemorystatusKilledHiwat:			jetsam_flags |= P_JETSAM_HIWAT; break;
		case kMemorystatusKilledVnodes:			jetsam_flags |= P_JETSAM_VNODE; break;
		case kMemorystatusKilledVMPageShortage:		jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break;
		case kMemorystatusKilledVMThrashing:		jetsam_flags |= P_JETSAM_VMTHRASHING; break;
		case kMemorystatusKilledFCThrashing:		jetsam_flags |= P_JETSAM_FCTHRASHING; break;
		case kMemorystatusKilledPerProcessLimit:	jetsam_flags |= P_JETSAM_PID; break;
		case kMemorystatusKilledIdleExit:		jetsam_flags |= P_JETSAM_IDLEEXIT; break;
	}
	error = jetsam_do_kill(p, jetsam_flags);

	KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END,
	    victim_pid, cause, vm_page_free_count, error, 0);

	/* Give the compressor a chance to reclaim the victim's pages promptly. */
	if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
		vm_wake_compactor_swapper();
	}

	return (error == 0);
}
861 | ||
862 | /* | |
863 | * Node manipulation | |
864 | */ | |
865 | ||
/*
 * Re-derive the jetsam memory levels after a list/priority change.
 * Caller holds the proc list lock.  No-op unless CONFIG_JETSAM.
 */
static void
memorystatus_check_levels_locked(void) {
#if CONFIG_JETSAM
	/* Update levels */
	memorystatus_update_levels_locked(TRUE);
#endif
}
316670eb | 873 | |
39236c6e A |
/*
 * Thread-call handler: demote processes whose idle-exit deferral window
 * has expired from JETSAM_PRIORITY_IDLE_DEFERRED down to
 * JETSAM_PRIORITY_IDLE, then re-arm the timer for whatever remains.
 *
 * Takes the proc list lock for the duration.  The deferred bucket is
 * deadline-ordered (earliest first), so the walk stops at the first
 * entry whose deadline has not yet passed.
 */
static void
memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2)
{
	proc_t p;
	uint64_t current_time;
	memstat_bucket_t *demotion_bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n");

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0);

	current_time = mach_absolute_time();

	proc_list_lock();

	demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
	p = TAILQ_FIRST(&demotion_bucket->list);

	while (p) {
		MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid);

		/* Anyone in this bucket must be clean, idle-exit enabled, and deferred. */
		assert(p->p_memstat_idledeadline);
		assert(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS);
		assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED);

		if (current_time >= p->p_memstat_idledeadline) {
#if DEBUG || DEVELOPMENT
			if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) {
				printf("memorystatus_perform_idle_demotion: moving process %d [%s] to idle band, but never dirtied (0x%x)!\n",
				    p->p_pid, (p->p_comm ? p->p_comm : "(unknown)"), p->p_memstat_dirty);
			}
#endif
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, false);

			// The prior process has moved out of the demotion bucket, so grab the new head and continue
			p = TAILQ_FIRST(&demotion_bucket->list);
			continue;
		}

		// No further candidates
		break;
	}

	memorystatus_reschedule_idle_demotion_locked();

	proc_list_unlock();

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
924 | ||
/*
 * Arm the idle-exit deferral for a clean, idle-exit-enabled process.
 *
 * When set_state is TRUE, stamps P_DIRTY_DEFER_IN_PROGRESS and computes
 * the absolute demotion deadline; when FALSE, the deadline must already
 * be set.  Maintains memorystatus_scheduled_idle_demotions, counting the
 * process only if it is not already sitting in the deferred bucket.
 * Caller holds the proc list lock and must follow up with
 * memorystatus_reschedule_idle_demotion_locked() to (re)arm the timer.
 */
static void
memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state)
{
	boolean_t present_in_deferred_bucket = FALSE;

	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		present_in_deferred_bucket = TRUE;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n",
	    p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions);

	/* Only idle-exit-enabled processes may be deferred. */
	assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED);

	if (set_state) {
		assert(p->p_memstat_idledeadline == 0);
		p->p_memstat_dirty |= P_DIRTY_DEFER_IN_PROGRESS;
		p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time;
	}

	/* Either just set above, or carried over from a prior deferral. */
	assert(p->p_memstat_idledeadline);

	if (present_in_deferred_bucket == FALSE) {
		memorystatus_scheduled_idle_demotions++;
	}
}
951 | ||
39236c6e A |
/*
 * Cancel a pending idle-exit deferral for a process.
 *
 * When clear_state is TRUE the deadline and P_DIRTY_DEFER_IN_PROGRESS
 * flag are reset; when FALSE the deferral state is preserved (used when
 * a deferred process goes dirty but should return to the deferred band
 * once clean again).  Decrements the scheduled-demotion count only if
 * the process currently resides in the deferred bucket.
 * Caller holds the proc list lock.
 */
static void
memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state)
{
	boolean_t present_in_deferred_bucket = FALSE;

	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		present_in_deferred_bucket = TRUE;
		assert(p->p_memstat_idledeadline);
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n",
	    p->p_pid, clear_state, memorystatus_scheduled_idle_demotions);


	if (clear_state) {
		p->p_memstat_idledeadline = 0;
		p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
	}

	if (present_in_deferred_bucket == TRUE) {
		memorystatus_scheduled_idle_demotions--;
	}

	assert(memorystatus_scheduled_idle_demotions >= 0);
}
977 | ||
/*
 * Re-arm or cancel the idle-demotion thread call to match the current
 * contents of the deferred bucket.  With no scheduled demotions the
 * pending call (if any) is cancelled; otherwise the call is aimed at the
 * deadline of the bucket head (earliest deadline), avoiding a redundant
 * thread_call_enter_delayed when the deadline is unchanged.
 * Caller holds the proc list lock.
 */
static void
memorystatus_reschedule_idle_demotion_locked(void) {
	if (0 == memorystatus_scheduled_idle_demotions) {
		if (memstat_idle_demotion_deadline) {
			/* Transitioned 1->0, so cancel next call */
			thread_call_cancel(memorystatus_idle_demotion_call);
			memstat_idle_demotion_deadline = 0;
		}
	} else {
		memstat_bucket_t *demotion_bucket;
		proc_t p;
		demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
		p = TAILQ_FIRST(&demotion_bucket->list);

		/* Non-zero demotion count implies a populated bucket with a deadline. */
		assert(p && p->p_memstat_idledeadline);

		if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){
			thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline);
			memstat_idle_demotion_deadline = p->p_memstat_idledeadline;
		}
	}
}
1000 | ||
1001 | /* | |
1002 | * List manipulation | |
1003 | */ | |
1004 | ||
39236c6e A |
/*
 * Register a process with memorystatus tracking, appending it to the
 * bucket matching its current effective priority.
 *
 * @param p      process to add; its p_memstat_effectivepriority selects the bucket.
 * @param locked TRUE if the caller already holds the proc list lock.
 * @return always 0.
 *
 * Internal (kernel-owned) processes are not priority-tracked and are
 * skipped.  Bucket counts and the global list count are kept in sync,
 * and the jetsam levels are re-derived afterwards.
 */
int
memorystatus_add(proc_t p, boolean_t locked)
{
	memstat_bucket_t *bucket;

	MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);

	if (!locked) {
		proc_list_lock();
	}

	/* Processes marked internal do not have priority tracked */
	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		goto exit;
	}

	bucket = &memstat_bucket[p->p_memstat_effectivepriority];

	/* The deferred bucket's population must match the scheduled-demotion count. */
	if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
		assert(bucket->count == memorystatus_scheduled_idle_demotions);
	}

	TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list);
	bucket->count++;

	memorystatus_list_count++;

	memorystatus_check_levels_locked();

exit:
	if (!locked) {
		proc_list_unlock();
	}

	return 0;
}
316670eb | 1041 | |
39236c6e | 1042 | static void |
fe8ab488 | 1043 | memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert) |
39236c6e A |
1044 | { |
1045 | memstat_bucket_t *old_bucket, *new_bucket; | |
1046 | ||
1047 | assert(priority < MEMSTAT_BUCKET_COUNT); | |
1048 | ||
1049 | /* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */ | |
1050 | if ((p->p_listflag & P_LIST_EXITED) != 0) { | |
1051 | return; | |
316670eb | 1052 | } |
39236c6e | 1053 | |
fe8ab488 A |
1054 | MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d, inserting at %s\n", |
1055 | p->p_pid, priority, head_insert ? "head" : "tail"); | |
316670eb | 1056 | |
39236c6e | 1057 | old_bucket = &memstat_bucket[p->p_memstat_effectivepriority]; |
fe8ab488 A |
1058 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { |
1059 | assert(old_bucket->count == (memorystatus_scheduled_idle_demotions + 1)); | |
1060 | } | |
1061 | ||
39236c6e A |
1062 | TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list); |
1063 | old_bucket->count--; | |
316670eb | 1064 | |
39236c6e | 1065 | new_bucket = &memstat_bucket[priority]; |
fe8ab488 A |
1066 | if (head_insert) |
1067 | TAILQ_INSERT_HEAD(&new_bucket->list, p, p_memstat_list); | |
1068 | else | |
1069 | TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list); | |
39236c6e A |
1070 | new_bucket->count++; |
1071 | ||
1072 | #if CONFIG_JETSAM | |
1073 | if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) { | |
fe8ab488 A |
1074 | |
1075 | /* | |
1076 | * Adjust memory limit based on if the task is going to/from foreground and background. | |
1077 | */ | |
1078 | ||
39236c6e A |
1079 | if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) || |
1080 | ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) { | |
1081 | int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit; | |
1082 | task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); | |
fe8ab488 A |
1083 | |
1084 | if (memlimit <= 0) { | |
1085 | p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; | |
1086 | } else { | |
1087 | p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; | |
1088 | } | |
39236c6e A |
1089 | } |
1090 | } | |
1091 | #endif | |
1092 | ||
1093 | p->p_memstat_effectivepriority = priority; | |
1094 | ||
1095 | memorystatus_check_levels_locked(); | |
316670eb A |
1096 | } |
1097 | ||
39236c6e | 1098 | int |
fe8ab488 | 1099 | memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit) |
316670eb | 1100 | { |
39236c6e | 1101 | int ret; |
fe8ab488 | 1102 | boolean_t head_insert = false; |
39236c6e | 1103 | |
316670eb | 1104 | #if !CONFIG_JETSAM |
fe8ab488 | 1105 | #pragma unused(update_memlimit, memlimit, memlimit_background, is_fatal_limit) |
316670eb | 1106 | #endif |
316670eb | 1107 | |
39236c6e | 1108 | MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data); |
316670eb | 1109 | |
39236c6e A |
1110 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0); |
1111 | ||
1112 | if (priority == -1) { | |
1113 | /* Use as shorthand for default priority */ | |
1114 | priority = JETSAM_PRIORITY_DEFAULT; | |
1115 | } else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
1116 | /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */ | |
1117 | priority = JETSAM_PRIORITY_IDLE; | |
fe8ab488 A |
1118 | } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) { |
1119 | /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */ | |
1120 | priority = JETSAM_PRIORITY_IDLE; | |
1121 | head_insert = true; | |
39236c6e A |
1122 | } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) { |
1123 | /* Sanity check */ | |
1124 | ret = EINVAL; | |
1125 | goto out; | |
316670eb | 1126 | } |
39236c6e A |
1127 | |
1128 | proc_list_lock(); | |
1129 | ||
1130 | assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); | |
316670eb | 1131 | |
39236c6e A |
1132 | if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) { |
1133 | ret = EALREADY; | |
1134 | proc_list_unlock(); | |
fe8ab488 A |
1135 | MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid); |
1136 | goto out; | |
1137 | } | |
1138 | ||
1139 | if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || ((p->p_listflag & P_LIST_EXITED) != 0)) { | |
1140 | /* | |
1141 | * This could happen when a process calling posix_spawn() is exiting on the jetsam thread. | |
1142 | */ | |
1143 | ret = EBUSY; | |
1144 | proc_list_unlock(); | |
316670eb A |
1145 | goto out; |
1146 | } | |
1147 | ||
39236c6e A |
1148 | p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED; |
1149 | p->p_memstat_userdata = user_data; | |
1150 | p->p_memstat_requestedpriority = priority; | |
1151 | ||
1152 | #if CONFIG_JETSAM | |
1153 | if (update_memlimit) { | |
1154 | p->p_memstat_memlimit = memlimit; | |
1155 | if (memlimit_background) { | |
1156 | /* Will be set as priority is updated */ | |
1157 | p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND; | |
fe8ab488 A |
1158 | |
1159 | /* Cannot have a background memory limit and be fatal. */ | |
1160 | is_fatal_limit = FALSE; | |
1161 | ||
316670eb | 1162 | } else { |
39236c6e A |
1163 | /* Otherwise, apply now */ |
1164 | if (memorystatus_highwater_enabled) { | |
1165 | task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE); | |
1166 | } | |
316670eb | 1167 | } |
fe8ab488 A |
1168 | |
1169 | if (is_fatal_limit || memlimit <= 0) { | |
1170 | p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; | |
1171 | } else { | |
1172 | p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; | |
1173 | } | |
316670eb | 1174 | } |
39236c6e | 1175 | #endif |
316670eb | 1176 | |
fe8ab488 A |
1177 | /* |
1178 | * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here. | |
1179 | * But, we could be removing it from the bucket. | |
1180 | * Check and take appropriate steps if so. | |
1181 | */ | |
1182 | ||
1183 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
1184 | ||
1185 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1186 | } | |
1187 | ||
1188 | memorystatus_update_priority_locked(p, priority, head_insert); | |
39236c6e A |
1189 | |
1190 | proc_list_unlock(); | |
1191 | ret = 0; | |
316670eb A |
1192 | |
1193 | out: | |
39236c6e A |
1194 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0); |
1195 | ||
316670eb A |
1196 | return ret; |
1197 | } | |
1198 | ||
39236c6e A |
1199 | int |
1200 | memorystatus_remove(proc_t p, boolean_t locked) | |
316670eb | 1201 | { |
39236c6e A |
1202 | int ret; |
1203 | memstat_bucket_t *bucket; | |
316670eb | 1204 | |
fe8ab488 | 1205 | MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid); |
316670eb | 1206 | |
39236c6e A |
1207 | if (!locked) { |
1208 | proc_list_lock(); | |
1209 | } | |
316670eb | 1210 | |
39236c6e | 1211 | assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL)); |
fe8ab488 | 1212 | |
39236c6e | 1213 | bucket = &memstat_bucket[p->p_memstat_effectivepriority]; |
fe8ab488 A |
1214 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { |
1215 | assert(bucket->count == memorystatus_scheduled_idle_demotions); | |
1216 | } | |
1217 | ||
39236c6e A |
1218 | TAILQ_REMOVE(&bucket->list, p, p_memstat_list); |
1219 | bucket->count--; | |
1220 | ||
1221 | memorystatus_list_count--; | |
316670eb | 1222 | |
39236c6e A |
1223 | /* If awaiting demotion to the idle band, clean up */ |
1224 | if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) { | |
1225 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1226 | memorystatus_reschedule_idle_demotion_locked(); | |
1227 | } | |
316670eb | 1228 | |
39236c6e A |
1229 | memorystatus_check_levels_locked(); |
1230 | ||
1231 | #if CONFIG_FREEZE | |
1232 | if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) { | |
1233 | memorystatus_frozen_count--; | |
1234 | } | |
316670eb | 1235 | |
39236c6e A |
1236 | if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { |
1237 | memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint; | |
1238 | memorystatus_suspended_count--; | |
316670eb | 1239 | } |
39236c6e A |
1240 | #endif |
1241 | ||
1242 | if (!locked) { | |
1243 | proc_list_unlock(); | |
1244 | } | |
316670eb | 1245 | |
39236c6e A |
1246 | if (p) { |
1247 | ret = 0; | |
316670eb | 1248 | } else { |
39236c6e | 1249 | ret = ESRCH; |
316670eb A |
1250 | } |
1251 | ||
1252 | return ret; | |
1253 | } | |
1254 | ||
39236c6e A |
/*
 * Validate a PROC_DIRTY_* control word against the target's state.
 *
 * Rejects requests on processes already marked for termination, and
 * enforces flag dependencies: idle-exit and launch-in-progress both
 * require dirty tracking, and deferral requires idle-exit.
 * Returns TRUE if the combination is acceptable.
 */
static boolean_t
memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) {
	/* See that the process isn't marked for termination */
	if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) {
		return FALSE;
	}

	/* Idle exit requires that process be tracked */
	if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) &&
	   !(pcontrol & PROC_DIRTY_TRACK)) {
		return FALSE;
	}

	/* 'Launch in progress' tracking requires that process have enabled dirty tracking too. */
	if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) &&
	   !(pcontrol & PROC_DIRTY_TRACK)) {
		return FALSE;
	}

	/*
	 * Deferral is only relevant if idle exit is specified.
	 * NOTE(review): this test uses PROC_DIRTY_ALLOWS_IDLE_EXIT while the
	 * check above uses PROC_DIRTY_ALLOW_IDLE_EXIT — verify both macros
	 * exist and carry the same bit value; if not, this is a typo.
	 */
	if ((pcontrol & PROC_DIRTY_DEFER) &&
	   !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) {
		return FALSE;
	}

	return TRUE;
}
593a1d5f | 1282 | |
39236c6e A |
1283 | static void |
1284 | memorystatus_update_idle_priority_locked(proc_t p) { | |
1285 | int32_t priority; | |
1286 | ||
1287 | MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty); | |
1288 | ||
1289 | if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) { | |
1290 | priority = (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) ? JETSAM_PRIORITY_IDLE_DEFERRED : JETSAM_PRIORITY_IDLE; | |
1291 | } else { | |
1292 | priority = p->p_memstat_requestedpriority; | |
1293 | } | |
1294 | ||
fe8ab488 A |
1295 | if (priority != p->p_memstat_effectivepriority) { |
1296 | memorystatus_update_priority_locked(p, priority, false); | |
1297 | } | |
39236c6e A |
1298 | } |
1299 | ||
1300 | /* | |
1301 | * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle | |
1302 | * (clean). They may also indicate that they support termination when idle, with the result that they are promoted | |
1303 | * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low | |
1304 | * priority idle band when clean (and killed earlier, protecting higher priority procesess). | |
1305 | * | |
1306 | * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by | |
1307 | * memorystatus_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band | |
1308 | * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to | |
1309 | * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle | |
1310 | * band. The deferral can be cleared early by clearing the appropriate flag. | |
1311 | * | |
1312 | * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process | |
1313 | * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be | |
1314 | * re-enabled or the guard state cleared, depending on whether the guard deadline has passed. | |
1315 | */ | |
1316 | ||
/*
 * Enable (or extend) kernel dirty-state tracking for a process.
 *
 * @param p        target process.
 * @param pcontrol PROC_DIRTY_* flags; TRACK / ALLOW_IDLE_EXIT /
 *                 LAUNCH_IN_PROGRESS are cumulative, DEFER arms the
 *                 one-shot idle-exit deferral window.
 * @return 0 on success; EBUSY if the process is exiting, EPERM for
 *         kernel-internal processes, EINVAL for inconsistent flags.
 *
 * Takes the proc list lock.  See the block comment above this function
 * in the file for the full deferral state-machine description.
 */
int
memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
	unsigned int old_dirty;
	boolean_t reschedule = FALSE;
	boolean_t already_deferred = FALSE;
	boolean_t defer_now = FALSE;
	int ret;

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK),
	    p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0);

	proc_list_lock();

	if ((p->p_listflag & P_LIST_EXITED) != 0) {
		/*
		 * Process is on its way out.
		 */
		ret = EBUSY;
		goto exit;
	}

	if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
		ret = EPERM;
		goto exit;
	}

	if (!memorystatus_validate_track_flags(p, pcontrol)) {
		ret = EINVAL;
		goto exit;
	}

	/* Snapshot before mutation so the defer transition can be detected below. */
	old_dirty = p->p_memstat_dirty;

	/* These bits are cumulative, as per <rdar://problem/11159924> */
	if (pcontrol & PROC_DIRTY_TRACK) {
		p->p_memstat_dirty |= P_DIRTY_TRACK;
	}

	if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) {
		p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT;
	}

	if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) {
		p->p_memstat_dirty |= P_DIRTY_LAUNCH_IN_PROGRESS;
	}

	if (old_dirty & P_DIRTY_DEFER_IN_PROGRESS) {
		already_deferred = TRUE;
	}

	/* This can be set and cleared exactly once. */
	if (pcontrol & PROC_DIRTY_DEFER) {

		if ( !(old_dirty & P_DIRTY_DEFER)) {
			p->p_memstat_dirty |= P_DIRTY_DEFER;
		}

		defer_now = TRUE;
	}

	MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for process %d\n",
	    ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N",
	    defer_now ? "Y" : "N",
	    p->p_memstat_dirty & P_DIRTY ? "Y" : "N",
	    p->p_pid);

	/* Kick off or invalidate the idle exit deferment if there's a state transition. */
	if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) {
		if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) &&
		    defer_now && !already_deferred) {

			/*
			 * Request to defer a clean process that's idle-exit enabled
			 * and not already in the jetsam deferred band.
			 */
			memorystatus_schedule_idle_demotion_locked(p, TRUE);
			reschedule = TRUE;

		} else if (!defer_now && already_deferred) {

			/*
			 * Either the process is no longer idle-exit enabled OR
			 * there's a request to cancel a currently active deferral.
			 */
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			reschedule = TRUE;
		}
	} else {

		/*
		 * We are trying to operate on a dirty process. Dirty processes have to
		 * be removed from the deferred band. The question is do we reset the
		 * deferred state or not?
		 *
		 * This could be a legal request like:
		 * - this process had opted into the JETSAM_DEFERRED band
		 * - but it's now dirty and requests to opt out.
		 * In this case, we remove the process from the band and reset its
		 * state too. It'll opt back in properly when needed.
		 *
		 * OR, this request could be a user-space bug. E.g.:
		 * - this process had opted into the JETSAM_DEFERRED band when clean
		 * - and, then issues another request to again put it into the band except
		 *   this time the process is dirty.
		 * The process going dirty, as a transition in memorystatus_dirty_set(), will pull the process out of
		 * the deferred band with its state intact. So our request below is no-op.
		 * But we do it here anyways for coverage.
		 *
		 * memorystatus_update_idle_priority_locked()
		 * single-mindedly treats a dirty process as "cannot be in the deferred band".
		 */

		if (!defer_now && already_deferred) {
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
			reschedule = TRUE;
		} else {
			memorystatus_invalidate_idle_demotion_locked(p, FALSE);
			reschedule = TRUE;
		}
	}

	/* Re-place the process in the band its (possibly new) state dictates. */
	memorystatus_update_idle_priority_locked(p);

	if (reschedule) {
		memorystatus_reschedule_idle_demotion_locked();
	}

	ret = 0;

exit:
	proc_list_unlock();

	return ret;
}
2d21ac55 | 1451 | |
39236c6e A |
1452 | int |
1453 | memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) { | |
1454 | int ret; | |
1455 | boolean_t kill = false; | |
1456 | boolean_t reschedule = FALSE; | |
1457 | boolean_t was_dirty = FALSE; | |
1458 | boolean_t now_dirty = FALSE; | |
6d2010ae | 1459 | |
39236c6e | 1460 | MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty); |
fe8ab488 A |
1461 | |
1462 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_SET), p->p_pid, self, pcontrol, 0, 0); | |
b0d623f7 | 1463 | |
39236c6e A |
1464 | proc_list_lock(); |
1465 | ||
fe8ab488 A |
1466 | if ((p->p_listflag & P_LIST_EXITED) != 0) { |
1467 | /* | |
1468 | * Process is on its way out. | |
1469 | */ | |
1470 | ret = EBUSY; | |
1471 | goto exit; | |
1472 | } | |
1473 | ||
39236c6e A |
1474 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { |
1475 | ret = EPERM; | |
1476 | goto exit; | |
1477 | } | |
1478 | ||
1479 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) | |
1480 | was_dirty = TRUE; | |
1481 | ||
1482 | if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { | |
1483 | /* Dirty tracking not enabled */ | |
1484 | ret = EINVAL; | |
1485 | } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { | |
1486 | /* | |
1487 | * Process is set to be terminated and we're attempting to mark it dirty. | |
1488 | * Set for termination and marking as clean is OK - see <rdar://problem/10594349>. | |
1489 | */ | |
1490 | ret = EBUSY; | |
1491 | } else { | |
1492 | int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN; | |
1493 | if (pcontrol && !(p->p_memstat_dirty & flag)) { | |
1494 | /* Mark the process as having been dirtied at some point */ | |
1495 | p->p_memstat_dirty |= (flag | P_DIRTY_MARKED); | |
1496 | memorystatus_dirty_count++; | |
1497 | ret = 0; | |
1498 | } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) { | |
1499 | if ((flag == P_DIRTY_SHUTDOWN) && (!p->p_memstat_dirty & P_DIRTY)) { | |
1500 | /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */ | |
1501 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; | |
1502 | kill = true; | |
1503 | } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) { | |
1504 | /* Kill previously terminated processes if set clean */ | |
1505 | kill = true; | |
1506 | } | |
1507 | p->p_memstat_dirty &= ~flag; | |
1508 | memorystatus_dirty_count--; | |
1509 | ret = 0; | |
1510 | } else { | |
1511 | /* Already set */ | |
1512 | ret = EALREADY; | |
316670eb | 1513 | } |
39236c6e A |
1514 | } |
1515 | ||
1516 | if (ret != 0) { | |
1517 | goto exit; | |
1518 | } | |
1519 | ||
1520 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) | |
1521 | now_dirty = TRUE; | |
1522 | ||
1523 | if ((was_dirty == TRUE && now_dirty == FALSE) || | |
1524 | (was_dirty == FALSE && now_dirty == TRUE)) { | |
1525 | ||
1526 | /* Manage idle exit deferral, if applied */ | |
1527 | if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == | |
1528 | (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) { | |
fe8ab488 A |
1529 | |
1530 | /* | |
1531 | * P_DIRTY_DEFER_IN_PROGRESS means the process is in the deferred band OR it might be heading back | |
1532 | * there once it's clean again and has some protection window left. | |
1533 | */ | |
1534 | ||
39236c6e | 1535 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { |
fe8ab488 A |
1536 | /* |
1537 | * New dirty process i.e. "was_dirty == FALSE && now_dirty == TRUE" | |
1538 | * | |
1539 | * The process will move from the deferred band to its higher requested | |
1540 | * jetsam band. But we don't clear its state i.e. we want to remember that | |
1541 | * this process was part of the "deferred" band and will return to it. | |
1542 | * | |
1543 | * This way, we don't let it age beyond the protection | |
1544 | * window when it returns to "clean". All the while giving | |
1545 | * it a chance to perform its work while "dirty". | |
1546 | * | |
1547 | */ | |
39236c6e A |
1548 | memorystatus_invalidate_idle_demotion_locked(p, FALSE); |
1549 | reschedule = TRUE; | |
1550 | } else { | |
fe8ab488 A |
1551 | |
1552 | /* | |
1553 | * Process is back from "dirty" to "clean". | |
1554 | * | |
1555 | * Is its timer up OR does it still have some protection | |
1556 | * window left? | |
1557 | */ | |
1558 | ||
39236c6e | 1559 | if (mach_absolute_time() >= p->p_memstat_idledeadline) { |
fe8ab488 A |
1560 | /* |
1561 | * The process' deadline has expired. It currently | |
1562 | * does not reside in the DEFERRED bucket. | |
1563 | * | |
1564 | * It's on its way to the JETSAM_PRIORITY_IDLE | |
1565 | * bucket via memorystatus_update_idle_priority_locked() | |
1566 | * below. | |
1567 | ||
1568 | * So all we need to do is reset all the state on the | |
1569 | * process that's related to the DEFERRED bucket i.e. | |
1570 | * the DIRTY_DEFER_IN_PROGRESS flag and the timer deadline. | |
1571 | * | |
1572 | */ | |
1573 | ||
1574 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1575 | reschedule = TRUE; | |
39236c6e | 1576 | } else { |
fe8ab488 A |
1577 | /* |
1578 | * It still has some protection window left and so | |
1579 | * we just re-arm the timer without modifying any | |
1580 | * state on the process. | |
1581 | */ | |
39236c6e A |
1582 | memorystatus_schedule_idle_demotion_locked(p, FALSE); |
1583 | reschedule = TRUE; | |
1584 | } | |
1585 | } | |
1586 | } | |
1587 | ||
1588 | memorystatus_update_idle_priority_locked(p); | |
1589 | ||
1590 | /* If the deferral state changed, reschedule the demotion timer */ | |
1591 | if (reschedule) { | |
1592 | memorystatus_reschedule_idle_demotion_locked(); | |
1593 | } | |
1594 | } | |
1595 | ||
1596 | if (kill) { | |
1597 | psignal(p, SIGKILL); | |
1598 | } | |
1599 | ||
1600 | exit: | |
1601 | proc_list_unlock(); | |
1602 | ||
1603 | return ret; | |
1604 | } | |
b0d623f7 | 1605 | |
39236c6e | 1606 | int |
fe8ab488 A |
1607 | memorystatus_dirty_clear(proc_t p, uint32_t pcontrol) { |
1608 | ||
39236c6e | 1609 | int ret = 0; |
fe8ab488 A |
1610 | |
1611 | MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_clear(): %d 0x%x 0x%x\n", p->p_pid, pcontrol, p->p_memstat_dirty); | |
39236c6e | 1612 | |
fe8ab488 A |
1613 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_CLEAR), p->p_pid, pcontrol, 0, 0, 0); |
1614 | ||
1615 | proc_list_lock(); | |
1616 | ||
1617 | if ((p->p_listflag & P_LIST_EXITED) != 0) { | |
1618 | /* | |
1619 | * Process is on its way out. | |
1620 | */ | |
1621 | ret = EBUSY; | |
1622 | goto exit; | |
1623 | } | |
1624 | ||
1625 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { | |
1626 | ret = EPERM; | |
1627 | goto exit; | |
1628 | } | |
1629 | ||
1630 | if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) { | |
1631 | /* Dirty tracking not enabled */ | |
1632 | ret = EINVAL; | |
1633 | goto exit; | |
1634 | } | |
1635 | ||
1636 | if (!pcontrol || (pcontrol & (PROC_DIRTY_LAUNCH_IN_PROGRESS | PROC_DIRTY_DEFER)) == 0) { | |
1637 | ret = EINVAL; | |
1638 | goto exit; | |
1639 | } | |
1640 | ||
1641 | if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) { | |
1642 | p->p_memstat_dirty &= ~P_DIRTY_LAUNCH_IN_PROGRESS; | |
1643 | } | |
1644 | ||
1645 | /* This can be set and cleared exactly once. */ | |
1646 | if (pcontrol & PROC_DIRTY_DEFER) { | |
1647 | ||
1648 | if (p->p_memstat_dirty & P_DIRTY_DEFER) { | |
1649 | ||
1650 | p->p_memstat_dirty &= ~P_DIRTY_DEFER; | |
1651 | ||
1652 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1653 | memorystatus_update_idle_priority_locked(p); | |
1654 | memorystatus_reschedule_idle_demotion_locked(); | |
1655 | } | |
1656 | } | |
1657 | ||
1658 | ret = 0; | |
1659 | exit: | |
1660 | proc_list_unlock(); | |
1661 | ||
1662 | return ret; | |
1663 | } | |
1664 | ||
1665 | int | |
1666 | memorystatus_dirty_get(proc_t p) { | |
1667 | int ret = 0; | |
1668 | ||
1669 | proc_list_lock(); | |
1670 | ||
1671 | if (p->p_memstat_dirty & P_DIRTY_TRACK) { | |
39236c6e A |
1672 | ret |= PROC_DIRTY_TRACKED; |
1673 | if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) { | |
1674 | ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT; | |
1675 | } | |
1676 | if (p->p_memstat_dirty & P_DIRTY) { | |
1677 | ret |= PROC_DIRTY_IS_DIRTY; | |
1678 | } | |
fe8ab488 A |
1679 | if (p->p_memstat_dirty & P_DIRTY_LAUNCH_IN_PROGRESS) { |
1680 | ret |= PROC_DIRTY_LAUNCH_IS_IN_PROGRESS; | |
1681 | } | |
39236c6e A |
1682 | } |
1683 | ||
1684 | proc_list_unlock(); | |
1685 | ||
1686 | return ret; | |
1687 | } | |
b0d623f7 | 1688 | |
39236c6e A |
1689 | int |
1690 | memorystatus_on_terminate(proc_t p) { | |
1691 | int sig; | |
1692 | ||
1693 | proc_list_lock(); | |
1694 | ||
1695 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; | |
1696 | ||
1697 | if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) { | |
1698 | /* Clean; mark as terminated and issue SIGKILL */ | |
1699 | sig = SIGKILL; | |
1700 | } else { | |
1701 | /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */ | |
1702 | sig = SIGTERM; | |
316670eb | 1703 | } |
39236c6e A |
1704 | |
1705 | proc_list_unlock(); | |
1706 | ||
1707 | return sig; | |
316670eb | 1708 | } |
b0d623f7 | 1709 | |
316670eb | 1710 | void |
39236c6e A |
1711 | memorystatus_on_suspend(proc_t p) |
1712 | { | |
316670eb | 1713 | #if CONFIG_FREEZE |
39236c6e | 1714 | uint32_t pages; |
fe8ab488 | 1715 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); |
39236c6e A |
1716 | #endif |
1717 | proc_list_lock(); | |
1718 | #if CONFIG_FREEZE | |
1719 | p->p_memstat_suspendedfootprint = pages; | |
1720 | memorystatus_suspended_footprint_total += pages; | |
1721 | memorystatus_suspended_count++; | |
316670eb | 1722 | #endif |
39236c6e A |
1723 | p->p_memstat_state |= P_MEMSTAT_SUSPENDED; |
1724 | proc_list_unlock(); | |
1725 | } | |
6d2010ae | 1726 | |
39236c6e A |
1727 | void |
1728 | memorystatus_on_resume(proc_t p) | |
1729 | { | |
1730 | #if CONFIG_FREEZE | |
1731 | boolean_t frozen; | |
1732 | pid_t pid; | |
1733 | #endif | |
6d2010ae | 1734 | |
39236c6e | 1735 | proc_list_lock(); |
6d2010ae | 1736 | |
316670eb | 1737 | #if CONFIG_FREEZE |
39236c6e A |
1738 | frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN); |
1739 | if (frozen) { | |
1740 | memorystatus_frozen_count--; | |
1741 | p->p_memstat_state |= P_MEMSTAT_PRIOR_THAW; | |
1742 | } | |
1743 | ||
1744 | memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint; | |
1745 | memorystatus_suspended_count--; | |
1746 | ||
1747 | pid = p->p_pid; | |
316670eb | 1748 | #endif |
39236c6e A |
1749 | |
1750 | p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN); | |
1751 | ||
1752 | proc_list_unlock(); | |
1753 | ||
1754 | #if CONFIG_FREEZE | |
1755 | if (frozen) { | |
1756 | memorystatus_freeze_entry_t data = { pid, FALSE, 0 }; | |
1757 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); | |
316670eb | 1758 | } |
39236c6e | 1759 | #endif |
316670eb | 1760 | } |
6d2010ae | 1761 | |
316670eb | 1762 | void |
39236c6e | 1763 | memorystatus_on_inactivity(proc_t p) |
6d2010ae | 1764 | { |
39236c6e | 1765 | #pragma unused(p) |
316670eb A |
1766 | #if CONFIG_FREEZE |
1767 | /* Wake the freeze thread */ | |
1768 | thread_wakeup((event_t)&memorystatus_freeze_wakeup); | |
1769 | #endif | |
1770 | } | |
6d2010ae | 1771 | |
39236c6e A |
1772 | static uint32_t |
1773 | memorystatus_build_state(proc_t p) { | |
1774 | uint32_t snapshot_state = 0; | |
1775 | ||
1776 | /* General */ | |
1777 | if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) { | |
1778 | snapshot_state |= kMemorystatusSuspended; | |
1779 | } | |
1780 | if (p->p_memstat_state & P_MEMSTAT_FROZEN) { | |
1781 | snapshot_state |= kMemorystatusFrozen; | |
1782 | } | |
1783 | if (p->p_memstat_state & P_MEMSTAT_PRIOR_THAW) { | |
1784 | snapshot_state |= kMemorystatusWasThawed; | |
1785 | } | |
1786 | ||
1787 | /* Tracking */ | |
1788 | if (p->p_memstat_dirty & P_DIRTY_TRACK) { | |
1789 | snapshot_state |= kMemorystatusTracked; | |
1790 | } | |
1791 | if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) { | |
1792 | snapshot_state |= kMemorystatusSupportsIdleExit; | |
1793 | } | |
1794 | if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) { | |
1795 | snapshot_state |= kMemorystatusDirty; | |
1796 | } | |
1797 | ||
1798 | return snapshot_state; | |
1799 | } | |
1800 | ||
1801 | #if !CONFIG_JETSAM | |
1802 | ||
1803 | static boolean_t | |
1804 | kill_idle_exit_proc(void) | |
316670eb | 1805 | { |
39236c6e | 1806 | proc_t p, victim_p = PROC_NULL; |
316670eb | 1807 | uint64_t current_time; |
39236c6e A |
1808 | boolean_t killed = FALSE; |
1809 | unsigned int i = 0; | |
316670eb | 1810 | |
39236c6e | 1811 | /* Pick next idle exit victim. */ |
316670eb | 1812 | current_time = mach_absolute_time(); |
6d2010ae | 1813 | |
39236c6e | 1814 | proc_list_lock(); |
6d2010ae | 1815 | |
39236c6e A |
1816 | p = memorystatus_get_first_proc_locked(&i, FALSE); |
1817 | while (p) { | |
1818 | /* No need to look beyond the idle band */ | |
1819 | if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) { | |
1820 | break; | |
1821 | } | |
1822 | ||
1823 | if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) { | |
1824 | if (current_time >= p->p_memstat_idledeadline) { | |
1825 | p->p_memstat_dirty |= P_DIRTY_TERMINATED; | |
1826 | victim_p = proc_ref_locked(p); | |
1827 | break; | |
316670eb A |
1828 | } |
1829 | } | |
39236c6e A |
1830 | |
1831 | p = memorystatus_get_next_proc_locked(&i, p, FALSE); | |
6d2010ae | 1832 | } |
316670eb | 1833 | |
39236c6e A |
1834 | proc_list_unlock(); |
1835 | ||
1836 | if (victim_p) { | |
1837 | printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_p->p_pid, (victim_p->p_comm ? victim_p->p_comm : "(unknown)")); | |
1838 | killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit); | |
1839 | proc_rele(victim_p); | |
316670eb | 1840 | } |
b0d623f7 | 1841 | |
39236c6e | 1842 | return killed; |
2d21ac55 | 1843 | } |
39236c6e | 1844 | #endif |
2d21ac55 | 1845 | |
fe8ab488 | 1846 | #if CONFIG_JETSAM |
39236c6e A |
1847 | static void |
1848 | memorystatus_thread_wake(void) { | |
1849 | thread_wakeup((event_t)&memorystatus_wakeup); | |
b0d623f7 | 1850 | } |
fe8ab488 A |
1851 | #endif /* CONFIG_JETSAM */ |
1852 | ||
1853 | extern void vm_pressure_response(void); | |
b0d623f7 | 1854 | |
316670eb | 1855 | static int |
39236c6e A |
1856 | memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation) |
1857 | { | |
1858 | if (interval_ms) { | |
1859 | assert_wait_timeout(&memorystatus_wakeup, THREAD_UNINT, interval_ms, 1000 * NSEC_PER_USEC); | |
1860 | } else { | |
1861 | assert_wait(&memorystatus_wakeup, THREAD_UNINT); | |
1862 | } | |
316670eb | 1863 | |
39236c6e A |
1864 | return thread_block(continuation); |
1865 | } | |
316670eb | 1866 | |
39236c6e A |
1867 | static void |
1868 | memorystatus_thread(void *param __unused, wait_result_t wr __unused) | |
1869 | { | |
1870 | static boolean_t is_vm_privileged = FALSE; | |
1871 | #if CONFIG_JETSAM | |
1872 | boolean_t post_snapshot = FALSE; | |
1873 | uint32_t errors = 0; | |
fe8ab488 | 1874 | uint32_t hwm_kill = 0; |
39236c6e | 1875 | #endif |
316670eb | 1876 | |
39236c6e A |
1877 | if (is_vm_privileged == FALSE) { |
1878 | /* | |
1879 | * It's the first time the thread has run, so just mark the thread as privileged and block. | |
1880 | * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>. | |
1881 | */ | |
1882 | thread_wire(host_priv_self(), current_thread(), TRUE); | |
1883 | is_vm_privileged = TRUE; | |
1884 | ||
1885 | memorystatus_thread_block(0, memorystatus_thread); | |
316670eb A |
1886 | } |
1887 | ||
39236c6e A |
1888 | #if CONFIG_JETSAM |
1889 | ||
1890 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START, | |
1891 | memorystatus_available_pages, 0, 0, 0, 0); | |
316670eb | 1892 | |
fe8ab488 A |
1893 | /* |
1894 | * Jetsam aware version. | |
1895 | * | |
1896 | * The VM pressure notification thread is working it's way through clients in parallel. | |
39236c6e | 1897 | * |
fe8ab488 A |
1898 | * So, while the pressure notification thread is targeting processes in order of |
1899 | * increasing jetsam priority, we can hopefully reduce / stop it's work by killing | |
1900 | * any processes that have exceeded their highwater mark. | |
39236c6e | 1901 | * |
fe8ab488 A |
1902 | * If we run out of HWM processes and our available pages drops below the critical threshold, then, |
1903 | * we target the least recently used process in order of increasing jetsam priority (exception: the FG band). | |
39236c6e | 1904 | */ |
fe8ab488 A |
1905 | while (is_thrashing(kill_under_pressure_cause) || |
1906 | memorystatus_available_pages <= memorystatus_available_pages_pressure) { | |
39236c6e A |
1907 | boolean_t killed; |
1908 | int32_t priority; | |
fe8ab488 A |
1909 | uint32_t cause; |
1910 | ||
1911 | if (kill_under_pressure_cause) { | |
1912 | cause = kill_under_pressure_cause; | |
1913 | } else { | |
1914 | cause = kMemorystatusKilledVMPageShortage; | |
1915 | } | |
39236c6e A |
1916 | |
1917 | #if LEGACY_HIWATER | |
1918 | /* Highwater */ | |
1919 | killed = memorystatus_kill_hiwat_proc(&errors); | |
1920 | if (killed) { | |
fe8ab488 | 1921 | hwm_kill++; |
39236c6e A |
1922 | post_snapshot = TRUE; |
1923 | goto done; | |
fe8ab488 A |
1924 | } else { |
1925 | memorystatus_hwm_candidates = FALSE; | |
1926 | } | |
1927 | ||
1928 | /* No highwater processes to kill. Continue or stop for now? */ | |
1929 | if (!is_thrashing(kill_under_pressure_cause) && | |
1930 | (memorystatus_available_pages > memorystatus_available_pages_critical)) { | |
1931 | /* | |
1932 | * We are _not_ out of pressure but we are above the critical threshold and there's: | |
1933 | * - no compressor thrashing | |
1934 | * - no more HWM processes left. | |
1935 | * For now, don't kill any other processes. | |
1936 | */ | |
1937 | ||
1938 | if (hwm_kill == 0) { | |
1939 | memorystatus_thread_wasted_wakeup++; | |
1940 | } | |
1941 | ||
1942 | break; | |
39236c6e A |
1943 | } |
1944 | #endif | |
1945 | ||
1946 | /* LRU */ | |
1947 | killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors); | |
1948 | if (killed) { | |
fe8ab488 A |
1949 | /* Don't generate logs for steady-state idle-exit kills (unless overridden for debug) */ |
1950 | if ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot) { | |
39236c6e A |
1951 | post_snapshot = TRUE; |
1952 | } | |
1953 | goto done; | |
1954 | } | |
fe8ab488 A |
1955 | |
1956 | if (memorystatus_available_pages <= memorystatus_available_pages_critical) { | |
1957 | /* Under pressure and unable to kill a process - panic */ | |
1958 | panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages); | |
1959 | } | |
39236c6e A |
1960 | |
1961 | done: | |
fe8ab488 A |
1962 | |
1963 | /* | |
1964 | * We do not want to over-kill when thrashing has been detected. | |
1965 | * To avoid that, we reset the flag here and notify the | |
1966 | * compressor. | |
39236c6e | 1967 | */ |
fe8ab488 A |
1968 | if (is_thrashing(kill_under_pressure_cause)) { |
1969 | kill_under_pressure_cause = 0; | |
1970 | vm_thrashing_jetsam_done(); | |
39236c6e | 1971 | } |
39236c6e | 1972 | } |
fe8ab488 A |
1973 | |
1974 | kill_under_pressure_cause = 0; | |
1975 | ||
39236c6e A |
1976 | if (errors) { |
1977 | memorystatus_clear_errors(); | |
1978 | } | |
1979 | ||
1980 | #if VM_PRESSURE_EVENTS | |
fe8ab488 A |
1981 | /* |
1982 | * LD: We used to target the foreground process first and foremost here. | |
1983 | * Now, we target all processes, starting from the non-suspended, background | |
1984 | * processes first. We will target foreground too. | |
1985 | * | |
1986 | * memorystatus_update_vm_pressure(TRUE); | |
1987 | */ | |
1988 | //vm_pressure_response(); | |
39236c6e A |
1989 | #endif |
1990 | ||
1991 | if (post_snapshot) { | |
1992 | size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + | |
1993 | sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count); | |
1994 | memorystatus_jetsam_snapshot->notification_time = mach_absolute_time(); | |
1995 | memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); | |
1996 | } | |
1997 | ||
1998 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END, | |
1999 | memorystatus_available_pages, 0, 0, 0, 0); | |
2000 | ||
2001 | #else /* CONFIG_JETSAM */ | |
2002 | ||
fe8ab488 A |
2003 | /* |
2004 | * Jetsam not enabled | |
39236c6e A |
2005 | */ |
2006 | ||
39236c6e A |
2007 | #endif /* CONFIG_JETSAM */ |
2008 | ||
2009 | memorystatus_thread_block(0, memorystatus_thread); | |
2010 | } | |
2011 | ||
2012 | #if !CONFIG_JETSAM | |
fe8ab488 A |
2013 | /* |
2014 | * Returns TRUE: | |
2015 | * when an idle-exitable proc was killed | |
2016 | * Returns FALSE: | |
2017 | * when there are no more idle-exitable procs found | |
2018 | * when the attempt to kill an idle-exitable proc failed | |
2019 | */ | |
39236c6e | 2020 | boolean_t memorystatus_idle_exit_from_VM(void) { |
fe8ab488 | 2021 | return(kill_idle_exit_proc()); |
39236c6e | 2022 | } |
fe8ab488 | 2023 | #endif /* !CONFIG_JETSAM */ |
39236c6e A |
2024 | |
2025 | #if CONFIG_JETSAM | |
2026 | ||
2027 | /* | |
2028 | * Callback invoked when allowable physical memory footprint exceeded | |
2029 | * (dirty pages + IOKit mappings) | |
2030 | * | |
2031 | * This is invoked for both advisory, non-fatal per-task high watermarks, | |
fe8ab488 | 2032 | * as well as the fatal task memory limits. |
39236c6e A |
2033 | */ |
2034 | void | |
2035 | memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb) | |
2036 | { | |
2037 | proc_t p = current_proc(); | |
fe8ab488 A |
2038 | |
2039 | if (warning == FALSE) { | |
2040 | printf("process %d (%s) exceeded physical memory footprint limit of %d MB\n", | |
2041 | p->p_pid, p->p_comm, max_footprint_mb); | |
2042 | } | |
39236c6e A |
2043 | |
2044 | #if VM_PRESSURE_EVENTS | |
2045 | if (warning == TRUE) { | |
fe8ab488 | 2046 | if (memorystatus_warn_process(p->p_pid, TRUE /* critical? */) != TRUE) { |
39236c6e | 2047 | /* Print warning, since it's possible that task has not registered for pressure notifications */ |
fe8ab488 | 2048 | printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n"); |
39236c6e A |
2049 | } |
2050 | return; | |
2051 | } | |
2052 | #endif /* VM_PRESSURE_EVENTS */ | |
2053 | ||
fe8ab488 | 2054 | if ((p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT) == P_MEMSTAT_FATAL_MEMLIMIT) { |
39236c6e | 2055 | /* |
fe8ab488 A |
2056 | * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task |
2057 | * has violated either the system-wide per-task memory limit OR its own task limit. | |
39236c6e A |
2058 | */ |
2059 | if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) { | |
2060 | printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n"); | |
2061 | } | |
fe8ab488 A |
2062 | } else { |
2063 | /* | |
2064 | * HWM offender exists. Done without locks or synchronization. | |
2065 | * See comment near its declaration for more details. | |
2066 | */ | |
2067 | memorystatus_hwm_candidates = TRUE; | |
2068 | } | |
2069 | } | |
2070 | ||
2071 | /* | |
2072 | * This is invoked when cpulimits have been exceeded while in fatal mode. | |
2073 | * The jetsam_flags do not apply as those are for memory related kills. | |
2074 | * We call this routine so that the offending process is killed with | |
2075 | * a non-zero exit status. | |
2076 | */ | |
2077 | void | |
2078 | jetsam_on_ledger_cpulimit_exceeded(void) | |
2079 | { | |
2080 | int retval = 0; | |
2081 | int jetsam_flags = 0; /* make it obvious */ | |
2082 | proc_t p = current_proc(); | |
2083 | ||
2084 | printf("task_exceeded_cpulimit: killing pid %d [%s]\n", | |
2085 | p->p_pid, (p->p_comm ? p->p_comm : "(unknown)")); | |
2086 | ||
2087 | retval = jetsam_do_kill(p, jetsam_flags); | |
2088 | ||
2089 | if (retval) { | |
2090 | printf("task_exceeded_cpulimit: failed to kill current task (exiting?).\n"); | |
39236c6e A |
2091 | } |
2092 | } | |
2093 | ||
2094 | static void | |
fe8ab488 | 2095 | memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages) |
39236c6e A |
2096 | { |
2097 | assert(task); | |
2098 | assert(footprint); | |
2099 | ||
2100 | *footprint = (uint32_t)(get_task_phys_footprint(task) / PAGE_SIZE_64); | |
2101 | if (max_footprint) { | |
2102 | *max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64); | |
2103 | } | |
fe8ab488 A |
2104 | if (max_footprint_lifetime) { |
2105 | *max_footprint_lifetime = (uint32_t)(get_task_resident_max(task) / PAGE_SIZE_64); | |
2106 | } | |
2107 | if (purgeable_pages) { | |
2108 | *purgeable_pages = (uint32_t)(get_task_purgeable_size(task) / PAGE_SIZE_64); | |
39236c6e | 2109 | } |
39236c6e A |
2110 | } |
2111 | ||
fe8ab488 | 2112 | |
39236c6e A |
2113 | static void |
2114 | memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause) | |
2115 | { | |
2116 | unsigned int i; | |
2117 | ||
2118 | for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) { | |
2119 | if (memorystatus_jetsam_snapshot_list[i].pid == p->p_pid) { | |
2120 | /* Update if the priority has changed since the snapshot was taken */ | |
2121 | if (memorystatus_jetsam_snapshot_list[i].priority != p->p_memstat_effectivepriority) { | |
2122 | memorystatus_jetsam_snapshot_list[i].priority = p->p_memstat_effectivepriority; | |
2123 | strlcpy(memorystatus_jetsam_snapshot_list[i].name, p->p_comm, MAXCOMLEN+1); | |
2124 | memorystatus_jetsam_snapshot_list[i].state = memorystatus_build_state(p); | |
2125 | memorystatus_jetsam_snapshot_list[i].user_data = p->p_memstat_userdata; | |
2126 | memorystatus_jetsam_snapshot_list[i].fds = p->p_fd->fd_nfiles; | |
2127 | } | |
2128 | memorystatus_jetsam_snapshot_list[i].killed = kill_cause; | |
2129 | return; | |
2130 | } | |
2131 | } | |
316670eb | 2132 | } |
b0d623f7 | 2133 | |
39236c6e A |
2134 | void memorystatus_pages_update(unsigned int pages_avail) |
2135 | { | |
fe8ab488 A |
2136 | memorystatus_available_pages = pages_avail; |
2137 | ||
2138 | #if VM_PRESSURE_EVENTS | |
2139 | /* | |
2140 | * Since memorystatus_available_pages changes, we should | |
2141 | * re-evaluate the pressure levels on the system and | |
2142 | * check if we need to wake the pressure thread. | |
2143 | * We also update memorystatus_level in that routine. | |
2144 | */ | |
2145 | vm_pressure_response(); | |
2146 | ||
2147 | if (memorystatus_available_pages <= memorystatus_available_pages_pressure) { | |
2148 | ||
2149 | if (memorystatus_hwm_candidates || (memorystatus_available_pages <= memorystatus_available_pages_critical)) { | |
2150 | memorystatus_thread_wake(); | |
2151 | } | |
2152 | } | |
2153 | #else /* VM_PRESSURE_EVENTS */ | |
2154 | ||
39236c6e A |
2155 | boolean_t critical, delta; |
2156 | ||
316670eb A |
2157 | if (!memorystatus_delta) { |
2158 | return; | |
2159 | } | |
39236c6e A |
2160 | |
2161 | critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE; | |
2162 | delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta)) | |
2163 | || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE; | |
2164 | ||
2165 | if (critical || delta) { | |
39236c6e | 2166 | memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem); |
39236c6e | 2167 | memorystatus_thread_wake(); |
b0d623f7 | 2168 | } |
fe8ab488 | 2169 | #endif /* VM_PRESSURE_EVENTS */ |
316670eb A |
2170 | } |
2171 | ||
2172 | static boolean_t | |
2173 | memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry) | |
2174 | { | |
fe8ab488 A |
2175 | clock_sec_t tv_sec; |
2176 | clock_usec_t tv_usec; | |
2177 | ||
39236c6e | 2178 | memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t)); |
316670eb A |
2179 | |
2180 | entry->pid = p->p_pid; | |
2181 | strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1); | |
39236c6e | 2182 | entry->priority = p->p_memstat_effectivepriority; |
fe8ab488 | 2183 | memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages, &entry->max_pages_lifetime, &entry->purgeable_pages); |
39236c6e A |
2184 | entry->state = memorystatus_build_state(p); |
2185 | entry->user_data = p->p_memstat_userdata; | |
316670eb | 2186 | memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid)); |
fe8ab488 A |
2187 | entry->fds = p->p_fd->fd_nfiles; |
2188 | ||
2189 | absolutetime_to_microtime(get_task_cpu_time(p->task), &tv_sec, &tv_usec); | |
2190 | entry->cpu_time.tv_sec = tv_sec; | |
2191 | entry->cpu_time.tv_usec = tv_usec; | |
316670eb A |
2192 | |
2193 | return TRUE; | |
b0d623f7 A |
2194 | } |
2195 | ||
2196 | static void | |
316670eb | 2197 | memorystatus_jetsam_snapshot_procs_locked(void) |
b0d623f7 | 2198 | { |
39236c6e A |
2199 | proc_t p, next_p; |
2200 | unsigned int b = 0, i = 0; | |
2201 | kern_return_t kr = KERN_SUCCESS; | |
2202 | ||
2203 | mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; | |
2204 | vm_statistics64_data_t vm_stat; | |
2205 | ||
2206 | if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count) != KERN_SUCCESS)) { | |
2207 | printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr); | |
2208 | memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats)); | |
2209 | } else { | |
2210 | memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count; | |
2211 | memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count; | |
2212 | memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count; | |
2213 | memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count; | |
2214 | memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count; | |
2215 | memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count; | |
2216 | ||
2217 | memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count; | |
2218 | memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count; | |
2219 | memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count; | |
2220 | memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions; | |
2221 | memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions; | |
2222 | memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count; | |
2223 | memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor; | |
2224 | } | |
2225 | ||
2226 | next_p = memorystatus_get_first_proc_locked(&b, TRUE); | |
2227 | while (next_p) { | |
2228 | p = next_p; | |
2229 | next_p = memorystatus_get_next_proc_locked(&b, p, TRUE); | |
2230 | ||
316670eb A |
2231 | if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) { |
2232 | continue; | |
2233 | } | |
2234 | ||
2235 | MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n", | |
b0d623f7 A |
2236 | p->p_pid, |
2237 | p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7], | |
2238 | p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]); | |
316670eb | 2239 | |
39236c6e | 2240 | if (++i == memorystatus_jetsam_snapshot_max) { |
b0d623f7 A |
2241 | break; |
2242 | } | |
2243 | } | |
39236c6e A |
2244 | |
2245 | memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time(); | |
2246 | memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i; | |
b0d623f7 A |
2247 | } |
2248 | ||
39236c6e | 2249 | #if DEVELOPMENT || DEBUG |
b0d623f7 | 2250 | |
39236c6e A |
2251 | static int |
2252 | memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) { | |
2253 | int ret; | |
2254 | memorystatus_jetsam_panic_options_t debug; | |
2255 | ||
2256 | if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) { | |
2257 | return EINVAL; | |
b0d623f7 | 2258 | } |
39236c6e A |
2259 | |
2260 | ret = copyin(buffer, &debug, buffer_size); | |
2261 | if (ret) { | |
2262 | return ret; | |
2263 | } | |
2264 | ||
2265 | /* Panic bits match kMemorystatusKilled* enum */ | |
2266 | memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask); | |
2267 | ||
2268 | /* Copyout new value */ | |
2269 | debug.data = memorystatus_jetsam_panic_debug; | |
2270 | ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t)); | |
2271 | ||
2272 | return ret; | |
b0d623f7 A |
2273 | } |
2274 | ||
39236c6e A |
2275 | #endif |
2276 | ||
2277 | /* | |
2278 | * Jetsam a specific process. | |
2279 | */ | |
2280 | static boolean_t | |
2281 | memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) { | |
2282 | boolean_t killed; | |
b0d623f7 | 2283 | proc_t p; |
39236c6e A |
2284 | |
2285 | /* TODO - add a victim queue and push this into the main jetsam thread */ | |
2286 | ||
2287 | p = proc_find(victim_pid); | |
2288 | if (!p) { | |
2289 | return FALSE; | |
2290 | } | |
2291 | ||
fe8ab488 A |
2292 | printf("memorystatus: specifically killing pid %d [%s] (%s) - memorystatus_available_pages: %d\n", |
2293 | victim_pid, (p->p_comm ? p->p_comm : "(unknown)"), | |
2294 | jetsam_kill_cause_name[cause], memorystatus_available_pages); | |
39236c6e A |
2295 | |
2296 | proc_list_lock(); | |
2297 | ||
2298 | if (memorystatus_jetsam_snapshot_count == 0) { | |
2299 | memorystatus_jetsam_snapshot_procs_locked(); | |
2300 | } | |
2301 | ||
2302 | memorystatus_update_snapshot_locked(p, cause); | |
2303 | proc_list_unlock(); | |
2304 | ||
2305 | killed = memorystatus_do_kill(p, cause); | |
2306 | proc_rele(p); | |
2307 | ||
2308 | return killed; | |
2309 | } | |
2310 | ||
/*
 * Jetsam the first process in the queue.
 *
 * Walks the memorystatus priority bands (lowest first, after sorting the
 * foreground band by size) and kills the first eligible process found.
 *
 * any:      CONFIG_FREEZE only — when FALSE, processes marked
 *           P_MEMSTAT_LOCKED or P_MEMSTAT_NORECLAIM are skipped.
 * cause:    kMemorystatusKilled* reason code.
 * priority: optional out-param; receives the victim's effective priority.
 * errors:   incremented for each process whose kill attempt failed.
 *
 * Returns TRUE if a process was killed (or, under the diagnostic policy,
 * suspended), FALSE otherwise.
 *
 * Locking: enters and exits without the proc list lock held; the lock is
 * dropped around the actual kill and re-taken on failure, after which the
 * band iteration restarts from the beginning (i = 0).
 */
static boolean_t
memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors)
{
    pid_t aPid;
    proc_t p = PROC_NULL, next_p = PROC_NULL;
    boolean_t new_snapshot = FALSE, killed = FALSE;
    unsigned int i = 0;

#ifndef CONFIG_FREEZE
#pragma unused(any)
#endif

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
        memorystatus_available_pages, 0, 0, 0, 0);

    proc_list_lock();

    /* Largest processes first within the foreground band. */
    memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND);

    next_p = memorystatus_get_first_proc_locked(&i, TRUE);
    while (next_p) {
#if DEVELOPMENT || DEBUG
        int activeProcess;
        int procSuspendedForDiagnosis;
#endif /* DEVELOPMENT || DEBUG */

        /* Fetch the successor before p can be unlinked by the kill below. */
        p = next_p;
        next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

#if DEVELOPMENT || DEBUG
        activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND;
        procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED;
#endif /* DEVELOPMENT || DEBUG */

        aPid = p->p_pid;

        /* Skip processes already being torn down or previously failed. */
        if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
            continue;
        }

#if DEVELOPMENT || DEBUG
        if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) {
            printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
            continue;
        }
#endif /* DEVELOPMENT || DEBUG */

        if (cause == kMemorystatusKilledVnodes)
        {
            /*
             * If the system runs out of vnodes, we systematically jetsam
             * processes in hopes of stumbling onto a vnode gain that helps
             * the system recover. The process that happens to trigger
             * this path has no known relationship to the vnode consumption.
             * We attempt to safeguard that process e.g: do not jetsam it.
             */

            if (p == current_proc()) {
                /* do not jetsam the current process */
                continue;
            }
        }

#if CONFIG_FREEZE
        boolean_t skip;
        boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM));
        if (any || reclaim_proc) {
            skip = FALSE;
        } else {
            skip = TRUE;
        }

        if (skip) {
            continue;
        } else
#endif
        {
            if (priority) {
                *priority = p->p_memstat_effectivepriority;
            }

            /*
             * Capture a snapshot if none exists and:
             * - priority was not requested (this is something other than an ambient kill)
             * - the priority was requested *and* the targeted process is not at idle priority
             */
            if ((memorystatus_jetsam_snapshot_count == 0) &&
                (memorystatus_idle_snapshot || ((!priority) || (priority && (*priority != JETSAM_PRIORITY_IDLE))))) {
                memorystatus_jetsam_snapshot_procs_locked();
                new_snapshot = TRUE;
            }

            /*
             * Mark as terminated so that if exit1() indicates success, but the process (for example)
             * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
             * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
             * acquisition of the proc lock.
             */
            p->p_memstat_state |= P_MEMSTAT_TERMINATED;

#if DEVELOPMENT || DEBUG
            if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) {
                /* Diagnostic mode: suspend the active process instead of killing it. */
                MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
                    aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level);
                memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
                p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
                if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) {
                    jetsam_diagnostic_suspended_one_active_proc = 1;
                    printf("jetsam: returning after suspending first active proc - %d\n", aPid);
                }

                p = proc_ref_locked(p);
                proc_list_unlock();
                if (p) {
                    task_suspend(p->task);
                    proc_rele(p);
                    killed = TRUE;
                }

                goto exit;
            } else
#endif /* DEVELOPMENT || DEBUG */
            {
                /* Shift queue, update stats */
                memorystatus_update_snapshot_locked(p, cause);

                /* Take a ref so p stays valid across the unlocked kill. */
                p = proc_ref_locked(p);
                proc_list_unlock();
                if (p) {
                    printf("memorystatus: %s %d [%s] (%s) - memorystatus_available_pages: %d\n",
                        ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) ?
                        "idle exiting pid" : "jetsam killing pid"),
                        aPid, (p->p_comm ? p->p_comm : "(unknown)"),
                        jetsam_kill_cause_name[cause], memorystatus_available_pages);
                    killed = memorystatus_do_kill(p, cause);
                }

                /* Success? */
                if (killed) {
                    proc_rele(p);
                    goto exit;
                }

                /*
                 * Failure - unwind and restart.
                 * NOTE(review): if proc_ref_locked() returned NULL above, this
                 * path dereferences p while NULL — looks unreachable only if
                 * proc_ref_locked() cannot fail here; TODO confirm.
                 */
                proc_list_lock();
                proc_rele_locked(p);
                p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
                p->p_memstat_state |= P_MEMSTAT_ERROR;
                *errors += 1;
                i = 0;
                next_p = memorystatus_get_first_proc_locked(&i, TRUE);
            }
        }
    }

    proc_list_unlock();

exit:
    /* Clear snapshot if freshly captured and no target was found */
    if (new_snapshot && !killed) {
        memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
    }

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
        memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);

    return killed;
}
2482 | ||
39236c6e A |
2483 | #if LEGACY_HIWATER |
2484 | ||
/*
 * Kill (or, under the diagnostic policy, suspend) the first process found
 * whose physical footprint exceeds its configured high-water memory limit.
 *
 * errors: incremented for each process whose kill attempt failed.
 *
 * Returns TRUE if a process was killed/suspended, FALSE if no process was
 * over its applicable limit or all attempts failed.
 *
 * Locking: same discipline as memorystatus_kill_top_process() — the proc
 * list lock is dropped around the kill and the walk restarts on failure.
 */
static boolean_t
memorystatus_kill_hiwat_proc(uint32_t *errors)
{
    pid_t aPid = 0;
    proc_t p = PROC_NULL, next_p = PROC_NULL;
    boolean_t new_snapshot = FALSE, killed = FALSE;
    unsigned int i = 0;

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
        memorystatus_available_pages, 0, 0, 0, 0);

    proc_list_lock();
    memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND);

    next_p = memorystatus_get_first_proc_locked(&i, TRUE);
    while (next_p) {
        uint32_t footprint;
        boolean_t skip;

        /* Grab the successor before p can be unlinked. */
        p = next_p;
        next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);

        aPid = p->p_pid;

        if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
            continue;
        }

        /* skip if no limit set */
        if (p->p_memstat_memlimit <= 0) {
            continue;
        }

        /* skip if a currently inapplicable limit is encountered */
        if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
            continue;
        }

        /* Footprint in MB, compared against the per-process limit (also MB). */
        footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024));
        skip = (((int32_t)footprint) <= p->p_memstat_memlimit);
#if DEVELOPMENT || DEBUG
        if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
            if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) {
                continue;
            }
        }
#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE
        /* Never target a process currently locked by the freezer. */
        if (!skip) {
            if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
                skip = TRUE;
            } else {
                skip = FALSE;
            }
        }
#endif

        if (skip) {
            continue;
        } else {
            MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d Mb > 1 (%d Mb)\n",
                (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit);

            if (memorystatus_jetsam_snapshot_count == 0) {
                memorystatus_jetsam_snapshot_procs_locked();
                new_snapshot = TRUE;
            }

            /* See the P_MEMSTAT_TERMINATED rationale in memorystatus_kill_top_process(). */
            p->p_memstat_state |= P_MEMSTAT_TERMINATED;

#if DEVELOPMENT || DEBUG
            if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
                MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
                memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
                p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;

                p = proc_ref_locked(p);
                proc_list_unlock();
                if (p) {
                    task_suspend(p->task);
                    proc_rele(p);
                    killed = TRUE;
                }

                goto exit;
            } else
#endif /* DEVELOPMENT || DEBUG */
            {
                memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat);

                /* Hold a ref across the unlocked kill. */
                p = proc_ref_locked(p);
                proc_list_unlock();
                if (p) {
                    printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n",
                        aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
                    killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
                }

                /* Success? */
                if (killed) {
                    proc_rele(p);
                    goto exit;
                }

                /*
                 * Failure - unwind and restart.
                 * NOTE(review): as in memorystatus_kill_top_process(), this
                 * assumes proc_ref_locked() did not return NULL — TODO confirm.
                 */
                proc_list_lock();
                proc_rele_locked(p);
                p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
                p->p_memstat_state |= P_MEMSTAT_ERROR;
                *errors += 1;
                i = 0;
                next_p = memorystatus_get_first_proc_locked(&i, TRUE);
            }
        }
    }

    proc_list_unlock();

exit:
    /* Clear snapshot if freshly captured and no target was found */
    if (new_snapshot && !killed) {
        memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
    }

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END,
        memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);

    return killed;
}
2d21ac55 | 2615 | |
39236c6e | 2616 | #endif /* LEGACY_HIWATER */ |
316670eb | 2617 | |
39236c6e A |
2618 | static boolean_t |
2619 | memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) { | |
2620 | /* TODO: allow a general async path */ | |
fe8ab488 A |
2621 | if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage && cause != kMemorystatusKilledVMThrashing && |
2622 | cause != kMemorystatusKilledFCThrashing)) { | |
39236c6e | 2623 | return FALSE; |
316670eb | 2624 | } |
39236c6e | 2625 | |
fe8ab488 | 2626 | kill_under_pressure_cause = cause; |
39236c6e A |
2627 | memorystatus_thread_wake(); |
2628 | return TRUE; | |
2629 | } | |
2d21ac55 | 2630 | |
39236c6e A |
2631 | static boolean_t |
2632 | memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) { | |
2633 | boolean_t res; | |
2634 | uint32_t errors = 0; | |
2635 | ||
2636 | if (victim_pid == -1) { | |
2637 | /* No pid, so kill first process */ | |
2638 | res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors); | |
2639 | } else { | |
2640 | res = memorystatus_kill_specific_process(victim_pid, cause); | |
2641 | } | |
2642 | ||
2643 | if (errors) { | |
2644 | memorystatus_clear_errors(); | |
2645 | } | |
2646 | ||
2647 | if (res == TRUE) { | |
2648 | /* Fire off snapshot notification */ | |
2649 | size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + | |
2650 | sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count; | |
2651 | memorystatus_jetsam_snapshot->notification_time = mach_absolute_time(); | |
2652 | memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); | |
2653 | } | |
2654 | ||
2655 | return res; | |
2656 | } | |
b0d623f7 | 2657 | |
39236c6e A |
2658 | boolean_t |
2659 | memorystatus_kill_on_VM_page_shortage(boolean_t async) { | |
2660 | if (async) { | |
2661 | return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage); | |
2662 | } else { | |
2663 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage); | |
2664 | } | |
2665 | } | |
2d21ac55 | 2666 | |
39236c6e A |
2667 | boolean_t |
2668 | memorystatus_kill_on_VM_thrashing(boolean_t async) { | |
2669 | if (async) { | |
2670 | return memorystatus_kill_process_async(-1, kMemorystatusKilledVMThrashing); | |
2671 | } else { | |
2672 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMThrashing); | |
2d21ac55 A |
2673 | } |
2674 | } | |
b0d623f7 | 2675 | |
fe8ab488 A |
2676 | boolean_t |
2677 | memorystatus_kill_on_FC_thrashing(boolean_t async) { | |
2678 | if (async) { | |
2679 | return memorystatus_kill_process_async(-1, kMemorystatusKilledFCThrashing); | |
2680 | } else { | |
2681 | return memorystatus_kill_process_sync(-1, kMemorystatusKilledFCThrashing); | |
2682 | } | |
2683 | } | |
2684 | ||
39236c6e A |
/*
 * Jetsam in response to vnode exhaustion. Always synchronous; the victim
 * is the first eligible process (current_proc() is exempted by the
 * kMemorystatusKilledVnodes handling in memorystatus_kill_top_process).
 */
boolean_t
memorystatus_kill_on_vnode_limit(void) {
    return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes);
}
2689 | ||
316670eb A |
2690 | #endif /* CONFIG_JETSAM */ |
2691 | ||
6d2010ae A |
2692 | #if CONFIG_FREEZE |
2693 | ||
2694 | __private_extern__ void | |
316670eb | 2695 | memorystatus_freeze_init(void) |
6d2010ae | 2696 | { |
316670eb A |
2697 | kern_return_t result; |
2698 | thread_t thread; | |
39236c6e | 2699 | |
316670eb A |
2700 | result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread); |
2701 | if (result == KERN_SUCCESS) { | |
2702 | thread_deallocate(thread); | |
2703 | } else { | |
2704 | panic("Could not create memorystatus_freeze_thread"); | |
2705 | } | |
6d2010ae A |
2706 | } |
2707 | ||
316670eb | 2708 | static int |
39236c6e | 2709 | memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low) |
6d2010ae | 2710 | { |
39236c6e A |
2711 | pid_t aPid = 0; |
2712 | int ret = -1; | |
2713 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
2714 | unsigned int i = 0; | |
6d2010ae | 2715 | |
39236c6e A |
2716 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, |
2717 | memorystatus_available_pages, 0, 0, 0, 0); | |
2718 | ||
2719 | proc_list_lock(); | |
6d2010ae | 2720 | |
39236c6e A |
2721 | next_p = memorystatus_get_first_proc_locked(&i, TRUE); |
2722 | while (next_p) { | |
2723 | kern_return_t kr; | |
2724 | uint32_t purgeable, wired, clean, dirty; | |
2725 | boolean_t shared; | |
2726 | uint32_t pages; | |
2727 | uint32_t max_pages = 0; | |
316670eb A |
2728 | uint32_t state; |
2729 | ||
39236c6e A |
2730 | p = next_p; |
2731 | next_p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
6d2010ae | 2732 | |
39236c6e A |
2733 | aPid = p->p_pid; |
2734 | state = p->p_memstat_state; | |
6d2010ae | 2735 | |
316670eb | 2736 | /* Ensure the process is eligible for freezing */ |
39236c6e | 2737 | if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) { |
316670eb A |
2738 | continue; // with lock held |
2739 | } | |
316670eb | 2740 | |
39236c6e | 2741 | /* Only freeze processes meeting our minimum resident page criteria */ |
fe8ab488 | 2742 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL); |
39236c6e A |
2743 | if (pages < memorystatus_freeze_pages_min) { |
2744 | continue; // with lock held | |
2745 | } | |
6d2010ae | 2746 | |
fe8ab488 | 2747 | if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) { |
316670eb A |
2748 | /* Ensure there's enough free space to freeze this process. */ |
2749 | max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max); | |
2750 | if (max_pages < memorystatus_freeze_pages_min) { | |
2751 | *memorystatus_freeze_swap_low = TRUE; | |
39236c6e A |
2752 | proc_list_unlock(); |
2753 | goto exit; | |
316670eb | 2754 | } |
39236c6e A |
2755 | } else { |
2756 | max_pages = UINT32_MAX - 1; | |
2757 | } | |
2758 | ||
2759 | /* Mark as locked temporarily to avoid kill */ | |
2760 | p->p_memstat_state |= P_MEMSTAT_LOCKED; | |
2761 | ||
2762 | p = proc_ref_locked(p); | |
2763 | proc_list_unlock(); | |
2764 | if (!p) { | |
2765 | goto exit; | |
2766 | } | |
2767 | ||
2768 | kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE); | |
2769 | ||
2770 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - " | |
2771 | "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n", | |
2772 | (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"), | |
2773 | memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free()); | |
2774 | ||
2775 | proc_list_lock(); | |
2776 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
2777 | ||
2778 | /* Success? */ | |
2779 | if (KERN_SUCCESS == kr) { | |
2780 | memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; | |
316670eb | 2781 | |
39236c6e | 2782 | memorystatus_frozen_count++; |
316670eb | 2783 | |
39236c6e A |
2784 | p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0: P_MEMSTAT_NORECLAIM)); |
2785 | ||
2786 | /* Update stats */ | |
2787 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
2788 | throttle_intervals[i].pageouts += dirty; | |
2789 | } | |
2790 | ||
2791 | memorystatus_freeze_pageouts += dirty; | |
2792 | memorystatus_freeze_count++; | |
2793 | ||
2794 | proc_list_unlock(); | |
6d2010ae | 2795 | |
39236c6e | 2796 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); |
6d2010ae | 2797 | |
39236c6e A |
2798 | /* Return the number of reclaimed pages */ |
2799 | ret = dirty; | |
6d2010ae | 2800 | |
39236c6e A |
2801 | } else { |
2802 | proc_list_unlock(); | |
316670eb | 2803 | } |
39236c6e A |
2804 | |
2805 | proc_rele(p); | |
2806 | goto exit; | |
6d2010ae | 2807 | } |
316670eb | 2808 | |
39236c6e A |
2809 | proc_list_unlock(); |
2810 | ||
2811 | exit: | |
2812 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, | |
2813 | memorystatus_available_pages, aPid, 0, 0, 0); | |
316670eb | 2814 | |
39236c6e | 2815 | return ret; |
6d2010ae A |
2816 | } |
2817 | ||
316670eb A |
2818 | static inline boolean_t |
2819 | memorystatus_can_freeze_processes(void) | |
6d2010ae | 2820 | { |
316670eb | 2821 | boolean_t ret; |
6d2010ae | 2822 | |
39236c6e | 2823 | proc_list_lock(); |
316670eb A |
2824 | |
2825 | if (memorystatus_suspended_count) { | |
2826 | uint32_t average_resident_pages, estimated_processes; | |
2827 | ||
2828 | /* Estimate the number of suspended processes we can fit */ | |
39236c6e | 2829 | average_resident_pages = memorystatus_suspended_footprint_total / memorystatus_suspended_count; |
316670eb A |
2830 | estimated_processes = memorystatus_suspended_count + |
2831 | ((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages); | |
2832 | ||
2833 | /* If it's predicted that no freeze will occur, lower the threshold temporarily */ | |
2834 | if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) { | |
2835 | memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_LOW; | |
6d2010ae | 2836 | } else { |
39236c6e | 2837 | memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT; |
6d2010ae | 2838 | } |
6d2010ae | 2839 | |
316670eb A |
2840 | MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n", |
2841 | memorystatus_suspended_count, average_resident_pages, estimated_processes); | |
6d2010ae | 2842 | |
316670eb A |
2843 | if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) { |
2844 | ret = TRUE; | |
2845 | } else { | |
2846 | ret = FALSE; | |
6d2010ae | 2847 | } |
316670eb A |
2848 | } else { |
2849 | ret = FALSE; | |
6d2010ae | 2850 | } |
316670eb | 2851 | |
39236c6e | 2852 | proc_list_unlock(); |
6d2010ae | 2853 | |
316670eb | 2854 | return ret; |
6d2010ae A |
2855 | } |
2856 | ||
316670eb A |
/*
 * Gate for the freezer thread: returns TRUE when a freeze attempt is
 * worthwhile right now.
 *
 * memorystatus_freeze_swap_low: in/out low-swap latch. When set, the freezer
 * previously ran out of swap; it is cleared here once free swap pages have
 * recovered above the per-process minimum.
 */
static boolean_t
memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
{
    /* Only freeze if we're sufficiently low on memory; this holds off freeze right
       after boot, and is generally is a no-op once we've reached steady state. */
    if (memorystatus_available_pages > memorystatus_freeze_threshold) {
        return FALSE;
    }

    /* Check minimum suspended process threshold. */
    if (!memorystatus_can_freeze_processes()) {
        return FALSE;
    }

    /* Is swap running low? */
    if (*memorystatus_freeze_swap_low) {
        /* If there's been no movement in free swap pages since we last attempted freeze, return. */
        if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
            return FALSE;
        }

        /* Pages have been freed - we can retry. */
        *memorystatus_freeze_swap_low = FALSE;
    }

    /* OK */
    return TRUE;
}
2885 | ||
/*
 * Update one pageout-budget throttle interval against the current time.
 *
 * ts:       current system time (mach_timespec).
 * interval: the interval record to update.
 *
 * When the interval has expired (ts >= interval->ts) the budget is
 * (re)initialized from the daily cap scaled to the interval length, the
 * deadline is advanced by interval->mins, and the throttle is cleared.
 * Otherwise, if the pageout count has hit the budget, the throttle is set.
 */
static void
memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
{
    if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
        if (!interval->max_pageouts) {
            /* First use: budget = burst multiple × (interval share of the daily max). */
            interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)));
        } else {
            printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins);
        }
        /* Next deadline = now + interval length. */
        interval->ts.tv_sec = interval->mins * 60;
        interval->ts.tv_nsec = 0;
        ADD_MACH_TIMESPEC(&interval->ts, ts);
        /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
        if (interval->pageouts > interval->max_pageouts) {
            interval->pageouts -= interval->max_pageouts;
        } else {
            interval->pageouts = 0;
        }
        interval->throttle = FALSE;
    } else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
        printf("memorystatus_freeze_update_throttle_interval: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
        interval->throttle = TRUE;
    }

    MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
        interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
        interval->throttle ? "on" : "off");
}
2914 | ||
2915 | static boolean_t | |
316670eb | 2916 | memorystatus_freeze_update_throttle(void) |
6d2010ae A |
2917 | { |
2918 | clock_sec_t sec; | |
2919 | clock_nsec_t nsec; | |
2920 | mach_timespec_t ts; | |
2921 | uint32_t i; | |
2922 | boolean_t throttled = FALSE; | |
2923 | ||
2924 | #if DEVELOPMENT || DEBUG | |
316670eb | 2925 | if (!memorystatus_freeze_throttle_enabled) |
6d2010ae A |
2926 | return FALSE; |
2927 | #endif | |
2928 | ||
2929 | clock_get_system_nanotime(&sec, &nsec); | |
2930 | ts.tv_sec = sec; | |
2931 | ts.tv_nsec = nsec; | |
2932 | ||
316670eb | 2933 | /* Check freeze pageouts over multiple intervals and throttle if we've exceeded our budget. |
6d2010ae | 2934 | * |
316670eb | 2935 | * This ensures that periods of inactivity can't be used as 'credit' towards freeze if the device has |
6d2010ae A |
2936 | * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in |
2937 | * order to allow for bursts of activity. | |
2938 | */ | |
2939 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
316670eb | 2940 | memorystatus_freeze_update_throttle_interval(&ts, &throttle_intervals[i]); |
6d2010ae A |
2941 | if (throttle_intervals[i].throttle == TRUE) |
2942 | throttled = TRUE; | |
2943 | } | |
2944 | ||
2945 | return throttled; | |
2946 | } | |
2947 | ||
/*
 * Body of the freezer kernel thread (continuation style: it blocks on
 * memorystatus_freeze_wakeup and re-enters itself). Each wakeup performs
 * at most one freeze attempt, gated by the enable flag, the
 * memorystatus_can_freeze() checks, and the pageout throttle.
 */
static void
memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
{
    /* Low-swap latch; persists across wakeups, cleared by memorystatus_can_freeze(). */
    static boolean_t memorystatus_freeze_swap_low = FALSE;

    if (memorystatus_freeze_enabled) {
        if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
            /* Only freeze if we've not exceeded our pageout budgets or we're not backed by swap. */
            if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ||
                !memorystatus_freeze_update_throttle()) {
                memorystatus_freeze_top_process(&memorystatus_freeze_swap_low);
            } else {
                printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n");
                memorystatus_freeze_throttle_count++; /* Throttled, update stats */
            }
        }
    }

    /* Sleep until the next memorystatus_freeze_wakeup, then restart this function. */
    assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
    thread_block((thread_continue_t) memorystatus_freeze_thread);
}
2969 | ||
2970 | #endif /* CONFIG_FREEZE */ | |
6d2010ae | 2971 | |
fe8ab488 | 2972 | #if VM_PRESSURE_EVENTS |
6d2010ae | 2973 | |
fe8ab488 | 2974 | #if CONFIG_MEMORYSTATUS |
316670eb | 2975 | |
fe8ab488 A |
2976 | static int |
2977 | memorystatus_send_note(int event_code, void *data, size_t data_length) { | |
2978 | int ret; | |
2979 | struct kev_msg ev_msg; | |
316670eb | 2980 | |
fe8ab488 A |
2981 | ev_msg.vendor_code = KEV_VENDOR_APPLE; |
2982 | ev_msg.kev_class = KEV_SYSTEM_CLASS; | |
2983 | ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS; | |
2984 | ||
2985 | ev_msg.event_code = event_code; | |
2986 | ||
2987 | ev_msg.dv[0].data_length = data_length; | |
2988 | ev_msg.dv[0].data_ptr = data; | |
2989 | ev_msg.dv[1].data_length = 0; | |
2990 | ||
2991 | ret = kev_post_msg(&ev_msg); | |
2992 | if (ret) { | |
2993 | printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); | |
316670eb | 2994 | } |
39236c6e | 2995 | |
fe8ab488 | 2996 | return ret; |
316670eb A |
2997 | } |
2998 | ||
fe8ab488 A |
/*
 * Deliver a memory-pressure warning to a specific process.
 *
 * pid:      target process.
 * critical: TRUE for a critical-pressure notification, FALSE for a warning.
 *
 * If the process has a memorystatus knote registered, the corresponding
 * NOTE_MEMORYSTATUS_PRESSURE_* flag is forced on and the klist is knoted;
 * otherwise falls back to the legacy per-pid dispatch path.
 * Returns TRUE if a notification was delivered by either mechanism.
 */
boolean_t
memorystatus_warn_process(pid_t pid, boolean_t critical) {

    boolean_t ret = FALSE;
    struct knote *kn = NULL;

    /*
     * See comment in sysctl_memorystatus_vm_pressure_send.
     */

    memorystatus_klist_lock();
    kn = vm_find_knote_from_pid(pid, &memorystatus_klist);
    if (kn) {
        /*
         * By setting the "fflags" here, we are forcing
         * a process to deal with the case where it's
         * bumping up into its memory limits. If we don't
         * do this here, we will end up depending on the
         * system pressure snapshot evaluation in
         * filt_memorystatus().
         */

        if (critical) {
            kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
        } else {
            kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
        }
        KNOTE(&memorystatus_klist, kMemorystatusPressure);
        ret = TRUE;
    } else {
        /* No knote registered — try the direct per-pid notification path. */
        if (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0) {
            ret = TRUE;
        }
    }
    memorystatus_klist_unlock();

    return ret;
}
3037 | ||
/*
 * Post a kMemorystatusPressureNote kernel event naming the given pid.
 * Returns 0 on success (see memorystatus_send_note()).
 */
int
memorystatus_send_pressure_note(pid_t pid) {
    MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid);
    return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid));
}
3043 | ||
fe8ab488 A |
/*
 * Notify all knote listeners registered for NOTE_MEMORYSTATUS_LOW_SWAP
 * that the system is low on swap. Walks the memorystatus klist under the
 * klist lock; fires one KNOTE per matching registration.
 */
void
memorystatus_send_low_swap_note(void) {

    struct knote *kn = NULL;

    memorystatus_klist_lock();
    SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
        if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) {
            KNOTE(&memorystatus_klist, kMemorystatusLowSwap);
        }
    }
    memorystatus_klist_unlock();
}
3057 | ||
39236c6e A |
3058 | boolean_t |
3059 | memorystatus_bg_pressure_eligible(proc_t p) { | |
3060 | boolean_t eligible = FALSE; | |
3061 | ||
3062 | proc_list_lock(); | |
3063 | ||
3064 | MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state); | |
3065 | ||
3066 | /* Foreground processes have already been dealt with at this point, so just test for eligibility */ | |
3067 | if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) { | |
3068 | eligible = TRUE; | |
3069 | } | |
3070 | ||
3071 | proc_list_unlock(); | |
3072 | ||
3073 | return eligible; | |
3074 | } | |
3075 | ||
3076 | boolean_t | |
3077 | memorystatus_is_foreground_locked(proc_t p) { | |
3078 | return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) || | |
3079 | (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT)); | |
3080 | } | |
fe8ab488 | 3081 | #endif /* CONFIG_MEMORYSTATUS */ |
39236c6e A |
3082 | |
/*
 * Trigger levels to test the mechanism.
 * Can be used via a sysctl.
 */
#define TEST_LOW_MEMORY_TRIGGER_ONE             1
#define TEST_LOW_MEMORY_TRIGGER_ALL             2
#define TEST_PURGEABLE_TRIGGER_ONE              3
#define TEST_PURGEABLE_TRIGGER_ALL              4
#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE   5
#define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL   6

/* TRUE while a manual (sysctl-driven) pressure test is in progress, and the level it simulates. */
boolean_t memorystatus_manual_testing_on = FALSE;
vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal;

/* Pressure-notification helpers implemented in the VM layer. */
extern struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *, int, boolean_t);

extern
kern_return_t vm_pressure_notification_without_levels(boolean_t);

extern void vm_pressure_klist_lock(void);
extern void vm_pressure_klist_unlock(void);

extern void vm_reset_active_list(void);

extern void delay(int);

/* Minimum spacing between successive pressure notifications. */
#define INTER_NOTIFICATION_DELAY    (250000)    /* .25 second */
3111 | ||
/*
 * Hook invoked at the end of a pageout scan.
 * Deliberately empty in this (VM_PRESSURE_EVENTS, non-jetsam) configuration.
 */
void memorystatus_on_pageout_scan_end(void) {
	/* No-op */
}
3115 | ||
3116 | /* | |
3117 | * kn_max - knote | |
3118 | * | |
3119 | * knote_pressure_level - to check if the knote is registered for this notification level. | |
3120 | * | |
3121 | * task - task whose bits we'll be modifying | |
3122 | * | |
3123 | * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again. | |
3124 | * | |
3125 | * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately. | |
3126 | * | |
3127 | */ | |
39236c6e A |
3128 | |
3129 | boolean_t | |
3130 | is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set) | |
3131 | { | |
3132 | if (kn_max->kn_sfflags & knote_pressure_level) { | |
3133 | ||
3134 | if (task_has_been_notified(task, pressure_level_to_clear) == TRUE) { | |
3135 | ||
3136 | task_clear_has_been_notified(task, pressure_level_to_clear); | |
3137 | } | |
3138 | ||
3139 | task_mark_has_been_notified(task, pressure_level_to_set); | |
3140 | return TRUE; | |
3141 | } | |
3142 | ||
3143 | return FALSE; | |
3144 | } | |
3145 | ||
fe8ab488 A |
3146 | extern kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process); |
3147 | ||
3148 | #define VM_PRESSURE_DECREASED_SMOOTHING_PERIOD 5000 /* milliseconds */ | |
39236c6e A |
3149 | |
3150 | kern_return_t | |
fe8ab488 | 3151 | memorystatus_update_vm_pressure(boolean_t target_foreground_process) |
39236c6e A |
3152 | { |
3153 | struct knote *kn_max = NULL; | |
3154 | pid_t target_pid = -1; | |
3155 | struct klist dispatch_klist = { NULL }; | |
3156 | proc_t target_proc = PROC_NULL; | |
39236c6e A |
3157 | struct task *task = NULL; |
3158 | boolean_t found_candidate = FALSE; | |
3159 | ||
fe8ab488 A |
3160 | static vm_pressure_level_t level_snapshot = kVMPressureNormal; |
3161 | static vm_pressure_level_t prev_level_snapshot = kVMPressureNormal; | |
3162 | boolean_t smoothing_window_started = FALSE; | |
3163 | struct timeval smoothing_window_start_tstamp = {0, 0}; | |
3164 | struct timeval curr_tstamp = {0, 0}; | |
3165 | int elapsed_msecs = 0; | |
3166 | ||
3167 | #if !CONFIG_JETSAM | |
3168 | #define MAX_IDLE_KILLS 100 /* limit the number of idle kills allowed */ | |
3169 | ||
3170 | int idle_kill_counter = 0; | |
3171 | ||
3172 | /* | |
3173 | * On desktop we take this opportunity to free up memory pressure | |
3174 | * by immediately killing idle exitable processes. We use a delay | |
3175 | * to avoid overkill. And we impose a max counter as a fail safe | |
3176 | * in case daemons re-launch too fast. | |
3177 | */ | |
3178 | while ((memorystatus_vm_pressure_level != kVMPressureNormal) && (idle_kill_counter < MAX_IDLE_KILLS)) { | |
3179 | if (memorystatus_idle_exit_from_VM() == FALSE) { | |
3180 | /* No idle exitable processes left to kill */ | |
3181 | break; | |
3182 | } | |
3183 | idle_kill_counter++; | |
3184 | delay(1000000); /* 1 second */ | |
3185 | } | |
3186 | #endif /* !CONFIG_JETSAM */ | |
3187 | ||
39236c6e A |
3188 | while (1) { |
3189 | ||
3190 | /* | |
3191 | * There is a race window here. But it's not clear | |
3192 | * how much we benefit from having extra synchronization. | |
3193 | */ | |
3194 | level_snapshot = memorystatus_vm_pressure_level; | |
3195 | ||
fe8ab488 A |
3196 | if (prev_level_snapshot > level_snapshot) { |
3197 | /* | |
3198 | * Pressure decreased? Let's take a little breather | |
3199 | * and see if this condition stays. | |
3200 | */ | |
3201 | if (smoothing_window_started == FALSE) { | |
3202 | ||
3203 | smoothing_window_started = TRUE; | |
3204 | microuptime(&smoothing_window_start_tstamp); | |
3205 | } | |
3206 | ||
3207 | microuptime(&curr_tstamp); | |
3208 | timevalsub(&curr_tstamp, &smoothing_window_start_tstamp); | |
3209 | elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000; | |
3210 | ||
3211 | if (elapsed_msecs < VM_PRESSURE_DECREASED_SMOOTHING_PERIOD) { | |
3212 | ||
3213 | delay(INTER_NOTIFICATION_DELAY); | |
3214 | continue; | |
3215 | } | |
3216 | } | |
3217 | ||
3218 | prev_level_snapshot = level_snapshot; | |
3219 | smoothing_window_started = FALSE; | |
3220 | ||
39236c6e | 3221 | memorystatus_klist_lock(); |
fe8ab488 | 3222 | kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot, target_foreground_process); |
39236c6e A |
3223 | |
3224 | if (kn_max == NULL) { | |
3225 | memorystatus_klist_unlock(); | |
3226 | ||
3227 | /* | |
3228 | * No more level-based clients to notify. | |
3229 | * Try the non-level based notification clients. | |
3230 | * | |
3231 | * However, these non-level clients don't understand | |
3232 | * the "return-to-normal" notification. | |
3233 | * | |
3234 | * So don't consider them for those notifications. Just | |
3235 | * return instead. | |
3236 | * | |
3237 | */ | |
3238 | ||
3239 | if (level_snapshot != kVMPressureNormal) { | |
3240 | goto try_dispatch_vm_clients; | |
3241 | } else { | |
3242 | return KERN_FAILURE; | |
3243 | } | |
3244 | } | |
3245 | ||
3246 | target_proc = kn_max->kn_kq->kq_p; | |
3247 | ||
3248 | proc_list_lock(); | |
3249 | if (target_proc != proc_ref_locked(target_proc)) { | |
3250 | target_proc = PROC_NULL; | |
3251 | proc_list_unlock(); | |
3252 | memorystatus_klist_unlock(); | |
3253 | continue; | |
3254 | } | |
3255 | proc_list_unlock(); | |
3256 | memorystatus_klist_unlock(); | |
3257 | ||
3258 | target_pid = target_proc->p_pid; | |
3259 | ||
3260 | task = (struct task *)(target_proc->task); | |
3261 | ||
3262 | if (level_snapshot != kVMPressureNormal) { | |
3263 | ||
3264 | if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) { | |
3265 | ||
3266 | if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, kVMPressureCritical, kVMPressureWarning) == TRUE) { | |
3267 | found_candidate = TRUE; | |
3268 | } | |
3269 | } else { | |
3270 | if (level_snapshot == kVMPressureCritical) { | |
3271 | ||
3272 | if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, kVMPressureWarning, kVMPressureCritical) == TRUE) { | |
3273 | found_candidate = TRUE; | |
3274 | } | |
3275 | } | |
3276 | } | |
3277 | } else { | |
3278 | if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
3279 | ||
3280 | task_clear_has_been_notified(task, kVMPressureWarning); | |
3281 | task_clear_has_been_notified(task, kVMPressureCritical); | |
3282 | ||
3283 | found_candidate = TRUE; | |
6d2010ae A |
3284 | } |
3285 | } | |
39236c6e A |
3286 | |
3287 | if (found_candidate == FALSE) { | |
3288 | continue; | |
3289 | } | |
3290 | ||
3291 | memorystatus_klist_lock(); | |
3292 | KNOTE_DETACH(&memorystatus_klist, kn_max); | |
3293 | KNOTE_ATTACH(&dispatch_klist, kn_max); | |
3294 | memorystatus_klist_unlock(); | |
3295 | ||
3296 | KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure); | |
3297 | ||
3298 | memorystatus_klist_lock(); | |
3299 | KNOTE_DETACH(&dispatch_klist, kn_max); | |
3300 | KNOTE_ATTACH(&memorystatus_klist, kn_max); | |
3301 | memorystatus_klist_unlock(); | |
3302 | ||
3303 | microuptime(&target_proc->vm_pressure_last_notify_tstamp); | |
3304 | proc_rele(target_proc); | |
3305 | ||
fe8ab488 | 3306 | if (memorystatus_manual_testing_on == TRUE && target_foreground_process == TRUE) { |
39236c6e A |
3307 | break; |
3308 | } | |
3309 | ||
3310 | try_dispatch_vm_clients: | |
fe8ab488 A |
3311 | if (kn_max == NULL && level_snapshot != kVMPressureNormal) { |
3312 | /* | |
3313 | * We will exit this loop when we are done with | |
3314 | * notification clients (level and non-level based). | |
39236c6e | 3315 | */ |
fe8ab488 | 3316 | if ((vm_pressure_notify_dispatch_vm_clients(target_foreground_process) == KERN_FAILURE) && (kn_max == NULL)) { |
39236c6e A |
3317 | /* |
3318 | * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications | |
3319 | * AND | |
3320 | * we have failed to find any eligible clients for the non-level based notifications too. | |
3321 | * So, we are done. | |
3322 | */ | |
3323 | ||
3324 | return KERN_FAILURE; | |
3325 | } | |
3326 | } | |
3327 | ||
fe8ab488 A |
3328 | /* |
3329 | * LD: This block of code below used to be invoked in the older memory notification scheme on embedded everytime | |
3330 | * a process was sent a memory pressure notification. The "memorystatus_klist" list was used to hold these | |
3331 | * privileged listeners. But now we have moved to the newer scheme and are trying to move away from the extra | |
3332 | * notifications. So the code is here in case we break compat. and need to send out notifications to the privileged | |
3333 | * apps. | |
3334 | */ | |
3335 | #if 0 | |
3336 | #endif /* 0 */ | |
3337 | ||
3338 | if (memorystatus_manual_testing_on == TRUE) { | |
3339 | /* | |
3340 | * Testing out the pressure notification scheme. | |
3341 | * No need for delays etc. | |
3342 | */ | |
3343 | } else { | |
3344 | ||
3345 | uint32_t sleep_interval = INTER_NOTIFICATION_DELAY; | |
3346 | #if CONFIG_JETSAM | |
3347 | unsigned int page_delta = 0; | |
3348 | unsigned int skip_delay_page_threshold = 0; | |
3349 | ||
3350 | assert(memorystatus_available_pages_pressure >= memorystatus_available_pages_critical_base); | |
3351 | ||
3352 | page_delta = (memorystatus_available_pages_pressure - memorystatus_available_pages_critical_base) / 2; | |
3353 | skip_delay_page_threshold = memorystatus_available_pages_pressure - page_delta; | |
3354 | ||
3355 | if (memorystatus_available_pages <= skip_delay_page_threshold) { | |
3356 | /* | |
3357 | * We are nearing the critcal mark fast and can't afford to wait between | |
3358 | * notifications. | |
3359 | */ | |
3360 | sleep_interval = 0; | |
3361 | } | |
3362 | #endif /* CONFIG_JETSAM */ | |
3363 | ||
3364 | if (sleep_interval) { | |
3365 | delay(sleep_interval); | |
3366 | } | |
39236c6e | 3367 | } |
6d2010ae | 3368 | } |
39236c6e A |
3369 | |
3370 | return KERN_SUCCESS; | |
6d2010ae A |
3371 | } |
3372 | ||
39236c6e A |
3373 | vm_pressure_level_t |
3374 | convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t); | |
3375 | ||
3376 | vm_pressure_level_t | |
3377 | convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level) | |
3378 | { | |
3379 | vm_pressure_level_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; | |
3380 | ||
3381 | switch (internal_pressure_level) { | |
3382 | ||
3383 | case kVMPressureNormal: | |
3384 | { | |
3385 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; | |
3386 | break; | |
3387 | } | |
3388 | ||
3389 | case kVMPressureWarning: | |
3390 | case kVMPressureUrgent: | |
3391 | { | |
3392 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
3393 | break; | |
3394 | } | |
3395 | ||
3396 | case kVMPressureCritical: | |
3397 | { | |
3398 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; | |
3399 | break; | |
3400 | } | |
3401 | ||
3402 | default: | |
3403 | break; | |
3404 | } | |
316670eb | 3405 | |
39236c6e A |
3406 | return dispatch_level; |
3407 | } | |
6d2010ae | 3408 | |
/*
 * sysctl handler: kern.memorystatus_vm_pressure_level (read-only).
 * Reports the current pressure level translated into the
 * NOTE_MEMORYSTATUS_PRESSURE_* constants user space expects.
 */
static int
sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
	vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level);

	return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level));
}
3417 | ||
fe8ab488 A |
3418 | #if DEBUG || DEVELOPMENT |
3419 | ||
39236c6e A |
3420 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED, |
3421 | 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); | |
3422 | ||
fe8ab488 A |
3423 | #else /* DEBUG || DEVELOPMENT */ |
3424 | ||
3425 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED, | |
3426 | 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); | |
3427 | ||
3428 | #endif /* DEBUG || DEVELOPMENT */ | |
b0d623f7 | 3429 | |
39236c6e A |
3430 | extern int memorystatus_purge_on_warning; |
3431 | extern int memorystatus_purge_on_critical; | |
3432 | ||
3433 | static int | |
3434 | sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS | |
3435 | { | |
3436 | #pragma unused(arg1, arg2) | |
b0d623f7 | 3437 | |
39236c6e A |
3438 | int level = 0; |
3439 | int error = 0; | |
3440 | int pressure_level = 0; | |
3441 | int trigger_request = 0; | |
3442 | int force_purge; | |
3443 | ||
3444 | error = sysctl_handle_int(oidp, &level, 0, req); | |
3445 | if (error || !req->newptr) { | |
3446 | return (error); | |
3447 | } | |
3448 | ||
3449 | memorystatus_manual_testing_on = TRUE; | |
3450 | ||
3451 | trigger_request = (level >> 16) & 0xFFFF; | |
3452 | pressure_level = (level & 0xFFFF); | |
3453 | ||
3454 | if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE || | |
3455 | trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) { | |
3456 | return EINVAL; | |
3457 | } | |
3458 | switch (pressure_level) { | |
3459 | case NOTE_MEMORYSTATUS_PRESSURE_NORMAL: | |
3460 | case NOTE_MEMORYSTATUS_PRESSURE_WARN: | |
3461 | case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL: | |
3462 | break; | |
3463 | default: | |
b0d623f7 A |
3464 | return EINVAL; |
3465 | } | |
b0d623f7 | 3466 | |
39236c6e A |
3467 | /* |
3468 | * The pressure level is being set from user-space. | |
3469 | * And user-space uses the constants in sys/event.h | |
3470 | * So we translate those events to our internal levels here. | |
3471 | */ | |
3472 | if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
3473 | ||
3474 | memorystatus_manual_testing_level = kVMPressureNormal; | |
3475 | force_purge = 0; | |
3476 | ||
3477 | } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) { | |
3478 | ||
3479 | memorystatus_manual_testing_level = kVMPressureWarning; | |
3480 | force_purge = memorystatus_purge_on_warning; | |
3481 | ||
3482 | } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { | |
3483 | ||
3484 | memorystatus_manual_testing_level = kVMPressureCritical; | |
3485 | force_purge = memorystatus_purge_on_critical; | |
b0d623f7 A |
3486 | } |
3487 | ||
39236c6e | 3488 | memorystatus_vm_pressure_level = memorystatus_manual_testing_level; |
316670eb | 3489 | |
39236c6e A |
3490 | /* purge according to the new pressure level */ |
3491 | switch (trigger_request) { | |
3492 | case TEST_PURGEABLE_TRIGGER_ONE: | |
3493 | case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE: | |
3494 | if (force_purge == 0) { | |
3495 | /* no purging requested */ | |
3496 | break; | |
3497 | } | |
3498 | vm_purgeable_object_purge_one_unlocked(force_purge); | |
3499 | break; | |
3500 | case TEST_PURGEABLE_TRIGGER_ALL: | |
3501 | case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL: | |
3502 | if (force_purge == 0) { | |
3503 | /* no purging requested */ | |
3504 | break; | |
3505 | } | |
3506 | while (vm_purgeable_object_purge_one_unlocked(force_purge)); | |
3507 | break; | |
3508 | } | |
3509 | ||
3510 | if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) || | |
3511 | (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) { | |
3512 | ||
3513 | memorystatus_update_vm_pressure(TRUE); | |
3514 | } | |
3515 | ||
3516 | if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) || | |
3517 | (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) { | |
3518 | ||
3519 | while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) { | |
3520 | continue; | |
3521 | } | |
3522 | } | |
3523 | ||
3524 | if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
3525 | memorystatus_manual_testing_on = FALSE; | |
3526 | ||
3527 | vm_pressure_klist_lock(); | |
3528 | vm_reset_active_list(); | |
3529 | vm_pressure_klist_unlock(); | |
3530 | } else { | |
3531 | ||
3532 | vm_pressure_klist_lock(); | |
fe8ab488 | 3533 | vm_pressure_notification_without_levels(FALSE); |
39236c6e A |
3534 | vm_pressure_klist_unlock(); |
3535 | } | |
3536 | ||
3537 | return 0; | |
b0d623f7 A |
3538 | } |
3539 | ||
39236c6e A |
3540 | SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED, |
3541 | 0, 0, &sysctl_memorypressure_manual_trigger, "I", ""); | |
3542 | ||
3543 | ||
3544 | extern int memorystatus_purge_on_warning; | |
3545 | extern int memorystatus_purge_on_urgent; | |
3546 | extern int memorystatus_purge_on_critical; | |
3547 | ||
3548 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_warning, 0, ""); | |
3549 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_urgent, 0, ""); | |
3550 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, ""); | |
3551 | ||
3552 | ||
fe8ab488 | 3553 | #endif /* VM_PRESSURE_EVENTS */ |
39236c6e A |
3554 | |
3555 | /* Return both allocated and actual size, since there's a race between allocation and list compilation */ | |
b0d623f7 | 3556 | static int |
39236c6e | 3557 | memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only) |
b0d623f7 | 3558 | { |
316670eb | 3559 | uint32_t list_count, i = 0; |
39236c6e A |
3560 | memorystatus_priority_entry_t *list_entry; |
3561 | proc_t p; | |
3562 | ||
316670eb | 3563 | list_count = memorystatus_list_count; |
39236c6e A |
3564 | *list_size = sizeof(memorystatus_priority_entry_t) * list_count; |
3565 | ||
3566 | /* Just a size check? */ | |
3567 | if (size_only) { | |
3568 | return 0; | |
3569 | } | |
3570 | ||
3571 | /* Otherwise, validate the size of the buffer */ | |
3572 | if (*buffer_size < *list_size) { | |
3573 | return EINVAL; | |
3574 | } | |
3575 | ||
3576 | *list_ptr = (memorystatus_priority_entry_t*)kalloc(*list_size); | |
3577 | if (!list_ptr) { | |
316670eb A |
3578 | return ENOMEM; |
3579 | } | |
3580 | ||
39236c6e A |
3581 | memset(*list_ptr, 0, *list_size); |
3582 | ||
3583 | *buffer_size = *list_size; | |
3584 | *list_size = 0; | |
3585 | ||
3586 | list_entry = *list_ptr; | |
3587 | ||
3588 | proc_list_lock(); | |
3589 | ||
3590 | p = memorystatus_get_first_proc_locked(&i, TRUE); | |
3591 | while (p && (*list_size < *buffer_size)) { | |
3592 | list_entry->pid = p->p_pid; | |
3593 | list_entry->priority = p->p_memstat_effectivepriority; | |
3594 | list_entry->user_data = p->p_memstat_userdata; | |
3595 | #if LEGACY_HIWATER | |
3596 | if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) || | |
3597 | (p->p_memstat_memlimit <= 0)) { | |
3598 | task_get_phys_footprint_limit(p->task, &list_entry->limit); | |
3599 | } else { | |
3600 | list_entry->limit = p->p_memstat_memlimit; | |
3601 | } | |
3602 | #else | |
3603 | task_get_phys_footprint_limit(p->task, &list_entry->limit); | |
3604 | #endif | |
3605 | list_entry->state = memorystatus_build_state(p); | |
3606 | list_entry++; | |
3607 | ||
3608 | *list_size += sizeof(memorystatus_priority_entry_t); | |
3609 | ||
3610 | p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
316670eb A |
3611 | } |
3612 | ||
39236c6e | 3613 | proc_list_unlock(); |
316670eb | 3614 | |
39236c6e | 3615 | MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size); |
316670eb | 3616 | |
39236c6e A |
3617 | return 0; |
3618 | } | |
b0d623f7 | 3619 | |
39236c6e A |
3620 | static int |
3621 | memorystatus_cmd_get_priority_list(user_addr_t buffer, size_t buffer_size, int32_t *retval) { | |
3622 | int error = EINVAL; | |
3623 | boolean_t size_only; | |
3624 | memorystatus_priority_entry_t *list = NULL; | |
3625 | size_t list_size; | |
316670eb | 3626 | |
39236c6e A |
3627 | size_only = ((buffer == USER_ADDR_NULL) ? TRUE: FALSE); |
3628 | ||
3629 | error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only); | |
3630 | if (error) { | |
3631 | goto out; | |
3632 | } | |
3633 | ||
3634 | if (!size_only) { | |
3635 | error = copyout(list, buffer, list_size); | |
3636 | } | |
3637 | ||
3638 | if (error == 0) { | |
3639 | *retval = list_size; | |
3640 | } | |
3641 | out: | |
3642 | ||
3643 | if (list) { | |
3644 | kfree(list, buffer_size); | |
3645 | } | |
3646 | ||
3647 | return error; | |
316670eb | 3648 | } |
b0d623f7 | 3649 | |
39236c6e A |
3650 | #if CONFIG_JETSAM |
3651 | ||
3652 | static void | |
3653 | memorystatus_clear_errors(void) | |
3654 | { | |
3655 | proc_t p; | |
3656 | unsigned int i = 0; | |
3657 | ||
3658 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0); | |
3659 | ||
3660 | proc_list_lock(); | |
3661 | ||
3662 | p = memorystatus_get_first_proc_locked(&i, TRUE); | |
3663 | while (p) { | |
3664 | if (p->p_memstat_state & P_MEMSTAT_ERROR) { | |
3665 | p->p_memstat_state &= ~P_MEMSTAT_ERROR; | |
3666 | } | |
3667 | p = memorystatus_get_next_proc_locked(&i, p, TRUE); | |
3668 | } | |
3669 | ||
3670 | proc_list_unlock(); | |
3671 | ||
3672 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0); | |
3673 | } | |
b0d623f7 | 3674 | |
/*
 * Recompute the memorystatus_available_pages_* thresholds.
 *
 * The critical threshold starts from its base and is raised when idle
 * processes exist (they can be reclaimed first), but is always clamped to
 * the pressure threshold.  When critical_only is FALSE the pressure
 * threshold is recomputed as well.
 *
 * NOTE(review): reads memstat_bucket[] -- the _locked suffix suggests the
 * caller holds the proc list lock; confirm at call sites.
 */
static void
memorystatus_update_levels_locked(boolean_t critical_only) {

	memorystatus_available_pages_critical = memorystatus_available_pages_critical_base;

	/*
	 * If there's an entry in the first bucket, we have idle processes.
	 */
	memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
	if (first_bucket->count) {
		memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset;

		if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure ) {
			/*
			 * The critical threshold must never exceed the pressure threshold
			 */
			memorystatus_available_pages_critical = memorystatus_available_pages_pressure;
		}
	}

#if DEBUG || DEVELOPMENT
	/* Diagnostic jetsam policy widens the critical window (same clamp applies). */
	if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
		memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic;

		if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure ) {
			/*
			 * The critical threshold must never exceed the pressure threshold
			 */
			memorystatus_available_pages_critical = memorystatus_available_pages_pressure;
		}
	}
#endif

	if (critical_only) {
		return;
	}

#if VM_PRESSURE_EVENTS
	/* Pressure threshold scales with the configured percentage of memorystatus_delta. */
	memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
#if DEBUG || DEVELOPMENT
	if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
		memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic;
	}
#endif
#endif
}
3721 | ||
3722 | static int | |
3723 | memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) { | |
3724 | size_t input_size = *snapshot_size; | |
316670eb | 3725 | |
39236c6e A |
3726 | if (memorystatus_jetsam_snapshot_count > 0) { |
3727 | *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count)); | |
3728 | } else { | |
3729 | *snapshot_size = 0; | |
3730 | } | |
3731 | ||
3732 | if (size_only) { | |
3733 | return 0; | |
316670eb | 3734 | } |
39236c6e A |
3735 | |
3736 | if (input_size < *snapshot_size) { | |
3737 | return EINVAL; | |
3738 | } | |
3739 | ||
3740 | *snapshot = memorystatus_jetsam_snapshot; | |
3741 | ||
3742 | MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size); | |
3743 | ||
3744 | return 0; | |
316670eb A |
3745 | } |
3746 | ||
fe8ab488 | 3747 | |
316670eb | 3748 | static int |
39236c6e A |
3749 | memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) { |
3750 | int error = EINVAL; | |
3751 | boolean_t size_only; | |
3752 | memorystatus_jetsam_snapshot_t *snapshot; | |
3753 | ||
3754 | size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE); | |
3755 | ||
3756 | error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only); | |
3757 | if (error) { | |
3758 | goto out; | |
3759 | } | |
316670eb | 3760 | |
39236c6e A |
3761 | /* Copy out and reset */ |
3762 | if (!size_only) { | |
3763 | if ((error = copyout(snapshot, buffer, buffer_size)) == 0) { | |
3764 | snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0; | |
3765 | } | |
3766 | } | |
316670eb | 3767 | |
39236c6e A |
3768 | if (error == 0) { |
3769 | *retval = buffer_size; | |
3770 | } | |
3771 | out: | |
3772 | return error; | |
3773 | } | |
316670eb | 3774 | |
fe8ab488 A |
3775 | /* |
3776 | * Routine: memorystatus_cmd_grp_set_properties | |
3777 | * Purpose: Update properties for a group of processes. | |
3778 | * | |
3779 | * Supported Properties: | |
3780 | * [priority] | |
3781 | * Move each process out of its effective priority | |
3782 | * band and into a new priority band. | |
3783 | * Maintains relative order from lowest to highest priority. | |
3784 | * In single band, maintains relative order from head to tail. | |
3785 | * | |
3786 | * eg: before [effectivepriority | pid] | |
3787 | * [18 | p101 ] | |
3788 | * [17 | p55, p67, p19 ] | |
3789 | * [12 | p103 p10 ] | |
3790 | * [ 7 | p25 ] | |
3791 | * [ 0 | p71, p82, ] | |
3792 | * | |
3793 | * after [ new band | pid] | |
3794 | * [ xxx | p71, p82, p25, p103, p10, p55, p67, p19, p101] | |
3795 | * | |
3796 | * Returns: 0 on success, else non-zero. | |
3797 | * | |
3798 | * Caveat: We know there is a race window regarding recycled pids. | |
3799 | * A process could be killed before the kernel can act on it here. | |
3800 | * If a pid cannot be found in any of the jetsam priority bands, | |
3801 | * then we simply ignore it. No harm. | |
3802 | * But, if the pid has been recycled then it could be an issue. | |
3803 | * In that scenario, we might move an unsuspecting process to the new | |
3804 | * priority band. It's not clear how the kernel can safeguard | |
3805 | * against this, but it would be an extremely rare case anyway. | |
3806 | * The caller of this api might avoid such race conditions by | |
3807 | * ensuring that the processes passed in the pid list are suspended. | |
3808 | */ | |
3809 | ||
3810 | ||
/* This internal structure can expand when we add support for more properties */
typedef	struct memorystatus_internal_properties
{
	proc_t proc;
	int32_t priority;  /* see memorystatus_priority_entry_t : priority */
} memorystatus_internal_properties_t;


/*
 * Move a group of processes (pid + priority pairs copied in from user
 * space) into new jetsam bands; see the block comment above for the
 * ordering contract.  Two passes: first build a table of matching procs
 * in bucket order (preserving relative priority order), then apply the
 * moves under the proc list lock.
 */
static int
memorystatus_cmd_grp_set_properties(int32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {

#pragma unused (flags)

	/*
	 * We only handle setting priority
	 * per process
	 */

	int error = 0;
	memorystatus_priority_entry_t *entries = NULL;   /* user-supplied pid/priority pairs */
	uint32_t entry_count = 0;

	/* This will be the ordered proc list */
	memorystatus_internal_properties_t *table = NULL;
	size_t table_size = 0;
	uint32_t table_count = 0;

	uint32_t i = 0;
	uint32_t bucket_index = 0;
	boolean_t head_insert;
	int32_t new_priority;

	proc_t p;

	/* Verify inputs: buffer must be a whole number of entries */
	if ((buffer == USER_ADDR_NULL) || (buffer_size == 0) || ((buffer_size % sizeof(memorystatus_priority_entry_t)) != 0)) {
		error = EINVAL;
		goto out;
	}

	entry_count = (buffer_size / sizeof(memorystatus_priority_entry_t));
	if ((entries = (memorystatus_priority_entry_t *)kalloc(buffer_size)) == NULL) {
		error = ENOMEM;
		goto out;
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, entry_count, 0, 0, 0, 0);

	if ((error = copyin(buffer, entries, buffer_size)) != 0) {
		goto out;
	}

	/* Verify sanity of input priorities */
	for (i=0; i < entry_count; i++) {
		if (entries[i].priority == -1) {
			/* Use as shorthand for default priority */
			entries[i].priority = JETSAM_PRIORITY_DEFAULT;
		} else if (entries[i].priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
			/* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use;
			 * if requested, adjust to JETSAM_PRIORITY_IDLE. */
			entries[i].priority = JETSAM_PRIORITY_IDLE;
		} else if (entries[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
			/* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle
			 * queue */
			/* Deal with this later */
		} else if ((entries[i].priority < 0) || (entries[i].priority >= MEMSTAT_BUCKET_COUNT)) {
			/* Sanity check */
			error = EINVAL;
			goto out;
		}
	}

	table_size = sizeof(memorystatus_internal_properties_t) * entry_count;
	if ( (table = (memorystatus_internal_properties_t *)kalloc(table_size)) == NULL) {
		error = ENOMEM;
		goto out;
	}
	memset(table, 0, table_size);


	/*
	 * For each jetsam bucket entry, spin through the input property list.
	 * When a matching pid is found, populate an adjacent table with the
	 * appropriate proc pointer and new property values.
	 * This traversal automatically preserves order from lowest
	 * to highest priority.
	 */

	bucket_index=0;

	proc_list_lock();

	/* Create the ordered table */
	p = memorystatus_get_first_proc_locked(&bucket_index, TRUE);
	while (p && (table_count < entry_count)) {
		for (i=0; i < entry_count; i++ ) {
			if (p->p_pid == entries[i].pid) {
				/* Build the table data  */
				table[table_count].proc = p;
				table[table_count].priority = entries[i].priority;
				table_count++;
				break;
			}
		}
		p = memorystatus_get_next_proc_locked(&bucket_index, p, TRUE);
	}

	/* We now have ordered list of procs ready to move */
	for (i=0; i < table_count; i++) {
		p = table[i].proc;
		assert(p != NULL);

		/* Allow head inserts -- but relative order is now  */
		if (table[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
			new_priority = JETSAM_PRIORITY_IDLE;
			head_insert = true;
		} else {
			new_priority = table[i].priority;
			head_insert = false;
		}

		/* Not allowed: internal (kernel-managed) processes stay put */
		if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
			continue;
		}

		/*
		 * Take appropriate steps if moving proc out of the
		 * JETSAM_PRIORITY_IDLE_DEFERRED band.
		 */
		if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
			memorystatus_invalidate_idle_demotion_locked(p, TRUE);
		}

		memorystatus_update_priority_locked(p, new_priority, head_insert);
	}

	proc_list_unlock();

	/*
	 * if (table_count != entry_count)
	 * then some pids were not found in a jetsam band.
	 * harmless but interesting...
	 */
	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, entry_count, table_count, 0, 0, 0);

out:
	if (entries)
		kfree(entries, buffer_size);
	if (table)
		kfree(table, table_size);

	return (error);
}
3965 | ||
3966 | ||
3967 | /* | |
3968 | * This routine is meant solely for the purpose of adjusting jetsam priorities and bands. | |
3969 | * It is _not_ meant to be used for the setting of memory limits, especially, since we can't | |
3970 | * tell if the memory limit being set is fatal or not. | |
3971 | * | |
3972 | * So the the last 5 args to the memorystatus_update() call below, related to memory limits, are all 0 or FALSE. | |
3973 | */ | |
3974 | ||
39236c6e A |
3975 | static int |
3976 | memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) { | |
3977 | const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */ | |
316670eb | 3978 | |
39236c6e A |
3979 | int error; |
3980 | uint32_t i; | |
3981 | uint32_t entry_count; | |
3982 | memorystatus_priority_properties_t *entries; | |
3983 | ||
3984 | /* Validate inputs */ | |
3985 | if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) { | |
3986 | return EINVAL; | |
3987 | } | |
3988 | ||
3989 | /* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */ | |
3990 | entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t)); | |
3991 | if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) { | |
3992 | return EINVAL; | |
316670eb | 3993 | } |
316670eb | 3994 | |
39236c6e A |
3995 | entries = (memorystatus_priority_properties_t *)kalloc(buffer_size); |
3996 | ||
3997 | error = copyin(buffer, entries, buffer_size); | |
316670eb | 3998 | |
39236c6e A |
3999 | for (i = 0; i < entry_count; i++) { |
4000 | proc_t p; | |
4001 | ||
4002 | if (error) { | |
4003 | break; | |
4004 | } | |
4005 | ||
4006 | p = proc_find(pid); | |
4007 | if (!p) { | |
4008 | error = ESRCH; | |
4009 | break; | |
4010 | } | |
4011 | ||
4012 | if (p->p_memstat_state & P_MEMSTAT_INTERNAL) { | |
4013 | error = EPERM; | |
4014 | proc_rele(p); | |
4015 | break; | |
4016 | } | |
fe8ab488 A |
4017 | |
4018 | error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0, FALSE); | |
39236c6e A |
4019 | proc_rele(p); |
4020 | } | |
4021 | ||
4022 | kfree(entries, buffer_size); | |
4023 | ||
4024 | return error; | |
b0d623f7 A |
4025 | } |
4026 | ||
39236c6e A |
4027 | static int |
4028 | memorystatus_cmd_get_pressure_status(int32_t *retval) { | |
4029 | int error; | |
4030 | ||
4031 | /* Need privilege for check */ | |
4032 | error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0); | |
4033 | if (error) { | |
4034 | return (error); | |
4035 | } | |
4036 | ||
4037 | /* Inherently racy, so it's not worth taking a lock here */ | |
4038 | *retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0; | |
4039 | ||
4040 | return error; | |
4041 | } | |
316670eb | 4042 | |
fe8ab488 A |
4043 | /* |
4044 | * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM. | |
4045 | */ | |
4046 | ||
b0d623f7 | 4047 | static int |
fe8ab488 | 4048 | memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit) { |
39236c6e A |
4049 | int error = 0; |
4050 | ||
4051 | proc_t p = proc_find(pid); | |
4052 | if (!p) { | |
4053 | return ESRCH; | |
4054 | } | |
4055 | ||
4056 | if (high_water_mark <= 0) { | |
4057 | high_water_mark = -1; /* Disable */ | |
4058 | } | |
4059 | ||
4060 | proc_list_lock(); | |
4061 | ||
39236c6e A |
4062 | p->p_memstat_memlimit = high_water_mark; |
4063 | if (memorystatus_highwater_enabled) { | |
4064 | if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) { | |
fe8ab488 A |
4065 | |
4066 | memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority, false); | |
4067 | ||
4068 | /* | |
4069 | * The update priority call above takes care to set/reset the fatal memory limit state | |
4070 | * IF the process is transitioning between foreground <-> background and has a background | |
4071 | * memory limit. | |
4072 | * Here, however, the process won't be doing any such transitions and so we explicitly tackle | |
4073 | * the fatal limit state. | |
4074 | */ | |
4075 | is_fatal_limit = FALSE; | |
4076 | ||
39236c6e A |
4077 | } else { |
4078 | error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL; | |
4079 | } | |
4080 | } | |
4081 | ||
fe8ab488 A |
4082 | if (error == 0) { |
4083 | if (is_fatal_limit == TRUE) { | |
4084 | p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT; | |
4085 | } else { | |
4086 | p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT; | |
4087 | } | |
4088 | } | |
4089 | ||
39236c6e A |
4090 | proc_list_unlock(); |
4091 | proc_rele(p); | |
4092 | ||
4093 | return error; | |
4094 | } | |
4095 | ||
fe8ab488 A |
4096 | /* |
4097 | * Returns the jetsam priority (effective or requested) of the process | |
4098 | * associated with this task. | |
4099 | */ | |
4100 | int | |
4101 | proc_get_memstat_priority(proc_t p, boolean_t effective_priority) | |
4102 | { | |
4103 | if (p) { | |
4104 | if (effective_priority) { | |
4105 | return p->p_memstat_effectivepriority; | |
4106 | } else { | |
4107 | return p->p_memstat_requestedpriority; | |
4108 | } | |
4109 | } | |
4110 | return 0; | |
4111 | } | |
39236c6e | 4112 | #endif /* CONFIG_JETSAM */ |
b0d623f7 | 4113 | |
39236c6e A |
4114 | int |
4115 | memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) { | |
4116 | int error = EINVAL; | |
4117 | ||
4118 | #if !CONFIG_JETSAM | |
4119 | #pragma unused(ret) | |
4120 | #endif | |
4121 | ||
4122 | /* Root only for now */ | |
4123 | if (!kauth_cred_issuser(kauth_cred_get())) { | |
4124 | error = EPERM; | |
4125 | goto out; | |
b0d623f7 | 4126 | } |
39236c6e A |
4127 | |
4128 | /* Sanity check */ | |
4129 | if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) { | |
4130 | error = EINVAL; | |
4131 | goto out; | |
4132 | } | |
4133 | ||
4134 | switch (args->command) { | |
4135 | case MEMORYSTATUS_CMD_GET_PRIORITY_LIST: | |
4136 | error = memorystatus_cmd_get_priority_list(args->buffer, args->buffersize, ret); | |
4137 | break; | |
4138 | #if CONFIG_JETSAM | |
4139 | case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES: | |
4140 | error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret); | |
4141 | break; | |
fe8ab488 A |
4142 | case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES: |
4143 | error = memorystatus_cmd_grp_set_properties((int32_t)args->flags, args->buffer, args->buffersize, ret); | |
4144 | break; | |
39236c6e A |
4145 | case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT: |
4146 | error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret); | |
4147 | break; | |
4148 | case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS: | |
4149 | error = memorystatus_cmd_get_pressure_status(ret); | |
4150 | break; | |
4151 | case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK: | |
fe8ab488 A |
4152 | error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE); |
4153 | break; | |
4154 | case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT: | |
4155 | error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE); | |
39236c6e A |
4156 | break; |
4157 | /* Test commands */ | |
4158 | #if DEVELOPMENT || DEBUG | |
4159 | case MEMORYSTATUS_CMD_TEST_JETSAM: | |
4160 | error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 0 : EINVAL; | |
4161 | break; | |
4162 | case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS: | |
4163 | error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize); | |
4164 | break; | |
4165 | #endif /* DEVELOPMENT || DEBUG */ | |
4166 | #endif /* CONFIG_JETSAM */ | |
4167 | default: | |
4168 | break; | |
4169 | } | |
4170 | ||
4171 | out: | |
4172 | return error; | |
4173 | } | |
4174 | ||
4175 | ||
4176 | static int | |
4177 | filt_memorystatusattach(struct knote *kn) | |
4178 | { | |
4179 | kn->kn_flags |= EV_CLEAR; | |
4180 | return memorystatus_knote_register(kn); | |
4181 | } | |
4182 | ||
/*
 * Knote detach hook for the memorystatus filter; removes the knote
 * from the memorystatus klist.
 */
static void
filt_memorystatusdetach(struct knote *kn)
{
	memorystatus_knote_unregister(kn);
}
4188 | ||
4189 | static int | |
4190 | filt_memorystatus(struct knote *kn __unused, long hint) | |
4191 | { | |
4192 | if (hint) { | |
4193 | switch (hint) { | |
4194 | case kMemorystatusNoPressure: | |
4195 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
4196 | kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL; | |
4197 | } | |
4198 | break; | |
4199 | case kMemorystatusPressure: | |
4200 | if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) { | |
4201 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { | |
4202 | kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
4203 | } | |
4204 | } else if (memorystatus_vm_pressure_level == kVMPressureCritical) { | |
4205 | ||
4206 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { | |
4207 | kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; | |
4208 | } | |
4209 | } | |
4210 | break; | |
fe8ab488 A |
4211 | case kMemorystatusLowSwap: |
4212 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { | |
4213 | kn->kn_fflags |= NOTE_MEMORYSTATUS_LOW_SWAP; | |
4214 | } | |
4215 | break; | |
39236c6e A |
4216 | default: |
4217 | break; | |
b0d623f7 | 4218 | } |
39236c6e A |
4219 | } |
4220 | ||
4221 | return (kn->kn_fflags != 0); | |
4222 | } | |
4223 | ||
4224 | static void | |
4225 | memorystatus_klist_lock(void) { | |
4226 | lck_mtx_lock(&memorystatus_klist_mutex); | |
4227 | } | |
4228 | ||
4229 | static void | |
4230 | memorystatus_klist_unlock(void) { | |
4231 | lck_mtx_unlock(&memorystatus_klist_mutex); | |
4232 | } | |
4233 | ||
4234 | void | |
4235 | memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) { | |
4236 | lck_mtx_init(&memorystatus_klist_mutex, grp, attr); | |
4237 | klist_init(&memorystatus_klist); | |
4238 | } | |
4239 | ||
4240 | int | |
4241 | memorystatus_knote_register(struct knote *kn) { | |
4242 | int error = 0; | |
4243 | ||
4244 | memorystatus_klist_lock(); | |
4245 | ||
fe8ab488 | 4246 | if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP)) { |
39236c6e | 4247 | |
fe8ab488 A |
4248 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { |
4249 | error = suser(kauth_cred_get(), 0); | |
4250 | } | |
39236c6e | 4251 | |
fe8ab488 | 4252 | if (error == 0) { |
39236c6e | 4253 | KNOTE_ATTACH(&memorystatus_klist, kn); |
b0d623f7 | 4254 | } |
39236c6e A |
4255 | } else { |
4256 | error = ENOTSUP; | |
b0d623f7 | 4257 | } |
39236c6e A |
4258 | |
4259 | memorystatus_klist_unlock(); | |
4260 | ||
4261 | return error; | |
b0d623f7 A |
4262 | } |
4263 | ||
39236c6e A |
4264 | void |
4265 | memorystatus_knote_unregister(struct knote *kn __unused) { | |
4266 | memorystatus_klist_lock(); | |
4267 | KNOTE_DETACH(&memorystatus_klist, kn); | |
4268 | memorystatus_klist_unlock(); | |
4269 | } | |
316670eb | 4270 | |
fe8ab488 A |
4271 | |
#if 0
#if CONFIG_JETSAM && VM_PRESSURE_EVENTS
/*
 * Currently compiled out (#if 0): broadcast a pressure transition to every
 * knote on the memorystatus klist.  Always reports success.
 */
static boolean_t
memorystatus_issue_pressure_kevent(boolean_t pressured) {
	long hint = pressured ? kMemorystatusPressure : kMemorystatusNoPressure;

	memorystatus_klist_lock();
	KNOTE(&memorystatus_klist, hint);
	memorystatus_klist_unlock();

	return TRUE;
}
#endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
#endif /* 0 */