/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 */

#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/locks.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <libkern/libkern.h>
#include <mach/mach_time.h>
#include <mach/task.h>
#include <mach/host_priv.h>
#include <mach/mach_host.h>
#include <pexpert/pexpert.h>
#include <sys/kern_event.h>
#include <sys/proc.h>
#include <sys/proc_info.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/wait.h>
#include <sys/tree.h>
#include <sys/priv.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#if CONFIG_FREEZE
#include <vm/vm_map.h>
#endif /* CONFIG_FREEZE */

#include <sys/kern_memorystatus.h>

#if CONFIG_JETSAM
/* For logging clarity */
static const char *jetsam_kill_cause_name[] = {
    "",
    "jettisoned",           /* kMemorystatusKilled */
    "highwater",            /* kMemorystatusKilledHiwat */
    "vnode-limit",          /* kMemorystatusKilledVnodes */
    "vm-pageshortage",      /* kMemorystatusKilledVMPageShortage */
    "vm-thrashing",         /* kMemorystatusKilledVMThrashing */
    "fc-thrashing",         /* kMemorystatusKilledFCThrashing */
    "per-process-limit",    /* kMemorystatusKilledPerProcessLimit */
    "diagnostic",           /* kMemorystatusKilledDiagnostic */
    "idle-exit",            /* kMemorystatusKilledIdleExit */
};

/* Does cause indicate vm or fc thrashing? */
static boolean_t
is_thrashing(unsigned cause)
{
    switch (cause) {
    case kMemorystatusKilledVMThrashing:
    case kMemorystatusKilledFCThrashing:
        return TRUE;
    default:
        return FALSE;
    }
}

/* Callback into vm_compressor.c to signal that thrashing has been mitigated. */
extern void vm_thrashing_jetsam_done(void);
#endif
/* These are very verbose printf()s; enable them by
 * defining MEMORYSTATUS_DEBUG_LOG.
 */
#if MEMORYSTATUS_DEBUG_LOG
#define MEMORYSTATUS_DEBUG(cond, format, ...) \
    do { \
        if (cond) { printf(format, ##__VA_ARGS__); } \
    } while(0)
#else
#define MEMORYSTATUS_DEBUG(cond, format, ...)
#endif
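
/*
 * Example (illustrative):
 *   MEMORYSTATUS_DEBUG(1, "pid %d: state 0x%x\n", p->p_pid, p->p_memstat_state);
 * compiles away entirely unless MEMORYSTATUS_DEBUG_LOG is defined.
 */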

/* General tunables */

unsigned long delta_percentage = 5;
unsigned long critical_threshold_percentage = 5;
unsigned long idle_offset_percentage = 5;
unsigned long pressure_threshold_percentage = 15;
unsigned long freeze_threshold_percentage = 50;

/* General memorystatus stuff */

struct klist memorystatus_klist;
static lck_mtx_t memorystatus_klist_mutex;

static void memorystatus_klist_lock(void);
static void memorystatus_klist_unlock(void);

static uint64_t memorystatus_idle_delay_time = 0;

/*
 * Memorystatus kevents
 */

static int filt_memorystatusattach(struct knote *kn);
static void filt_memorystatusdetach(struct knote *kn);
static int filt_memorystatus(struct knote *kn, long hint);

struct filterops memorystatus_filtops = {
    .f_attach = filt_memorystatusattach,
    .f_detach = filt_memorystatusdetach,
    .f_event = filt_memorystatus,
};

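/* KNOTE() hints passed to filt_memorystatus() via memorystatus_klist */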
enum {
    kMemorystatusNoPressure = 0x1,
    kMemorystatusPressure = 0x2,
    kMemorystatusLowSwap = 0x4
};

/* Idle guard handling */

static int32_t memorystatus_scheduled_idle_demotions = 0;

static thread_call_t memorystatus_idle_demotion_call;

static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2);
static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state);
static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clean_state);
static void memorystatus_reschedule_idle_demotion_locked(void);

static void memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert);

boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t);
void memorystatus_send_low_swap_note(void);

int memorystatus_wakeup = 0;

unsigned int memorystatus_level = 0;

static int memorystatus_list_count = 0;

#define MEMSTAT_BUCKET_COUNT (JETSAM_PRIORITY_MAX + 1)

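/* One FIFO list of processes per jetsam priority band; 'count' mirrors the list length. */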
typedef struct memstat_bucket {
    TAILQ_HEAD(, proc) list;
    int count;
} memstat_bucket_t;

memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT];

uint64_t memstat_idle_demotion_deadline = 0;

static unsigned int memorystatus_dirty_count = 0;


int
memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret)
{
    user_addr_t level = 0;

    level = args->level;

    if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) {
        return EFAULT;
    }

    return 0;
}

static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search);
static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search);

static void memorystatus_thread(void *param __unused, wait_result_t wr __unused);

/* Jetsam */

#if CONFIG_JETSAM

int proc_get_memstat_priority(proc_t, boolean_t);

/* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */
#define LEGACY_HIWATER 1

static boolean_t memorystatus_idle_snapshot = 0;

static int memorystatus_highwater_enabled = 1;

unsigned int memorystatus_delta = 0;

static unsigned int memorystatus_available_pages_critical_base = 0;
//static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
static unsigned int memorystatus_available_pages_critical_idle_offset = 0;

#if DEVELOPMENT || DEBUG
static unsigned int memorystatus_jetsam_panic_debug = 0;

static unsigned int memorystatus_jetsam_policy = kPolicyDefault;
static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0;
#endif

static unsigned int memorystatus_thread_wasted_wakeup = 0;

static uint32_t kill_under_pressure_cause = 0;

static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot;
#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries

static unsigned int memorystatus_jetsam_snapshot_count = 0;
static unsigned int memorystatus_jetsam_snapshot_max = 0;

static void memorystatus_clear_errors(void);
static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages);
static uint32_t memorystatus_build_state(proc_t p);
static void memorystatus_update_levels_locked(boolean_t critical_only);
//static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured);

static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors);
#if LEGACY_HIWATER
static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors);
#endif

static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause);

#endif /* CONFIG_JETSAM */

/* VM pressure */

extern unsigned int vm_page_free_count;
extern unsigned int vm_page_active_count;
extern unsigned int vm_page_inactive_count;
extern unsigned int vm_page_throttled_count;
extern unsigned int vm_page_purgeable_count;
extern unsigned int vm_page_wire_count;

#if VM_PRESSURE_EVENTS

#include "vm_pressure.h"

extern boolean_t memorystatus_warn_process(pid_t pid, boolean_t critical);

vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal;

#if CONFIG_MEMORYSTATUS
unsigned int memorystatus_available_pages = (unsigned int)-1;
unsigned int memorystatus_available_pages_pressure = 0;
unsigned int memorystatus_available_pages_critical = 0;
unsigned int memorystatus_frozen_count = 0;
unsigned int memorystatus_suspended_count = 0;

/*
 * We use this flag to signal if we have any HWM offenders
 * on the system. This way we can reduce the number of wakeups
 * of the memorystatus_thread when the system is between the
 * "pressure" and "critical" threshold.
 *
 * The (re-)setting of this variable is done without any locks
 * or synchronization simply because it is not possible (currently)
 * to keep track of HWM offenders that drop down below their memory
 * limit and/or exit. So, we choose to burn a couple of wasted wakeups
 * by allowing the unguarded modification of this variable.
 */
boolean_t memorystatus_hwm_candidates = 0;

static int memorystatus_send_note(int event_code, void *data, size_t data_length);
#endif /* CONFIG_MEMORYSTATUS */

#endif /* VM_PRESSURE_EVENTS */

/* Freeze */

#if CONFIG_FREEZE

boolean_t memorystatus_freeze_enabled = FALSE;
int memorystatus_freeze_wakeup = 0;

static inline boolean_t memorystatus_can_freeze_processes(void);
static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);

static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);

/* Thresholds */
static unsigned int memorystatus_freeze_threshold = 0;

static unsigned int memorystatus_freeze_pages_min = 0;
static unsigned int memorystatus_freeze_pages_max = 0;

static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;

/* Stats */
static uint64_t memorystatus_freeze_count = 0;
static uint64_t memorystatus_freeze_pageouts = 0;

/* Throttling */
static throttle_interval_t throttle_intervals[] = {
    { 60, 8, 0, 0, { 0, 0 }, FALSE },       /* 1 hour intermediate interval, 8x burst */
    { 24 * 60, 1, 0, 0, { 0, 0 }, FALSE },  /* 24 hour long interval, no burst */
};

static uint64_t memorystatus_freeze_throttle_count = 0;

static unsigned int memorystatus_suspended_footprint_total = 0;

#endif /* CONFIG_FREEZE */

/* Debug */

extern struct knote *vm_find_knote_from_pid(pid_t, struct klist *);

#if DEVELOPMENT || DEBUG

#if CONFIG_JETSAM

/* Debug aid to help determine the appropriate limit */

static int
sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
    proc_t p;
    unsigned int b = 0;
    int error, enable = 0;
    int32_t memlimit;

    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || !req->newptr) {
        return (error);
    }

    error = SYSCTL_IN(req, &enable, sizeof(int));
    if (error || !req->newptr) {
        return (error);
    }

    if (!(enable == 0 || enable == 1)) {
        return EINVAL;
    }

    proc_list_lock();

    p = memorystatus_get_first_proc_locked(&b, TRUE);
    while (p) {
        if (enable) {
            if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
                memlimit = -1;
            } else {
                memlimit = p->p_memstat_memlimit;
            }
        } else {
            memlimit = -1;
        }
        task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);

        if (memlimit == -1) {
            p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
        } else {
            if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) {
                p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
            }
        }

        p = memorystatus_get_next_proc_locked(&b, p, TRUE);
    }

    memorystatus_highwater_enabled = enable;

    proc_list_unlock();

    return 0;
}

SYSCTL_INT(_kern, OID_AUTO, memorystatus_idle_snapshot, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_idle_snapshot, 0, "");

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, "");

/* Diagnostic code */

enum {
    kJetsamDiagnosticModeNone = 0,
    kJetsamDiagnosticModeAll = 1,
    kJetsamDiagnosticModeStopAtFirstActive = 2,
    kJetsamDiagnosticModeCount
} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;

static int jetsam_diagnostic_suspended_one_active_proc = 0;

static int
sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    const char *diagnosticStrings[] = {
        "jetsam: diagnostic mode: resetting critical level.",
        "jetsam: diagnostic mode: will examine all processes",
        "jetsam: diagnostic mode: will stop at first active process"
    };

    int error, val = jetsam_diagnostic_mode;
    boolean_t changed = FALSE;

    error = sysctl_handle_int(oidp, &val, 0, req);
    if (error || !req->newptr)
        return (error);
    if ((val < 0) || (val >= kJetsamDiagnosticModeCount)) {
        printf("jetsam: diagnostic mode: invalid value - %d\n", val);
        return EINVAL;
    }

    proc_list_lock();

    if ((unsigned int) val != jetsam_diagnostic_mode) {
        jetsam_diagnostic_mode = val;

        memorystatus_jetsam_policy &= ~kPolicyDiagnoseActive;

        switch (jetsam_diagnostic_mode) {
        case kJetsamDiagnosticModeNone:
            /* Already cleared */
            break;
        case kJetsamDiagnosticModeAll:
            memorystatus_jetsam_policy |= kPolicyDiagnoseAll;
            break;
        case kJetsamDiagnosticModeStopAtFirstActive:
            memorystatus_jetsam_policy |= kPolicyDiagnoseFirst;
            break;
        default:
            /* Already validated */
            break;
        }

        memorystatus_update_levels_locked(FALSE);
        changed = TRUE;
    }

    proc_list_unlock();

    if (changed) {
        printf("%s\n", diagnosticStrings[val]);
    }

    return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY,
    &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, "");

#if VM_PRESSURE_EVENTS

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, "");

/*
 * This routine is used for targeted notifications,
 * regardless of system memory pressure.
 * The "memnote" tool is the current user of this interface.
 */

static int
sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    int error = 0, pid = 0;
    int ret = 0;
    struct knote *kn = NULL;

    error = sysctl_handle_int(oidp, &pid, 0, req);
    if (error || !req->newptr)
        return (error);

    /*
     * We inspect 3 lists here for targeted notifications:
     * - memorystatus_klist
     * - vm_pressure_klist
     * - vm_pressure_dormant_klist
     *
     * The vm_pressure_* lists are tied to the old VM_PRESSURE
     * notification mechanism. We intend to stop using that
     * mechanism and, in turn, get rid of the 2 lists and
     * vm_dispatch_pressure_note_to_pid() too.
     */

    memorystatus_klist_lock();
    kn = vm_find_knote_from_pid(pid, &memorystatus_klist);
    if (kn) {
        /*
         * Forcibly send this pid a "warning" memory pressure notification.
         */
        kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
        KNOTE(&memorystatus_klist, kMemorystatusPressure);
        ret = 0;
    } else {
        ret = vm_dispatch_pressure_note_to_pid(pid, FALSE);
    }
    memorystatus_klist_unlock();

    return ret;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_vm_pressure_send, "I", "");

#endif /* VM_PRESSURE_EVENTS */

#endif /* CONFIG_JETSAM */

#if CONFIG_FREEZE

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");

SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_count, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_count, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");

boolean_t memorystatus_freeze_throttle_enabled = TRUE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");

/*
 * Manual trigger of freeze and thaw for dev / debug kernels only.
 */
static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    int error, pid = 0;
    proc_t p;

    if (memorystatus_freeze_enabled == FALSE) {
        return ENOTSUP;
    }

    error = sysctl_handle_int(oidp, &pid, 0, req);
    if (error || !req->newptr)
        return (error);

    p = proc_find(pid);
    if (p != NULL) {
        uint32_t purgeable, wired, clean, dirty;
        boolean_t shared;
        uint32_t max_pages = 0;

        if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
            max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
        } else {
            max_pages = UINT32_MAX - 1;
        }
        error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
        proc_rele(p);

        if (error)
            error = EIO;
        return error;
    }
    return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");

static int
sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    int error, pid = 0;
    proc_t p;

    if (memorystatus_freeze_enabled == FALSE) {
        return ENOTSUP;
    }

    error = sysctl_handle_int(oidp, &pid, 0, req);
    if (error || !req->newptr)
        return (error);

    p = proc_find(pid);
    if (p != NULL) {
        error = task_thaw(p->task);
        proc_rele(p);

        if (error)
            error = EIO;
        return error;
    }

    return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");

#endif /* CONFIG_FREEZE */

#endif /* DEVELOPMENT || DEBUG */

extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation,
    void *parameter,
    integer_t priority,
    thread_t *new_thread);

#if CONFIG_JETSAM
/*
 * Sort processes by size for a single jetsam bucket.
 */

static void memorystatus_sort_by_largest_process_locked(unsigned int bucket_index)
{
    proc_t p = NULL, insert_after_proc = NULL, max_proc = NULL;
    uint32_t pages = 0, max_pages = 0;
    memstat_bucket_t *current_bucket;

    if (bucket_index >= MEMSTAT_BUCKET_COUNT) {
        return;
    }

    current_bucket = &memstat_bucket[bucket_index];

    p = TAILQ_FIRST(&current_bucket->list);

    if (p) {
        memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
        max_pages = pages;
        insert_after_proc = NULL;

        p = TAILQ_NEXT(p, p_memstat_list);

restart:
        while (p) {

            memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);

            if (pages > max_pages) {
                max_pages = pages;
                max_proc = p;
            }

            p = TAILQ_NEXT(p, p_memstat_list);
        }

        if (max_proc) {

            TAILQ_REMOVE(&current_bucket->list, max_proc, p_memstat_list);

            if (insert_after_proc == NULL) {
                TAILQ_INSERT_HEAD(&current_bucket->list, max_proc, p_memstat_list);
            } else {
                TAILQ_INSERT_AFTER(&current_bucket->list, insert_after_proc, max_proc, p_memstat_list);
            }

            insert_after_proc = max_proc;

            /* Reset parameters for the new search. */
            p = TAILQ_NEXT(max_proc, p_memstat_list);
            if (p) {
                memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
                max_pages = pages;
            }
            max_proc = NULL;

            goto restart;
        }
    }
}

#endif /* CONFIG_JETSAM */

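/*
 * Bucket iterators: return the first/next process at or above *bucket_index.
 * With 'search' set, the walk advances across empty buckets; otherwise it is
 * confined to the current bucket. The proc list lock must be held.
 */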
static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) {
    memstat_bucket_t *current_bucket;
    proc_t next_p;

    if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) {
        return NULL;
    }

    current_bucket = &memstat_bucket[*bucket_index];
    next_p = TAILQ_FIRST(&current_bucket->list);
    if (!next_p && search) {
        while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
            current_bucket = &memstat_bucket[*bucket_index];
            next_p = TAILQ_FIRST(&current_bucket->list);
        }
    }

    return next_p;
}

static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) {
    memstat_bucket_t *current_bucket;
    proc_t next_p;

    if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) {
        return NULL;
    }

    next_p = TAILQ_NEXT(p, p_memstat_list);
    while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
        current_bucket = &memstat_bucket[*bucket_index];
        next_p = TAILQ_FIRST(&current_bucket->list);
    }

    return next_p;
}

__private_extern__ void
memorystatus_init(void)
{
    thread_t thread = THREAD_NULL;
    kern_return_t result;
    int i;

#if CONFIG_FREEZE
    memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
    memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;
#endif

    nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);

    /* Init buckets */
    for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
        TAILQ_INIT(&memstat_bucket[i].list);
        memstat_bucket[i].count = 0;
    }

    memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL);

    /* Apply overrides */
    PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage));
    assert(delta_percentage < 100);
    PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage));
    assert(critical_threshold_percentage < 100);
    PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage));
    assert(idle_offset_percentage < 100);
    PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage));
    assert(pressure_threshold_percentage < 100);
    PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage));
    assert(freeze_threshold_percentage < 100);

#if CONFIG_JETSAM
    memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
    memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
    memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;
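    /*
     * Illustrative arithmetic (not from the source): with max_mem = 512 MB
     * and 4 KB pages, atop_64(max_mem) = 131072 pages, so the defaults give
     * memorystatus_delta = 5 * 131072 / 100 = 6553 pages. Since
     * critical_threshold_percentage / delta_percentage = 5 / 5 = 1 (note the
     * integer division), the critical base is also 6553 pages.
     */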

    memorystatus_jetsam_snapshot_max = maxproc;
    memorystatus_jetsam_snapshot =
        (memorystatus_jetsam_snapshot_t*)kalloc(sizeof(memorystatus_jetsam_snapshot_t) +
        sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max);
    if (!memorystatus_jetsam_snapshot) {
        panic("Could not allocate memorystatus_jetsam_snapshot");
    }

    /* No contention at this point */
    memorystatus_update_levels_locked(FALSE);
#endif

#if CONFIG_FREEZE
    memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta;
#endif

    result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread);
    if (result == KERN_SUCCESS) {
        thread_deallocate(thread);
    } else {
        panic("Could not create memorystatus_thread");
    }
}

/* Centralised for the purposes of allowing panic-on-jetsam */
extern void
vm_wake_compactor_swapper(void);

/*
 * The jetsam no-frills kill call.
 * Returns: 0 on success,
 *          an error code on failure (EINVAL...)
 */
static int
jetsam_do_kill(proc_t p, int jetsam_flags) {
    int error = 0;
    error = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags);
    return(error);
}

/*
 * Wrapper for processes exiting with memorystatus details
 */
static boolean_t
memorystatus_do_kill(proc_t p, uint32_t cause) {

    int error = 0;
    __unused pid_t victim_pid = p->p_pid;

    KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_START,
        victim_pid, cause, vm_page_free_count, 0, 0);

#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
    if (memorystatus_jetsam_panic_debug & (1 << cause)) {
        panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause);
    }
#else
#pragma unused(cause)
#endif
    int jetsam_flags = P_LTERM_JETSAM;
    switch (cause) {
        case kMemorystatusKilledHiwat:           jetsam_flags |= P_JETSAM_HIWAT; break;
        case kMemorystatusKilledVnodes:          jetsam_flags |= P_JETSAM_VNODE; break;
        case kMemorystatusKilledVMPageShortage:  jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break;
        case kMemorystatusKilledVMThrashing:     jetsam_flags |= P_JETSAM_VMTHRASHING; break;
        case kMemorystatusKilledFCThrashing:     jetsam_flags |= P_JETSAM_FCTHRASHING; break;
        case kMemorystatusKilledPerProcessLimit: jetsam_flags |= P_JETSAM_PID; break;
        case kMemorystatusKilledIdleExit:        jetsam_flags |= P_JETSAM_IDLEEXIT; break;
    }
    error = jetsam_do_kill(p, jetsam_flags);

    KERNEL_DEBUG_CONSTANT( (BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DO_KILL)) | DBG_FUNC_END,
        victim_pid, cause, vm_page_free_count, error, 0);

    if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
        vm_wake_compactor_swapper();
    }

    return (error == 0);
}

/*
 * Node manipulation
 */

static void
memorystatus_check_levels_locked(void) {
#if CONFIG_JETSAM
    /* Update levels */
    memorystatus_update_levels_locked(TRUE);
#endif
}

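/*
 * Thread-call callback: demote processes whose deferral window has expired
 * from the JETSAM_PRIORITY_IDLE_DEFERRED band down to JETSAM_PRIORITY_IDLE,
 * then re-arm the timer for the next pending deadline.
 */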
static void
memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2)
{
    proc_t p;
    uint64_t current_time;
    memstat_bucket_t *demotion_bucket;

    MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n");

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0);

    current_time = mach_absolute_time();

    proc_list_lock();

    demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
    p = TAILQ_FIRST(&demotion_bucket->list);

    while (p) {
        MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid);

        assert(p->p_memstat_idledeadline);
        assert(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS);
        assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED);

        if (current_time >= p->p_memstat_idledeadline) {
#if DEBUG || DEVELOPMENT
            if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) {
                printf("memorystatus_perform_idle_demotion: moving process %d [%s] to idle band, but never dirtied (0x%x)!\n",
                    p->p_pid, (p->p_comm ? p->p_comm : "(unknown)"), p->p_memstat_dirty);
            }
#endif
            memorystatus_invalidate_idle_demotion_locked(p, TRUE);
            memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, false);

            // The prior process has moved out of the demotion bucket, so grab the new head and continue
            p = TAILQ_FIRST(&demotion_bucket->list);
            continue;
        }

        // No further candidates
        break;
    }

    memorystatus_reschedule_idle_demotion_locked();

    proc_list_unlock();

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

static void
memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state)
{
    boolean_t present_in_deferred_bucket = FALSE;

    if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        present_in_deferred_bucket = TRUE;
    }

    MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n",
        p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions);

    assert((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED);

    if (set_state) {
        assert(p->p_memstat_idledeadline == 0);
        p->p_memstat_dirty |= P_DIRTY_DEFER_IN_PROGRESS;
        p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time;
    }

    assert(p->p_memstat_idledeadline);

    if (present_in_deferred_bucket == FALSE) {
        memorystatus_scheduled_idle_demotions++;
    }
}

static void
memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state)
{
    boolean_t present_in_deferred_bucket = FALSE;

    if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        present_in_deferred_bucket = TRUE;
        assert(p->p_memstat_idledeadline);
    }

    MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n",
        p->p_pid, clear_state, memorystatus_scheduled_idle_demotions);


    if (clear_state) {
        p->p_memstat_idledeadline = 0;
        p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
    }

    if (present_in_deferred_bucket == TRUE) {
        memorystatus_scheduled_idle_demotions--;
    }

    assert(memorystatus_scheduled_idle_demotions >= 0);
}

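/*
 * Re-arm or cancel the demotion thread call. With no demotions pending, the
 * call is cancelled; otherwise it is programmed for the deadline of the head
 * of the DEFERRED bucket which, since entries are appended in arrival order,
 * holds the earliest deadline.
 */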
static void
memorystatus_reschedule_idle_demotion_locked(void) {
    if (0 == memorystatus_scheduled_idle_demotions) {
        if (memstat_idle_demotion_deadline) {
            /* Transitioned 1->0, so cancel next call */
            thread_call_cancel(memorystatus_idle_demotion_call);
            memstat_idle_demotion_deadline = 0;
        }
    } else {
        memstat_bucket_t *demotion_bucket;
        proc_t p;
        demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
        p = TAILQ_FIRST(&demotion_bucket->list);

        assert(p && p->p_memstat_idledeadline);

        if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline){
            thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline);
            memstat_idle_demotion_deadline = p->p_memstat_idledeadline;
        }
    }
}

/*
 * List manipulation
 */

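/* Insert a process into the bucket that matches its effective priority. */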
int
memorystatus_add(proc_t p, boolean_t locked)
{
    memstat_bucket_t *bucket;

    MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);

    if (!locked) {
        proc_list_lock();
    }

    /* Processes marked internal do not have priority tracked */
    if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
        goto exit;
    }

    bucket = &memstat_bucket[p->p_memstat_effectivepriority];

    if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        assert(bucket->count == memorystatus_scheduled_idle_demotions);
    }

    TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list);
    bucket->count++;

    memorystatus_list_count++;

    memorystatus_check_levels_locked();

exit:
    if (!locked) {
        proc_list_unlock();
    }

    return 0;
}

static void
memorystatus_update_priority_locked(proc_t p, int priority, boolean_t head_insert)
{
    memstat_bucket_t *old_bucket, *new_bucket;

    assert(priority < MEMSTAT_BUCKET_COUNT);

    /* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */
    if ((p->p_listflag & P_LIST_EXITED) != 0) {
        return;
    }

    MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d, inserting at %s\n",
        p->p_pid, priority, head_insert ? "head" : "tail");

    old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
    if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        assert(old_bucket->count == (memorystatus_scheduled_idle_demotions + 1));
    }

    TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list);
    old_bucket->count--;

    new_bucket = &memstat_bucket[priority];
    if (head_insert)
        TAILQ_INSERT_HEAD(&new_bucket->list, p, p_memstat_list);
    else
        TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
    new_bucket->count++;

#if CONFIG_JETSAM
    if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) {

        /*
         * Adjust memory limit based on if the task is going to/from foreground and background.
         */

        if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) ||
            ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) {
            int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit;
            task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);

            if (memlimit <= 0) {
                p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
            } else {
                p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
            }
        }
    }
#endif

    p->p_memstat_effectivepriority = priority;

    memorystatus_check_levels_locked();
}

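/*
 * Update a process's requested priority and, optionally, its memory limit.
 * 'effective' requests are honored only once per process (EALREADY thereafter);
 * 'memlimit_background' defers applying the limit until the process leaves
 * the foreground.
 */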
int
memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background, boolean_t is_fatal_limit)
{
    int ret;
    boolean_t head_insert = false;

#if !CONFIG_JETSAM
#pragma unused(update_memlimit, memlimit, memlimit_background, is_fatal_limit)
#endif

    MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data);

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);

    if (priority == -1) {
        /* Use as shorthand for default priority */
        priority = JETSAM_PRIORITY_DEFAULT;
    } else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */
        priority = JETSAM_PRIORITY_IDLE;
    } else if (priority == JETSAM_PRIORITY_IDLE_HEAD) {
        /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle queue */
        priority = JETSAM_PRIORITY_IDLE;
        head_insert = true;
    } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) {
        /* Sanity check */
        ret = EINVAL;
        goto out;
    }

    proc_list_lock();

    assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

    if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) {
        ret = EALREADY;
        proc_list_unlock();
        MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid);
        goto out;
    }

    if ((p->p_memstat_state & P_MEMSTAT_TERMINATED) || ((p->p_listflag & P_LIST_EXITED) != 0)) {
        /*
         * This could happen when a process calling posix_spawn() is exiting on the jetsam thread.
         */
        ret = EBUSY;
        proc_list_unlock();
        goto out;
    }

    p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED;
    p->p_memstat_userdata = user_data;
    p->p_memstat_requestedpriority = priority;

#if CONFIG_JETSAM
    if (update_memlimit) {
        p->p_memstat_memlimit = memlimit;
        if (memlimit_background) {
            /* Will be set as priority is updated */
            p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND;

            /* Cannot have a background memory limit and be fatal. */
            is_fatal_limit = FALSE;

        } else {
            /* Otherwise, apply now */
            if (memorystatus_highwater_enabled) {
                task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);
            }
        }

        if (is_fatal_limit || memlimit <= 0) {
            p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
        } else {
            p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
        }
    }
#endif

    /*
     * We can't add to the JETSAM_PRIORITY_IDLE_DEFERRED bucket here.
     * But, we could be removing it from the bucket.
     * Check and take appropriate steps if so.
     */

    if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {

        memorystatus_invalidate_idle_demotion_locked(p, TRUE);
    }

    memorystatus_update_priority_locked(p, priority, head_insert);

    proc_list_unlock();
    ret = 0;

out:
    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0);

    return ret;
}

int
memorystatus_remove(proc_t p, boolean_t locked)
{
    int ret;
    memstat_bucket_t *bucket;

    MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid);

    if (!locked) {
        proc_list_lock();
    }

    assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

    bucket = &memstat_bucket[p->p_memstat_effectivepriority];
    if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        assert(bucket->count == memorystatus_scheduled_idle_demotions);
    }

    TAILQ_REMOVE(&bucket->list, p, p_memstat_list);
    bucket->count--;

    memorystatus_list_count--;

    /* If awaiting demotion to the idle band, clean up */
    if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        memorystatus_invalidate_idle_demotion_locked(p, TRUE);
        memorystatus_reschedule_idle_demotion_locked();
    }

    memorystatus_check_levels_locked();

#if CONFIG_FREEZE
    if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) {
        memorystatus_frozen_count--;
    }

    if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
        memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint;
        memorystatus_suspended_count--;
    }
#endif

    if (!locked) {
        proc_list_unlock();
    }

    if (p) {
        ret = 0;
    } else {
        ret = ESRCH;
    }

    return ret;
}

static boolean_t
memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) {
    /* See that the process isn't marked for termination */
    if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) {
        return FALSE;
    }

    /* Idle exit requires that process be tracked */
    if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) &&
       !(pcontrol & PROC_DIRTY_TRACK)) {
        return FALSE;
    }

    /* 'Launch in progress' tracking requires that process have enabled dirty tracking too. */
    if ((pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) &&
       !(pcontrol & PROC_DIRTY_TRACK)) {
        return FALSE;
    }

    /* Deferral is only relevant if idle exit is specified */
    if ((pcontrol & PROC_DIRTY_DEFER) &&
       !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) {
        return FALSE;
    }

    return TRUE;
}

static void
memorystatus_update_idle_priority_locked(proc_t p) {
    int32_t priority;

    MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty);

    if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
        priority = (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) ? JETSAM_PRIORITY_IDLE_DEFERRED : JETSAM_PRIORITY_IDLE;
    } else {
        priority = p->p_memstat_requestedpriority;
    }

    if (priority != p->p_memstat_effectivepriority) {
        memorystatus_update_priority_locked(p, priority, false);
    }
}

/*
 * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle
 * (clean). They may also indicate that they support termination when idle, with the result that they are promoted
 * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low
 * priority idle band when clean (and killed earlier, protecting higher priority processes).
 *
 * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by
 * memorystatus_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band
 * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to
 * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle
 * band. The deferral can be cleared early by clearing the appropriate flag.
 *
 * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process
 * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be
 * re-enabled or the guard state cleared, depending on whether the guard deadline has passed.
 */

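/*
 * Illustrative userspace flow (a sketch, assuming the libproc wrappers that
 * front these entry points; names and exact signatures may differ):
 *
 *   proc_track_dirty(getpid(),
 *       PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
 *   proc_set_dirty(getpid(), true);    // busy: promoted to requested band
 *   ...do work...
 *   proc_set_dirty(getpid(), false);   // clean: demoted (deferred, then idle)
 */
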
int
memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
    unsigned int old_dirty;
    boolean_t reschedule = FALSE;
    boolean_t already_deferred = FALSE;
    boolean_t defer_now = FALSE;
    int ret;

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_TRACK),
        p->p_pid, p->p_memstat_dirty, pcontrol, 0, 0);

    proc_list_lock();

    if ((p->p_listflag & P_LIST_EXITED) != 0) {
        /*
         * Process is on its way out.
         */
        ret = EBUSY;
        goto exit;
    }

    if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
        ret = EPERM;
        goto exit;
    }

    if (!memorystatus_validate_track_flags(p, pcontrol)) {
        ret = EINVAL;
        goto exit;
    }

    old_dirty = p->p_memstat_dirty;

    /* These bits are cumulative, as per <rdar://problem/11159924> */
    if (pcontrol & PROC_DIRTY_TRACK) {
        p->p_memstat_dirty |= P_DIRTY_TRACK;
    }

    if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) {
        p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT;
    }

    if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) {
        p->p_memstat_dirty |= P_DIRTY_LAUNCH_IN_PROGRESS;
    }

    if (old_dirty & P_DIRTY_DEFER_IN_PROGRESS) {
        already_deferred = TRUE;
    }

    /* This can be set and cleared exactly once. */
    if (pcontrol & PROC_DIRTY_DEFER) {

        if ( !(old_dirty & P_DIRTY_DEFER)) {
            p->p_memstat_dirty |= P_DIRTY_DEFER;
        }

        defer_now = TRUE;
    }

    MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / defer %s / dirty %s for process %d\n",
        ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N",
        defer_now ? "Y" : "N",
        p->p_memstat_dirty & P_DIRTY ? "Y" : "N",
        p->p_pid);

    /* Kick off or invalidate the idle exit deferment if there's a state transition. */
    if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) {
        if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) &&
            defer_now && !already_deferred) {

            /*
             * Request to defer a clean process that's idle-exit enabled
             * and not already in the jetsam deferred band.
             */
            memorystatus_schedule_idle_demotion_locked(p, TRUE);
            reschedule = TRUE;

        } else if (!defer_now && already_deferred) {

            /*
             * Either the process is no longer idle-exit enabled OR
             * there's a request to cancel a currently active deferral.
             */
            memorystatus_invalidate_idle_demotion_locked(p, TRUE);
            reschedule = TRUE;
        }
    } else {

        /*
         * We are trying to operate on a dirty process. Dirty processes have to
         * be removed from the deferred band. The question is do we reset the
         * deferred state or not?
         *
         * This could be a legal request like:
         * - this process had opted into the JETSAM_DEFERRED band
         * - but it's now dirty and requests to opt out.
         * In this case, we remove the process from the band and reset its
         * state too. It'll opt back in properly when needed.
         *
         * OR, this request could be a user-space bug. E.g.:
         * - this process had opted into the JETSAM_DEFERRED band when clean
         * - and, then issues another request to again put it into the band except
         *   this time the process is dirty.
         * The process going dirty, as a transition in memorystatus_dirty_set(), will pull the process out of
         * the deferred band with its state intact. So our request below is a no-op.
         * But we do it here anyway for coverage.
         *
         * memorystatus_update_idle_priority_locked()
         * single-mindedly treats a dirty process as "cannot be in the deferred band".
         */

        if (!defer_now && already_deferred) {
            memorystatus_invalidate_idle_demotion_locked(p, TRUE);
            reschedule = TRUE;
        } else {
            memorystatus_invalidate_idle_demotion_locked(p, FALSE);
            reschedule = TRUE;
        }
    }

    memorystatus_update_idle_priority_locked(p);

    if (reschedule) {
        memorystatus_reschedule_idle_demotion_locked();
    }

    ret = 0;

exit:
    proc_list_unlock();

    return ret;
}

int
memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
    int ret;
    boolean_t kill = false;
    boolean_t reschedule = FALSE;
    boolean_t was_dirty = FALSE;
    boolean_t now_dirty = FALSE;

    MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty);

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_SET), p->p_pid, self, pcontrol, 0, 0);

    proc_list_lock();

    if ((p->p_listflag & P_LIST_EXITED) != 0) {
        /*
         * Process is on its way out.
         */
        ret = EBUSY;
        goto exit;
    }

    if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
        ret = EPERM;
        goto exit;
    }

    if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
        was_dirty = TRUE;

    if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
        /* Dirty tracking not enabled */
        ret = EINVAL;
    } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
        /*
         * Process is set to be terminated and we're attempting to mark it dirty.
         * Set for termination and marking as clean is OK - see <rdar://problem/10594349>.
         */
        ret = EBUSY;
    } else {
        int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN;
        if (pcontrol && !(p->p_memstat_dirty & flag)) {
            /* Mark the process as having been dirtied at some point */
            p->p_memstat_dirty |= (flag | P_DIRTY_MARKED);
            memorystatus_dirty_count++;
            ret = 0;
        } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) {
            if ((flag == P_DIRTY_SHUTDOWN) && !(p->p_memstat_dirty & P_DIRTY)) {
1500 /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */
1501 p->p_memstat_dirty |= P_DIRTY_TERMINATED;
1502 kill = true;
1503 } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
1504 /* Kill previously terminated processes if set clean */
1505 kill = true;
1506 }
1507 p->p_memstat_dirty &= ~flag;
1508 memorystatus_dirty_count--;
1509 ret = 0;
1510 } else {
1511 /* Already set */
1512 ret = EALREADY;
1513 }
1514 }
1515
1516 if (ret != 0) {
1517 goto exit;
1518 }
1519
1520 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
1521 now_dirty = TRUE;
1522
1523 if ((was_dirty == TRUE && now_dirty == FALSE) ||
1524 (was_dirty == FALSE && now_dirty == TRUE)) {
1525
1526 /* Manage idle exit deferral, if applied */
1527 if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) ==
1528 (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) {
1529
1530 /*
1531 * P_DIRTY_DEFER_IN_PROGRESS means the process is in the deferred band OR it might be heading back
1532 * there once it's clean again and has some protection window left.
1533 */
1534
1535 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
1536 /*
1537 * New dirty process i.e. "was_dirty == FALSE && now_dirty == TRUE"
1538 *
1539 * The process will move from the deferred band to its higher requested
1540 * jetsam band. But we don't clear its state i.e. we want to remember that
1541 * this process was part of the "deferred" band and will return to it.
1542 *
1543 * This way, we don't let it age beyond the protection
1544 * window when it returns to "clean". All the while giving
1545 * it a chance to perform its work while "dirty".
1546 *
1547 */
1548 memorystatus_invalidate_idle_demotion_locked(p, FALSE);
1549 reschedule = TRUE;
1550 } else {
1551
1552 /*
1553 * Process is back from "dirty" to "clean".
1554 *
1555 * Is its timer up OR does it still have some protection
1556 * window left?
1557 */
1558
1559 if (mach_absolute_time() >= p->p_memstat_idledeadline) {
1560 /*
1561 * The process' deadline has expired. It currently
1562 * does not reside in the DEFERRED bucket.
1563 *
1564 * It's on its way to the JETSAM_PRIORITY_IDLE
1565 * bucket via memorystatus_update_idle_priority_locked()
1566 * below.
1567
1568 * So all we need to do is reset all the state on the
1569 * process that's related to the DEFERRED bucket i.e.
1570 * the DIRTY_DEFER_IN_PROGRESS flag and the timer deadline.
1571 *
1572 */
1573
1574 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1575 reschedule = TRUE;
1576 } else {
1577 /*
1578 * It still has some protection window left and so
1579 * we just re-arm the timer without modifying any
1580 * state on the process.
1581 */
1582 memorystatus_schedule_idle_demotion_locked(p, FALSE);
1583 reschedule = TRUE;
1584 }
1585 }
1586 }
1587
1588 memorystatus_update_idle_priority_locked(p);
1589
1590 /* If the deferral state changed, reschedule the demotion timer */
1591 if (reschedule) {
1592 memorystatus_reschedule_idle_demotion_locked();
1593 }
1594 }
1595
1596 if (kill) {
1597 psignal(p, SIGKILL);
1598 }
1599
1600 exit:
1601 proc_list_unlock();
1602
1603 return ret;
1604 }
1605
1606 int
1607 memorystatus_dirty_clear(proc_t p, uint32_t pcontrol) {
1608
1609 int ret = 0;
1610
1611 MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_clear(): %d 0x%x 0x%x\n", p->p_pid, pcontrol, p->p_memstat_dirty);
1612
1613 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_DIRTY_CLEAR), p->p_pid, pcontrol, 0, 0, 0);
1614
1615 proc_list_lock();
1616
1617 if ((p->p_listflag & P_LIST_EXITED) != 0) {
1618 /*
1619 * Process is on its way out.
1620 */
1621 ret = EBUSY;
1622 goto exit;
1623 }
1624
1625 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
1626 ret = EPERM;
1627 goto exit;
1628 }
1629
1630 if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
1631 /* Dirty tracking not enabled */
1632 ret = EINVAL;
1633 goto exit;
1634 }
1635
1636 if (!pcontrol || (pcontrol & (PROC_DIRTY_LAUNCH_IN_PROGRESS | PROC_DIRTY_DEFER)) == 0) {
1637 ret = EINVAL;
1638 goto exit;
1639 }
1640
1641 if (pcontrol & PROC_DIRTY_LAUNCH_IN_PROGRESS) {
1642 p->p_memstat_dirty &= ~P_DIRTY_LAUNCH_IN_PROGRESS;
1643 }
1644
1645 /* This can be set and cleared exactly once. */
1646 if (pcontrol & PROC_DIRTY_DEFER) {
1647
1648 if (p->p_memstat_dirty & P_DIRTY_DEFER) {
1649
1650 p->p_memstat_dirty &= ~P_DIRTY_DEFER;
1651
1652 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
1653 memorystatus_update_idle_priority_locked(p);
1654 memorystatus_reschedule_idle_demotion_locked();
1655 }
1656 }
1657
1658 ret = 0;
1659 exit:
1660 proc_list_unlock();
1661
1662 return ret;
1663 }
1664
1665 int
1666 memorystatus_dirty_get(proc_t p) {
1667 int ret = 0;
1668
1669 proc_list_lock();
1670
1671 if (p->p_memstat_dirty & P_DIRTY_TRACK) {
1672 ret |= PROC_DIRTY_TRACKED;
1673 if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
1674 ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT;
1675 }
1676 if (p->p_memstat_dirty & P_DIRTY) {
1677 ret |= PROC_DIRTY_IS_DIRTY;
1678 }
1679 if (p->p_memstat_dirty & P_DIRTY_LAUNCH_IN_PROGRESS) {
1680 ret |= PROC_DIRTY_LAUNCH_IS_IN_PROGRESS;
1681 }
1682 }
1683
1684 proc_list_unlock();
1685
1686 return ret;
1687 }
1688
1689 int
1690 memorystatus_on_terminate(proc_t p) {
1691 int sig;
1692
1693 proc_list_lock();
1694
1695 p->p_memstat_dirty |= P_DIRTY_TERMINATED;
1696
1697 if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) {
1698 /* Clean; mark as terminated and issue SIGKILL */
1699 sig = SIGKILL;
1700 } else {
1701 /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */
1702 sig = SIGTERM;
1703 }
1704
1705 proc_list_unlock();
1706
1707 return sig;
1708 }
1709
1710 void
1711 memorystatus_on_suspend(proc_t p)
1712 {
1713 #if CONFIG_FREEZE
1714 uint32_t pages;
1715 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
1716 #endif
1717 proc_list_lock();
1718 #if CONFIG_FREEZE
1719 p->p_memstat_suspendedfootprint = pages;
1720 memorystatus_suspended_footprint_total += pages;
1721 memorystatus_suspended_count++;
1722 #endif
1723 p->p_memstat_state |= P_MEMSTAT_SUSPENDED;
1724 proc_list_unlock();
1725 }
1726
1727 void
1728 memorystatus_on_resume(proc_t p)
1729 {
1730 #if CONFIG_FREEZE
1731 boolean_t frozen;
1732 pid_t pid;
1733 #endif
1734
1735 proc_list_lock();
1736
1737 #if CONFIG_FREEZE
1738 frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN);
1739 if (frozen) {
1740 memorystatus_frozen_count--;
1741 p->p_memstat_state |= P_MEMSTAT_PRIOR_THAW;
1742 }
1743
1744 memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint;
1745 memorystatus_suspended_count--;
1746
1747 pid = p->p_pid;
1748 #endif
1749
1750 p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN);
1751
1752 proc_list_unlock();
1753
1754 #if CONFIG_FREEZE
1755 if (frozen) {
1756 memorystatus_freeze_entry_t data = { pid, FALSE, 0 };
1757 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
1758 }
1759 #endif
1760 }
1761
1762 void
1763 memorystatus_on_inactivity(proc_t p)
1764 {
1765 #pragma unused(p)
1766 #if CONFIG_FREEZE
1767 /* Wake the freeze thread */
1768 thread_wakeup((event_t)&memorystatus_freeze_wakeup);
1769 #endif
1770 }
1771
1772 static uint32_t
1773 memorystatus_build_state(proc_t p) {
1774 uint32_t snapshot_state = 0;
1775
1776 /* General */
1777 if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
1778 snapshot_state |= kMemorystatusSuspended;
1779 }
1780 if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
1781 snapshot_state |= kMemorystatusFrozen;
1782 }
1783 if (p->p_memstat_state & P_MEMSTAT_PRIOR_THAW) {
1784 snapshot_state |= kMemorystatusWasThawed;
1785 }
1786
1787 /* Tracking */
1788 if (p->p_memstat_dirty & P_DIRTY_TRACK) {
1789 snapshot_state |= kMemorystatusTracked;
1790 }
1791 if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
1792 snapshot_state |= kMemorystatusSupportsIdleExit;
1793 }
1794 if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
1795 snapshot_state |= kMemorystatusDirty;
1796 }
1797
1798 return snapshot_state;
1799 }
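/*
 * Illustrative decode of the snapshot state assembled above; the
 * kMemorystatus* bits are shared with user space via sys/kern_memorystatus.h.
 */
#if 0
static void
print_snapshot_state(uint32_t state)
{
	if (state & kMemorystatusSuspended)        printf("suspended ");
	if (state & kMemorystatusFrozen)           printf("frozen ");
	if (state & kMemorystatusWasThawed)        printf("was-thawed ");
	if (state & kMemorystatusTracked)          printf("tracked ");
	if (state & kMemorystatusSupportsIdleExit) printf("idle-exit ");
	if (state & kMemorystatusDirty)            printf("dirty ");
}
#endif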
1800
1801 #if !CONFIG_JETSAM
1802
1803 static boolean_t
1804 kill_idle_exit_proc(void)
1805 {
1806 proc_t p, victim_p = PROC_NULL;
1807 uint64_t current_time;
1808 boolean_t killed = FALSE;
1809 unsigned int i = 0;
1810
1811 /* Pick next idle exit victim. */
1812 current_time = mach_absolute_time();
1813
1814 proc_list_lock();
1815
1816 p = memorystatus_get_first_proc_locked(&i, FALSE);
1817 while (p) {
1818 /* No need to look beyond the idle band */
1819 if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) {
1820 break;
1821 }
1822
1823 if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) {
1824 if (current_time >= p->p_memstat_idledeadline) {
1825 p->p_memstat_dirty |= P_DIRTY_TERMINATED;
1826 victim_p = proc_ref_locked(p);
1827 break;
1828 }
1829 }
1830
1831 p = memorystatus_get_next_proc_locked(&i, p, FALSE);
1832 }
1833
1834 proc_list_unlock();
1835
1836 if (victim_p) {
1837 printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_p->p_pid, (victim_p->p_comm ? victim_p->p_comm : "(unknown)"));
1838 killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit);
1839 proc_rele(victim_p);
1840 }
1841
1842 return killed;
1843 }
1844 #endif
1845
1846 #if CONFIG_JETSAM
1847 static void
1848 memorystatus_thread_wake(void) {
1849 thread_wakeup((event_t)&memorystatus_wakeup);
1850 }
1851 #endif /* CONFIG_JETSAM */
1852
1853 extern void vm_pressure_response(void);
1854
1855 static int
1856 memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation)
1857 {
1858 if (interval_ms) {
1859 assert_wait_timeout(&memorystatus_wakeup, THREAD_UNINT, interval_ms, 1000 * NSEC_PER_USEC);
1860 } else {
1861 assert_wait(&memorystatus_wakeup, THREAD_UNINT);
1862 }
1863
1864 return thread_block(continuation);
1865 }
1866
1867 static void
1868 memorystatus_thread(void *param __unused, wait_result_t wr __unused)
1869 {
1870 static boolean_t is_vm_privileged = FALSE;
1871 #if CONFIG_JETSAM
1872 boolean_t post_snapshot = FALSE;
1873 uint32_t errors = 0;
1874 uint32_t hwm_kill = 0;
1875 #endif
1876
1877 if (is_vm_privileged == FALSE) {
1878 /*
1879 * It's the first time the thread has run, so just mark the thread as privileged and block.
1880 * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>.
1881 */
1882 thread_wire(host_priv_self(), current_thread(), TRUE);
1883 is_vm_privileged = TRUE;
1884
1885 memorystatus_thread_block(0, memorystatus_thread);
1886 }
1887
1888 #if CONFIG_JETSAM
1889
1890 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START,
1891 memorystatus_available_pages, 0, 0, 0, 0);
1892
1893 /*
1894 * Jetsam aware version.
1895 *
1896 * The VM pressure notification thread is working its way through clients in parallel.
1897 *
1898 * So, while the pressure notification thread is targeting processes in order of
1899 * increasing jetsam priority, we can hopefully reduce / stop its work by killing
1900 * any processes that have exceeded their highwater mark.
1901 *
1902 * If we run out of HWM processes and our available pages drops below the critical threshold, then,
1903 * we target the least recently used process in order of increasing jetsam priority (exception: the FG band).
1904 */
1905 while (is_thrashing(kill_under_pressure_cause) ||
1906 memorystatus_available_pages <= memorystatus_available_pages_pressure) {
1907 boolean_t killed;
1908 int32_t priority;
1909 uint32_t cause;
1910
1911 if (kill_under_pressure_cause) {
1912 cause = kill_under_pressure_cause;
1913 } else {
1914 cause = kMemorystatusKilledVMPageShortage;
1915 }
1916
1917 #if LEGACY_HIWATER
1918 /* Highwater */
1919 killed = memorystatus_kill_hiwat_proc(&errors);
1920 if (killed) {
1921 hwm_kill++;
1922 post_snapshot = TRUE;
1923 goto done;
1924 } else {
1925 memorystatus_hwm_candidates = FALSE;
1926 }
1927
1928 /* No highwater processes to kill. Continue or stop for now? */
1929 if (!is_thrashing(kill_under_pressure_cause) &&
1930 (memorystatus_available_pages > memorystatus_available_pages_critical)) {
1931 /*
1932 * We are _not_ out of pressure but we are above the critical threshold and there's:
1933 * - no compressor thrashing
1934 * - no more HWM processes left.
1935 * For now, don't kill any other processes.
1936 */
1937
1938 if (hwm_kill == 0) {
1939 memorystatus_thread_wasted_wakeup++;
1940 }
1941
1942 break;
1943 }
1944 #endif
1945
1946 /* LRU */
1947 killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors);
1948 if (killed) {
1949 /* Don't generate logs for steady-state idle-exit kills (unless overridden for debug) */
1950 if ((priority != JETSAM_PRIORITY_IDLE) || memorystatus_idle_snapshot) {
1951 post_snapshot = TRUE;
1952 }
1953 goto done;
1954 }
1955
1956 if (memorystatus_available_pages <= memorystatus_available_pages_critical) {
1957 /* Under pressure and unable to kill a process - panic */
1958 panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages);
1959 }
1960
1961 done:
1962
1963 /*
1964 * We do not want to over-kill when thrashing has been detected.
1965 * To avoid that, we reset the flag here and notify the
1966 * compressor.
1967 */
1968 if (is_thrashing(kill_under_pressure_cause)) {
1969 kill_under_pressure_cause = 0;
1970 vm_thrashing_jetsam_done();
1971 }
1972 }
1973
1974 kill_under_pressure_cause = 0;
1975
1976 if (errors) {
1977 memorystatus_clear_errors();
1978 }
1979
1980 #if VM_PRESSURE_EVENTS
1981 /*
1982 * LD: We used to target the foreground process first and foremost here.
1983 * Now, we target all processes, starting from the non-suspended, background
1984 * processes first. We will target foreground too.
1985 *
1986 * memorystatus_update_vm_pressure(TRUE);
1987 */
1988 //vm_pressure_response();
1989 #endif
1990
1991 if (post_snapshot) {
1992 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
1993 sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
1994 memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
1995 memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
1996 }
1997
1998 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END,
1999 memorystatus_available_pages, 0, 0, 0, 0);
2000
2001 #else /* CONFIG_JETSAM */
2002
2003 /*
2004 * Jetsam not enabled
2005 */
2006
2007 #endif /* CONFIG_JETSAM */
2008
2009 memorystatus_thread_block(0, memorystatus_thread);
2010 }
2011
2012 #if !CONFIG_JETSAM
2013 /*
2014 * Returns TRUE:
2015 * when an idle-exitable proc was killed
2016 * Returns FALSE:
2017 * when there are no more idle-exitable procs found
2018 * when the attempt to kill an idle-exitable proc failed
2019 */
2020 boolean_t memorystatus_idle_exit_from_VM(void) {
2021 return(kill_idle_exit_proc());
2022 }
2023 #endif /* !CONFIG_JETSAM */
2024
2025 #if CONFIG_JETSAM
2026
2027 /*
2028 * Callback invoked when the allowable physical memory footprint is exceeded
2029 * (dirty pages + IOKit mappings)
2030 *
2031 * This is invoked both for advisory, non-fatal per-task high watermarks
2032 * and for fatal task memory limits.
2033 */
2034 void
2035 memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb)
2036 {
2037 proc_t p = current_proc();
2038
2039 if (warning == FALSE) {
2040 printf("process %d (%s) exceeded physical memory footprint limit of %d MB\n",
2041 p->p_pid, p->p_comm, max_footprint_mb);
2042 }
2043
2044 #if VM_PRESSURE_EVENTS
2045 if (warning == TRUE) {
2046 if (memorystatus_warn_process(p->p_pid, TRUE /* critical? */) != TRUE) {
2047 /* Print warning, since it's possible that task has not registered for pressure notifications */
2048 printf("task_exceeded_footprint: failed to warn the current task (exiting, or no handler registered?).\n");
2049 }
2050 return;
2051 }
2052 #endif /* VM_PRESSURE_EVENTS */
2053
2054 if ((p->p_memstat_state & P_MEMSTAT_FATAL_MEMLIMIT) == P_MEMSTAT_FATAL_MEMLIMIT) {
2055 /*
2056 * If this process has no high watermark or has a fatal task limit, then we have been invoked because the task
2057 * has violated either the system-wide per-task memory limit OR its own task limit.
2058 */
2059 if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) {
2060 printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n");
2061 }
2062 } else {
2063 /*
2064 * HWM offender exists. Done without locks or synchronization.
2065 * See comment near its declaration for more details.
2066 */
2067 memorystatus_hwm_candidates = TRUE;
2068 }
2069 }
2070
2071 /*
2072 * This is invoked when cpulimits have been exceeded while in fatal mode.
2073 * The jetsam_flags do not apply as those are for memory related kills.
2074 * We call this routine so that the offending process is killed with
2075 * a non-zero exit status.
2076 */
2077 void
2078 jetsam_on_ledger_cpulimit_exceeded(void)
2079 {
2080 int retval = 0;
2081 int jetsam_flags = 0; /* make it obvious */
2082 proc_t p = current_proc();
2083
2084 printf("task_exceeded_cpulimit: killing pid %d [%s]\n",
2085 p->p_pid, (p->p_comm ? p->p_comm : "(unknown)"));
2086
2087 retval = jetsam_do_kill(p, jetsam_flags);
2088
2089 if (retval) {
2090 printf("task_exceeded_cpulimit: failed to kill current task (exiting?).\n");
2091 }
2092 }
2093
2094 static void
2095 memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint, uint32_t *max_footprint_lifetime, uint32_t *purgeable_pages)
2096 {
2097 assert(task);
2098 assert(footprint);
2099
2100 *footprint = (uint32_t)(get_task_phys_footprint(task) / PAGE_SIZE_64);
2101 if (max_footprint) {
2102 *max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64);
2103 }
2104 if (max_footprint_lifetime) {
2105 *max_footprint_lifetime = (uint32_t)(get_task_resident_max(task) / PAGE_SIZE_64);
2106 }
2107 if (purgeable_pages) {
2108 *purgeable_pages = (uint32_t)(get_task_purgeable_size(task) / PAGE_SIZE_64);
2109 }
2110 }
2111
2112
2113 static void
2114 memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause)
2115 {
2116 unsigned int i;
2117
2118 for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) {
2119 if (memorystatus_jetsam_snapshot_list[i].pid == p->p_pid) {
2120 /* Update if the priority has changed since the snapshot was taken */
2121 if (memorystatus_jetsam_snapshot_list[i].priority != p->p_memstat_effectivepriority) {
2122 memorystatus_jetsam_snapshot_list[i].priority = p->p_memstat_effectivepriority;
2123 strlcpy(memorystatus_jetsam_snapshot_list[i].name, p->p_comm, MAXCOMLEN+1);
2124 memorystatus_jetsam_snapshot_list[i].state = memorystatus_build_state(p);
2125 memorystatus_jetsam_snapshot_list[i].user_data = p->p_memstat_userdata;
2126 memorystatus_jetsam_snapshot_list[i].fds = p->p_fd->fd_nfiles;
2127 }
2128 memorystatus_jetsam_snapshot_list[i].killed = kill_cause;
2129 return;
2130 }
2131 }
2132 }
2133
2134 void memorystatus_pages_update(unsigned int pages_avail)
2135 {
2136 memorystatus_available_pages = pages_avail;
2137
2138 #if VM_PRESSURE_EVENTS
2139 /*
2140 * Since memorystatus_available_pages changes, we should
2141 * re-evaluate the pressure levels on the system and
2142 * check if we need to wake the pressure thread.
2143 * We also update memorystatus_level in that routine.
2144 */
2145 vm_pressure_response();
2146
2147 if (memorystatus_available_pages <= memorystatus_available_pages_pressure) {
2148
2149 if (memorystatus_hwm_candidates || (memorystatus_available_pages <= memorystatus_available_pages_critical)) {
2150 memorystatus_thread_wake();
2151 }
2152 }
2153 #else /* VM_PRESSURE_EVENTS */
2154
2155 boolean_t critical, delta;
2156
2157 if (!memorystatus_delta) {
2158 return;
2159 }
2160
2161 critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE;
2162 delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta))
2163 || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE;
2164
2165 if (critical || delta) {
2166 memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem);
2167 memorystatus_thread_wake();
2168 }
2169 #endif /* VM_PRESSURE_EVENTS */
2170 }
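/*
 * Worked example for the level calculation above (assuming 4 KB pages): on
 * a 512 MB device, atop_64(max_mem) == 131072 pages; with pages_avail ==
 * 13107, memorystatus_level == 13107 * 100 / 131072 == 9, i.e. roughly 10%
 * of physical memory remains available.
 */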
2171
2172 static boolean_t
2173 memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry)
2174 {
2175 clock_sec_t tv_sec;
2176 clock_usec_t tv_usec;
2177
2178 memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t));
2179
2180 entry->pid = p->p_pid;
2181 strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1);
2182 entry->priority = p->p_memstat_effectivepriority;
2183 memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages, &entry->max_pages_lifetime, &entry->purgeable_pages);
2184 entry->state = memorystatus_build_state(p);
2185 entry->user_data = p->p_memstat_userdata;
2186 memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid));
2187 entry->fds = p->p_fd->fd_nfiles;
2188
2189 absolutetime_to_microtime(get_task_cpu_time(p->task), &tv_sec, &tv_usec);
2190 entry->cpu_time.tv_sec = tv_sec;
2191 entry->cpu_time.tv_usec = tv_usec;
2192
2193 return TRUE;
2194 }
2195
2196 static void
2197 memorystatus_jetsam_snapshot_procs_locked(void)
2198 {
2199 proc_t p, next_p;
2200 unsigned int b = 0, i = 0;
2201 kern_return_t kr = KERN_SUCCESS;
2202
2203 mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
2204 vm_statistics64_data_t vm_stat;
2205
2206 if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count)) != KERN_SUCCESS) {
2207 printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr);
2208 memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats));
2209 } else {
2210 memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count;
2211 memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count;
2212 memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count;
2213 memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count;
2214 memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count;
2215 memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count;
2216
2217 memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count;
2218 memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count;
2219 memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count;
2220 memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions;
2221 memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions;
2222 memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count;
2223 memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor;
2224 }
2225
2226 next_p = memorystatus_get_first_proc_locked(&b, TRUE);
2227 while (next_p) {
2228 p = next_p;
2229 next_p = memorystatus_get_next_proc_locked(&b, p, TRUE);
2230
2231 if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) {
2232 continue;
2233 }
2234
2235 MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
2236 p->p_pid,
2237 p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7],
2238 p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]);
2239
2240 if (++i == memorystatus_jetsam_snapshot_max) {
2241 break;
2242 }
2243 }
2244
2245 memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time();
2246 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i;
2247 }
2248
2249 #if DEVELOPMENT || DEBUG
2250
2251 static int
2252 memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) {
2253 int ret;
2254 memorystatus_jetsam_panic_options_t debug;
2255
2256 if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) {
2257 return EINVAL;
2258 }
2259
2260 ret = copyin(buffer, &debug, buffer_size);
2261 if (ret) {
2262 return ret;
2263 }
2264
2265 /* Panic bits match kMemorystatusKilled* enum */
2266 memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask);
2267
2268 /* Copyout new value */
2269 debug.data = memorystatus_jetsam_panic_debug;
2270 ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t));
2271
2272 return ret;
2273 }
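/*
 * Illustrative userland sketch (DEVELOPMENT/DEBUG kernels only). The command
 * constant shown is an assumption based on this handler's name; the
 * {data, mask} layout matches the copyin/copyout above.
 */
#if 0
	memorystatus_jetsam_panic_options_t opts;

	opts.data = (1 << kMemorystatusKilledVMThrashing);	/* panic on this cause */
	opts.mask = (1 << kMemorystatusKilledVMThrashing);	/* touch only this bit */
	memorystatus_control(MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS, 0, 0,
	    &opts, sizeof(opts));
#endif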
2274
2275 #endif
2276
2277 /*
2278 * Jetsam a specific process.
2279 */
2280 static boolean_t
2281 memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) {
2282 boolean_t killed;
2283 proc_t p;
2284
2285 /* TODO - add a victim queue and push this into the main jetsam thread */
2286
2287 p = proc_find(victim_pid);
2288 if (!p) {
2289 return FALSE;
2290 }
2291
2292 printf("memorystatus: specifically killing pid %d [%s] (%s) - memorystatus_available_pages: %d\n",
2293 victim_pid, (p->p_comm ? p->p_comm : "(unknown)"),
2294 jetsam_kill_cause_name[cause], memorystatus_available_pages);
2295
2296 proc_list_lock();
2297
2298 if (memorystatus_jetsam_snapshot_count == 0) {
2299 memorystatus_jetsam_snapshot_procs_locked();
2300 }
2301
2302 memorystatus_update_snapshot_locked(p, cause);
2303 proc_list_unlock();
2304
2305 killed = memorystatus_do_kill(p, cause);
2306 proc_rele(p);
2307
2308 return killed;
2309 }
2310
2311 /*
2312 * Jetsam the first process in the queue.
2313 */
2314 static boolean_t
2315 memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors)
2316 {
2317 pid_t aPid;
2318 proc_t p = PROC_NULL, next_p = PROC_NULL;
2319 boolean_t new_snapshot = FALSE, killed = FALSE;
2320 unsigned int i = 0;
2321
2322 #ifndef CONFIG_FREEZE
2323 #pragma unused(any)
2324 #endif
2325
2326 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
2327 memorystatus_available_pages, 0, 0, 0, 0);
2328
2329 proc_list_lock();
2330
2331 memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND);
2332
2333 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
2334 while (next_p) {
2335 #if DEVELOPMENT || DEBUG
2336 int activeProcess;
2337 int procSuspendedForDiagnosis;
2338 #endif /* DEVELOPMENT || DEBUG */
2339
2340 p = next_p;
2341 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
2342
2343 #if DEVELOPMENT || DEBUG
2344 activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND;
2345 procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED;
2346 #endif /* DEVELOPMENT || DEBUG */
2347
2348 aPid = p->p_pid;
2349
2350 if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
2351 continue;
2352 }
2353
2354 #if DEVELOPMENT || DEBUG
2355 if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) {
2356 printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
2357 continue;
2358 }
2359 #endif /* DEVELOPMENT || DEBUG */
2360
2361 if (cause == kMemorystatusKilledVnodes)
2362 {
2363 /*
2364 * If the system runs out of vnodes, we systematically jetsam
2365 * processes in hopes of stumbling onto a vnode gain that helps
2366 * the system recover. The process that happens to trigger
2367 * this path has no known relationship to the vnode consumption.
2368 * We attempt to safeguard that process, i.e., we do not jetsam it.
2369 */
2370
2371 if (p == current_proc()) {
2372 /* do not jetsam the current process */
2373 continue;
2374 }
2375 }
2376
2377 #if CONFIG_FREEZE
2378 boolean_t skip;
2379 boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM));
2380 if (any || reclaim_proc) {
2381 skip = FALSE;
2382 } else {
2383 skip = TRUE;
2384 }
2385
2386 if (skip) {
2387 continue;
2388 } else
2389 #endif
2390 {
2391 if (priority) {
2392 *priority = p->p_memstat_effectivepriority;
2393 }
2394
2395 /*
2396 * Capture a snapshot if none exists and either: idle snapshots are
2397 * enabled (debug override); priority was not requested (this is something
2398 * other than an ambient kill); or the targeted process is not at idle priority.
2399 */
2400 if ((memorystatus_jetsam_snapshot_count == 0) &&
2401 (memorystatus_idle_snapshot || !priority || (*priority != JETSAM_PRIORITY_IDLE))) {
2402 memorystatus_jetsam_snapshot_procs_locked();
2403 new_snapshot = TRUE;
2404 }
2405
2406 /*
2407 * Mark as terminated so that if exit1() indicates success, but the process (for example)
2408 * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
2409 * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
2410 * acquisition of the proc lock.
2411 */
2412 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
2413
2414 #if DEVELOPMENT || DEBUG
2415 if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) {
2416 MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
2417 aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level);
2418 memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
2419 p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
2420 if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) {
2421 jetsam_diagnostic_suspended_one_active_proc = 1;
2422 printf("jetsam: returning after suspending first active proc - %d\n", aPid);
2423 }
2424
2425 p = proc_ref_locked(p);
2426 proc_list_unlock();
2427 if (p) {
2428 task_suspend(p->task);
2429 proc_rele(p);
2430 killed = TRUE;
2431 }
2432
2433 goto exit;
2434 } else
2435 #endif /* DEVELOPMENT || DEBUG */
2436 {
2437 /* Shift queue, update stats */
2438 memorystatus_update_snapshot_locked(p, cause);
2439
2440 p = proc_ref_locked(p);
2441 proc_list_unlock();
2442 if (p) {
2443 printf("memorystatus: %s %d [%s] (%s) - memorystatus_available_pages: %d\n",
2444 ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE) ?
2445 "idle exiting pid" : "jetsam killing pid"),
2446 aPid, (p->p_comm ? p->p_comm : "(unknown)"),
2447 jetsam_kill_cause_name[cause], memorystatus_available_pages);
2448 killed = memorystatus_do_kill(p, cause);
2449 }
2450
2451 /* Success? */
2452 if (killed) {
2453 proc_rele(p);
2454 goto exit;
2455 }
2456
2457 /* Failure - unwind and restart. */
2458 proc_list_lock();
2459 proc_rele_locked(p);
2460 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
2461 p->p_memstat_state |= P_MEMSTAT_ERROR;
2462 *errors += 1;
2463 i = 0;
2464 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
2465 }
2466 }
2467 }
2468
2469 proc_list_unlock();
2470
2471 exit:
2472 /* Clear snapshot if freshly captured and no target was found */
2473 if (new_snapshot && !killed) {
2474 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
2475 }
2476
2477 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
2478 memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);
2479
2480 return killed;
2481 }
2482
2483 #if LEGACY_HIWATER
2484
2485 static boolean_t
2486 memorystatus_kill_hiwat_proc(uint32_t *errors)
2487 {
2488 pid_t aPid = 0;
2489 proc_t p = PROC_NULL, next_p = PROC_NULL;
2490 boolean_t new_snapshot = FALSE, killed = FALSE;
2491 unsigned int i = 0;
2492
2493 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
2494 memorystatus_available_pages, 0, 0, 0, 0);
2495
2496 proc_list_lock();
2497 memorystatus_sort_by_largest_process_locked(JETSAM_PRIORITY_FOREGROUND);
2498
2499 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
2500 while (next_p) {
2501 uint32_t footprint;
2502 boolean_t skip;
2503
2504 p = next_p;
2505 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
2506
2507 aPid = p->p_pid;
2508
2509 if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
2510 continue;
2511 }
2512
2513 /* skip if no limit set */
2514 if (p->p_memstat_memlimit <= 0) {
2515 continue;
2516 }
2517
2518 /* skip if a currently inapplicable limit is encountered */
2519 if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
2520 continue;
2521 }
2522
2523 footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024));
2524 skip = (((int32_t)footprint) <= p->p_memstat_memlimit);
2525 #if DEVELOPMENT || DEBUG
2526 if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
2527 if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) {
2528 continue;
2529 }
2530 }
2531 #endif /* DEVELOPMENT || DEBUG */
2532
2533 #if CONFIG_FREEZE
2534 if (!skip) {
2535 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2536 skip = TRUE;
2537 } else {
2538 skip = FALSE;
2539 }
2540 }
2541 #endif
2542
2543 if (skip) {
2544 continue;
2545 } else {
2546 MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - %d Mb > 1 (%d Mb)\n",
2547 (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit);
2548
2549 if (memorystatus_jetsam_snapshot_count == 0) {
2550 memorystatus_jetsam_snapshot_procs_locked();
2551 new_snapshot = TRUE;
2552 }
2553
2554 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
2555
2556 #if DEVELOPMENT || DEBUG
2557 if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
2558 MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
2559 memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
2560 p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
2561
2562 p = proc_ref_locked(p);
2563 proc_list_unlock();
2564 if (p) {
2565 task_suspend(p->task);
2566 proc_rele(p);
2567 killed = TRUE;
2568 }
2569
2570 goto exit;
2571 } else
2572 #endif /* DEVELOPMENT || DEBUG */
2573 {
2574 memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat);
2575
2576 p = proc_ref_locked(p);
2577 proc_list_unlock();
2578 if (p) {
2579 printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n",
2580 aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
2581 killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
2582 }
2583
2584 /* Success? */
2585 if (killed) {
2586 proc_rele(p);
2587 goto exit;
2588 }
2589
2590 /* Failure - unwind and restart. */
2591 proc_list_lock();
2592 proc_rele_locked(p);
2593 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
2594 p->p_memstat_state |= P_MEMSTAT_ERROR;
2595 *errors += 1;
2596 i = 0;
2597 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
2598 }
2599 }
2600 }
2601
2602 proc_list_unlock();
2603
2604 exit:
2605 /* Clear snapshot if freshly captured and no target was found */
2606 if (new_snapshot && !killed) {
2607 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
2608 }
2609
2610 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END,
2611 memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);
2612
2613 return killed;
2614 }
2615
2616 #endif /* LEGACY_HIWATER */
2617
2618 static boolean_t
2619 memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) {
2620 /* TODO: allow a general async path */
2621 if ((victim_pid != -1) || (cause != kMemorystatusKilledVMPageShortage && cause != kMemorystatusKilledVMThrashing &&
2622 cause != kMemorystatusKilledFCThrashing)) {
2623 return FALSE;
2624 }
2625
2626 kill_under_pressure_cause = cause;
2627 memorystatus_thread_wake();
2628 return TRUE;
2629 }
2630
2631 static boolean_t
2632 memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) {
2633 boolean_t res;
2634 uint32_t errors = 0;
2635
2636 if (victim_pid == -1) {
2637 /* No pid, so kill first process */
2638 res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors);
2639 } else {
2640 res = memorystatus_kill_specific_process(victim_pid, cause);
2641 }
2642
2643 if (errors) {
2644 memorystatus_clear_errors();
2645 }
2646
2647 if (res == TRUE) {
2648 /* Fire off snapshot notification */
2649 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
2650 sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
2651 memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
2652 memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
2653 }
2654
2655 return res;
2656 }
2657
2658 boolean_t
2659 memorystatus_kill_on_VM_page_shortage(boolean_t async) {
2660 if (async) {
2661 return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage);
2662 } else {
2663 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage);
2664 }
2665 }
2666
2667 boolean_t
2668 memorystatus_kill_on_VM_thrashing(boolean_t async) {
2669 if (async) {
2670 return memorystatus_kill_process_async(-1, kMemorystatusKilledVMThrashing);
2671 } else {
2672 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMThrashing);
2673 }
2674 }
2675
2676 boolean_t
2677 memorystatus_kill_on_FC_thrashing(boolean_t async) {
2678 if (async) {
2679 return memorystatus_kill_process_async(-1, kMemorystatusKilledFCThrashing);
2680 } else {
2681 return memorystatus_kill_process_sync(-1, kMemorystatusKilledFCThrashing);
2682 }
2683 }
2684
2685 boolean_t
2686 memorystatus_kill_on_vnode_limit(void) {
2687 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes);
2688 }
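/*
 * Illustrative call-site sketch (an assumption about how the VM layer uses
 * these wrappers, e.g. from the pageout path when the free list runs dry):
 */
#if 0
	/* Prefer the async path: post the cause and let the jetsam thread run. */
	if (memorystatus_kill_on_VM_page_shortage(TRUE) == FALSE) {
		/* Request not accepted; kill synchronously in this context. */
		(void)memorystatus_kill_on_VM_page_shortage(FALSE);
	}
#endif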
2689
2690 #endif /* CONFIG_JETSAM */
2691
2692 #if CONFIG_FREEZE
2693
2694 __private_extern__ void
2695 memorystatus_freeze_init(void)
2696 {
2697 kern_return_t result;
2698 thread_t thread;
2699
2700 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
2701 if (result == KERN_SUCCESS) {
2702 thread_deallocate(thread);
2703 } else {
2704 panic("Could not create memorystatus_freeze_thread");
2705 }
2706 }
2707
2708 static int
2709 memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
2710 {
2711 pid_t aPid = 0;
2712 int ret = -1;
2713 proc_t p = PROC_NULL, next_p = PROC_NULL;
2714 unsigned int i = 0;
2715
2716 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
2717 memorystatus_available_pages, 0, 0, 0, 0);
2718
2719 proc_list_lock();
2720
2721 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
2722 while (next_p) {
2723 kern_return_t kr;
2724 uint32_t purgeable, wired, clean, dirty;
2725 boolean_t shared;
2726 uint32_t pages;
2727 uint32_t max_pages = 0;
2728 uint32_t state;
2729
2730 p = next_p;
2731 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
2732
2733 aPid = p->p_pid;
2734 state = p->p_memstat_state;
2735
2736 /* Ensure the process is eligible for freezing */
2737 if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) {
2738 continue; // with lock held
2739 }
2740
2741 /* Only freeze processes meeting our minimum resident page criteria */
2742 memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL, NULL);
2743 if (pages < memorystatus_freeze_pages_min) {
2744 continue; // with lock held
2745 }
2746
2747 if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPBACKED) {
2748 /* Ensure there's enough free space to freeze this process. */
2749 max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
2750 if (max_pages < memorystatus_freeze_pages_min) {
2751 *memorystatus_freeze_swap_low = TRUE;
2752 proc_list_unlock();
2753 goto exit;
2754 }
2755 } else {
2756 max_pages = UINT32_MAX - 1;
2757 }
2758
2759 /* Mark as locked temporarily to avoid kill */
2760 p->p_memstat_state |= P_MEMSTAT_LOCKED;
2761
2762 p = proc_ref_locked(p);
2763 proc_list_unlock();
2764 if (!p) {
2765 goto exit;
2766 }
2767
2768 kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
2769
2770 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
2771 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n",
2772 (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"),
2773 memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());
2774
2775 proc_list_lock();
2776 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
2777
2778 /* Success? */
2779 if (KERN_SUCCESS == kr) {
2780 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
2781
2782 memorystatus_frozen_count++;
2783
2784 p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0: P_MEMSTAT_NORECLAIM));
2785
2786 /* Update stats */
2787 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
2788 throttle_intervals[i].pageouts += dirty;
2789 }
2790
2791 memorystatus_freeze_pageouts += dirty;
2792 memorystatus_freeze_count++;
2793
2794 proc_list_unlock();
2795
2796 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
2797
2798 /* Return the number of reclaimed pages */
2799 ret = dirty;
2800
2801 } else {
2802 proc_list_unlock();
2803 }
2804
2805 proc_rele(p);
2806 goto exit;
2807 }
2808
2809 proc_list_unlock();
2810
2811 exit:
2812 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
2813 memorystatus_available_pages, aPid, 0, 0, 0);
2814
2815 return ret;
2816 }
2817
2818 static inline boolean_t
2819 memorystatus_can_freeze_processes(void)
2820 {
2821 boolean_t ret;
2822
2823 proc_list_lock();
2824
2825 if (memorystatus_suspended_count) {
2826 uint32_t average_resident_pages, estimated_processes;
2827
2828 /* Estimate the number of suspended processes we can fit */
2829 average_resident_pages = memorystatus_suspended_footprint_total / memorystatus_suspended_count;
2830 estimated_processes = memorystatus_suspended_count +
2831 ((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages);
2832
2833 /* If it's predicted that no freeze will occur, lower the threshold temporarily */
2834 if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) {
2835 memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_LOW;
2836 } else {
2837 memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
2838 }
2839
2840 MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n",
2841 memorystatus_suspended_count, average_resident_pages, estimated_processes);
2842
2843 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2844 ret = TRUE;
2845 } else {
2846 ret = FALSE;
2847 }
2848 } else {
2849 ret = FALSE;
2850 }
2851
2852 proc_list_unlock();
2853
2854 return ret;
2855 }
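/*
 * Worked example for the estimate above: with 8 suspended processes
 * totalling 40960 resident pages, average_resident_pages == 5120; if the
 * headroom (memorystatus_available_pages - memorystatus_available_pages_critical)
 * is 20480 pages, 4 more suspended processes are predicted to fit, so
 * estimated_processes == 12.
 */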
2856
2857 static boolean_t
2858 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2859 {
2860 /* Only freeze if we're sufficiently low on memory; this holds off freeze right
2861 after boot, and is generally a no-op once we've reached steady state. */
2862 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2863 return FALSE;
2864 }
2865
2866 /* Check minimum suspended process threshold. */
2867 if (!memorystatus_can_freeze_processes()) {
2868 return FALSE;
2869 }
2870
2871 /* Is swap running low? */
2872 if (*memorystatus_freeze_swap_low) {
2873 /* If there's been no movement in free swap pages since we last attempted freeze, return. */
2874 if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
2875 return FALSE;
2876 }
2877
2878 /* Pages have been freed - we can retry. */
2879 *memorystatus_freeze_swap_low = FALSE;
2880 }
2881
2882 /* OK */
2883 return TRUE;
2884 }
2885
2886 static void
2887 memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
2888 {
2889 if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
2890 if (!interval->max_pageouts) {
2891 interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)));
2892 } else {
2893 printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins);
2894 }
2895 interval->ts.tv_sec = interval->mins * 60;
2896 interval->ts.tv_nsec = 0;
2897 ADD_MACH_TIMESPEC(&interval->ts, ts);
2898 /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2899 if (interval->pageouts > interval->max_pageouts) {
2900 interval->pageouts -= interval->max_pageouts;
2901 } else {
2902 interval->pageouts = 0;
2903 }
2904 interval->throttle = FALSE;
2905 } else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
2906 printf("memorystatus_freeze_update_throttle_interval: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
2907 interval->throttle = TRUE;
2908 }
2909
2910 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
2911 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
2912 interval->throttle ? "on" : "off");
2913 }
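/*
 * Worked budget example (illustrative values): for a 60-minute interval
 * with burst_multiple == 4, max_pageouts == 4 * (60 * FREEZE_DAILY_PAGEOUTS_MAX
 * / 1440) == FREEZE_DAILY_PAGEOUTS_MAX / 6, i.e. a short interval may burst
 * to several times its pro-rata share of the daily pageout budget.
 */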
2914
2915 static boolean_t
2916 memorystatus_freeze_update_throttle(void)
2917 {
2918 clock_sec_t sec;
2919 clock_nsec_t nsec;
2920 mach_timespec_t ts;
2921 uint32_t i;
2922 boolean_t throttled = FALSE;
2923
2924 #if DEVELOPMENT || DEBUG
2925 if (!memorystatus_freeze_throttle_enabled)
2926 return FALSE;
2927 #endif
2928
2929 clock_get_system_nanotime(&sec, &nsec);
2930 ts.tv_sec = sec;
2931 ts.tv_nsec = nsec;
2932
2933 /* Check freeze pageouts over multiple intervals and throttle if we've exceeded our budget.
2934 *
2935 * This ensures that periods of inactivity can't be used as 'credit' towards freeze if the device has
2936 * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in
2937 * order to allow for bursts of activity.
2938 */
2939 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
2940 memorystatus_freeze_update_throttle_interval(&ts, &throttle_intervals[i]);
2941 if (throttle_intervals[i].throttle == TRUE)
2942 throttled = TRUE;
2943 }
2944
2945 return throttled;
2946 }
2947
2948 static void
2949 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2950 {
2951 static boolean_t memorystatus_freeze_swap_low = FALSE;
2952
2953 if (memorystatus_freeze_enabled) {
2954 if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2955 /* Only freeze if we've not exceeded our pageout budgets or we're not backed by swap. */
2956 if (DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS ||
2957 !memorystatus_freeze_update_throttle()) {
2958 memorystatus_freeze_top_process(&memorystatus_freeze_swap_low);
2959 } else {
2960 printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n");
2961 memorystatus_freeze_throttle_count++; /* Throttled, update stats */
2962 }
2963 }
2964 }
2965
2966 assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2967 thread_block((thread_continue_t) memorystatus_freeze_thread);
2968 }
2969
2970 #endif /* CONFIG_FREEZE */
2971
2972 #if VM_PRESSURE_EVENTS
2973
2974 #if CONFIG_MEMORYSTATUS
2975
2976 static int
2977 memorystatus_send_note(int event_code, void *data, size_t data_length) {
2978 int ret;
2979 struct kev_msg ev_msg;
2980
2981 ev_msg.vendor_code = KEV_VENDOR_APPLE;
2982 ev_msg.kev_class = KEV_SYSTEM_CLASS;
2983 ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS;
2984
2985 ev_msg.event_code = event_code;
2986
2987 ev_msg.dv[0].data_length = data_length;
2988 ev_msg.dv[0].data_ptr = data;
2989 ev_msg.dv[1].data_length = 0;
2990
2991 ret = kev_post_msg(&ev_msg);
2992 if (ret) {
2993 printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
2994 }
2995
2996 return ret;
2997 }
2998
2999 boolean_t
3000 memorystatus_warn_process(pid_t pid, boolean_t critical) {
3001
3002 boolean_t ret = FALSE;
3003 struct knote *kn = NULL;
3004
3005 /*
3006 * See comment in sysctl_memorystatus_vm_pressure_send.
3007 */
3008
3009 memorystatus_klist_lock();
3010 kn = vm_find_knote_from_pid(pid, &memorystatus_klist);
3011 if (kn) {
3012 /*
3013 * By setting the "fflags" here, we are forcing
3014 * a process to deal with the case where it's
3015 * bumping up into its memory limits. If we don't
3016 * do this here, we will end up depending on the
3017 * system pressure snapshot evaluation in
3018 * filt_memorystatus().
3019 */
3020
3021 if (critical) {
3022 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
3023 } else {
3024 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
3025 }
3026 KNOTE(&memorystatus_klist, kMemorystatusPressure);
3027 ret = TRUE;
3028 } else {
3029 if (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0) {
3030 ret = TRUE;
3031 }
3032 }
3033 memorystatus_klist_unlock();
3034
3035 return ret;
3036 }
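/*
 * Illustrative userland sketch of how a process lands on memorystatus_klist:
 * it registers an EVFILT_MEMORYSTATUS kevent for the pressure levels it
 * cares about (a sketch only; the filter is private and is normally reached
 * through dispatch sources rather than raw kevent calls).
 */
#if 0
#include <sys/event.h>

	struct kevent ke;
	int kq = kqueue();

	EV_SET(&ke, 0, EVFILT_MEMORYSTATUS, EV_ADD,
	    NOTE_MEMORYSTATUS_PRESSURE_NORMAL |
	    NOTE_MEMORYSTATUS_PRESSURE_WARN |
	    NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, 0, NULL);
	kevent(kq, &ke, 1, NULL, 0, NULL);
	/* Later kevent() waits return with fflags set to the delivered level. */
#endif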
3037
3038 int
3039 memorystatus_send_pressure_note(pid_t pid) {
3040 MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid);
3041 return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid));
3042 }
3043
3044 void
3045 memorystatus_send_low_swap_note(void) {
3046
3047 struct knote *kn = NULL;
3048
3049 memorystatus_klist_lock();
3050 SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
3051 if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) {
3052 KNOTE(&memorystatus_klist, kMemorystatusLowSwap);
3053 }
3054 }
3055 memorystatus_klist_unlock();
3056 }
3057
3058 boolean_t
3059 memorystatus_bg_pressure_eligible(proc_t p) {
3060 boolean_t eligible = FALSE;
3061
3062 proc_list_lock();
3063
3064 MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state);
3065
3066 /* Foreground processes have already been dealt with at this point, so just test for eligibility */
3067 if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) {
3068 eligible = TRUE;
3069 }
3070
3071 proc_list_unlock();
3072
3073 return eligible;
3074 }
3075
3076 boolean_t
3077 memorystatus_is_foreground_locked(proc_t p) {
3078 return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) ||
3079 (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT));
3080 }
3081 #endif /* CONFIG_MEMORYSTATUS */
3082
3083 /*
3084 * Trigger levels to test the mechanism.
3085 * Can be used via a sysctl.
3086 */
3087 #define TEST_LOW_MEMORY_TRIGGER_ONE 1
3088 #define TEST_LOW_MEMORY_TRIGGER_ALL 2
3089 #define TEST_PURGEABLE_TRIGGER_ONE 3
3090 #define TEST_PURGEABLE_TRIGGER_ALL 4
3091 #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE 5
3092 #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL 6
3093
3094 boolean_t memorystatus_manual_testing_on = FALSE;
3095 vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal;
3096
3097 extern struct knote *
3098 vm_pressure_select_optimal_candidate_to_notify(struct klist *, int, boolean_t);
3099
3100 extern
3101 kern_return_t vm_pressure_notification_without_levels(boolean_t);
3102
3103 extern void vm_pressure_klist_lock(void);
3104 extern void vm_pressure_klist_unlock(void);
3105
3106 extern void vm_reset_active_list(void);
3107
3108 extern void delay(int);
3109
3110 #define INTER_NOTIFICATION_DELAY (250000) /* .25 second */
3111
3112 void memorystatus_on_pageout_scan_end(void) {
3113 /* No-op */
3114 }
3115
3116 /*
3117 * kn_max - knote
3118 *
3119 * knote_pressure_level - to check if the knote is registered for this notification level.
3120 *
3121 * task - task whose bits we'll be modifying
3122 *
3123 * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again.
3124 *
3125 * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately.
3126 *
3127 */
3128
3129 boolean_t
3130 is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set)
3131 {
3132 if (kn_max->kn_sfflags & knote_pressure_level) {
3133
3134 if (task_has_been_notified(task, pressure_level_to_clear) == TRUE) {
3135
3136 task_clear_has_been_notified(task, pressure_level_to_clear);
3137 }
3138
3139 task_mark_has_been_notified(task, pressure_level_to_set);
3140 return TRUE;
3141 }
3142
3143 return FALSE;
3144 }
3145
3146 extern kern_return_t vm_pressure_notify_dispatch_vm_clients(boolean_t target_foreground_process);
3147
3148 #define VM_PRESSURE_DECREASED_SMOOTHING_PERIOD 5000 /* milliseconds */
3149
3150 kern_return_t
3151 memorystatus_update_vm_pressure(boolean_t target_foreground_process)
3152 {
3153 struct knote *kn_max = NULL;
3154 pid_t target_pid = -1;
3155 struct klist dispatch_klist = { NULL };
3156 proc_t target_proc = PROC_NULL;
3157 struct task *task = NULL;
3158 boolean_t found_candidate = FALSE;
3159
3160 static vm_pressure_level_t level_snapshot = kVMPressureNormal;
3161 static vm_pressure_level_t prev_level_snapshot = kVMPressureNormal;
3162 boolean_t smoothing_window_started = FALSE;
3163 struct timeval smoothing_window_start_tstamp = {0, 0};
3164 struct timeval curr_tstamp = {0, 0};
3165 int elapsed_msecs = 0;
3166
3167 #if !CONFIG_JETSAM
3168 #define MAX_IDLE_KILLS 100 /* limit the number of idle kills allowed */
3169
3170 int idle_kill_counter = 0;
3171
3172 /*
3173 * On desktop we take this opportunity to relieve memory pressure
3174 * by immediately killing idle-exitable processes. We use a delay
3175 * to avoid overkill, and we impose a maximum kill count as a failsafe
3176 * in case daemons re-launch too quickly.
3177 */
3178 while ((memorystatus_vm_pressure_level != kVMPressureNormal) && (idle_kill_counter < MAX_IDLE_KILLS)) {
3179 if (memorystatus_idle_exit_from_VM() == FALSE) {
3180 /* No idle exitable processes left to kill */
3181 break;
3182 }
3183 idle_kill_counter++;
3184 delay(1000000); /* 1 second */
3185 }
3186 #endif /* !CONFIG_JETSAM */
3187
3188 while (1) {
3189
3190 /*
3191 * There is a race window here. But it's not clear
3192 * how much we benefit from having extra synchronization.
3193 */
3194 level_snapshot = memorystatus_vm_pressure_level;
3195
3196 if (prev_level_snapshot > level_snapshot) {
3197 /*
3198 * Pressure decreased? Let's take a little breather
3199 * and see if this condition stays.
3200 */
3201 if (smoothing_window_started == FALSE) {
3202
3203 smoothing_window_started = TRUE;
3204 microuptime(&smoothing_window_start_tstamp);
3205 }
3206
3207 microuptime(&curr_tstamp);
3208 timevalsub(&curr_tstamp, &smoothing_window_start_tstamp);
3209 elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;
3210
3211 if (elapsed_msecs < VM_PRESSURE_DECREASED_SMOOTHING_PERIOD) {
3212
3213 delay(INTER_NOTIFICATION_DELAY);
3214 continue;
3215 }
3216 }
3217
3218 prev_level_snapshot = level_snapshot;
3219 smoothing_window_started = FALSE;
3220
3221 memorystatus_klist_lock();
3222 kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot, target_foreground_process);
3223
3224 if (kn_max == NULL) {
3225 memorystatus_klist_unlock();
3226
3227 /*
3228 * No more level-based clients to notify.
3229 * Try the non-level based notification clients.
3230 *
3231 * However, these non-level clients don't understand
3232 * the "return-to-normal" notification.
3233 *
3234 * So don't consider them for those notifications. Just
3235 * return instead.
3236 *
3237 */
3238
3239 if (level_snapshot != kVMPressureNormal) {
3240 goto try_dispatch_vm_clients;
3241 } else {
3242 return KERN_FAILURE;
3243 }
3244 }
3245
3246 target_proc = kn_max->kn_kq->kq_p;
3247
3248 proc_list_lock();
3249 if (target_proc != proc_ref_locked(target_proc)) {
3250 target_proc = PROC_NULL;
3251 proc_list_unlock();
3252 memorystatus_klist_unlock();
3253 continue;
3254 }
3255 proc_list_unlock();
3256 memorystatus_klist_unlock();
3257
3258 target_pid = target_proc->p_pid;
3259
3260 task = (struct task *)(target_proc->task);
3261
3262 if (level_snapshot != kVMPressureNormal) {
3263
3264 if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) {
3265
3266 if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, kVMPressureCritical, kVMPressureWarning) == TRUE) {
3267 found_candidate = TRUE;
3268 }
3269 } else {
3270 if (level_snapshot == kVMPressureCritical) {
3271
3272 if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, kVMPressureWarning, kVMPressureCritical) == TRUE) {
3273 found_candidate = TRUE;
3274 }
3275 }
3276 }
3277 } else {
3278 if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
3279
3280 task_clear_has_been_notified(task, kVMPressureWarning);
3281 task_clear_has_been_notified(task, kVMPressureCritical);
3282
3283 found_candidate = TRUE;
3284 }
3285 }
3286
3287 if (found_candidate == FALSE) {
3288 continue;
3289 }
3290
3291 memorystatus_klist_lock();
3292 KNOTE_DETACH(&memorystatus_klist, kn_max);
3293 KNOTE_ATTACH(&dispatch_klist, kn_max);
3294 memorystatus_klist_unlock();
3295
3296 KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure);
3297
3298 memorystatus_klist_lock();
3299 KNOTE_DETACH(&dispatch_klist, kn_max);
3300 KNOTE_ATTACH(&memorystatus_klist, kn_max);
3301 memorystatus_klist_unlock();
3302
3303 microuptime(&target_proc->vm_pressure_last_notify_tstamp);
3304 proc_rele(target_proc);
3305
3306 if (memorystatus_manual_testing_on == TRUE && target_foreground_process == TRUE) {
3307 break;
3308 }
3309
3310 try_dispatch_vm_clients:
3311 if (kn_max == NULL && level_snapshot != kVMPressureNormal) {
3312 /*
3313 * We will exit this loop when we are done with
3314 * notification clients (level and non-level based).
3315 */
3316 if ((vm_pressure_notify_dispatch_vm_clients(target_foreground_process) == KERN_FAILURE) && (kn_max == NULL)) {
3317 /*
3318 * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications
3319 * AND
3320 * we have failed to find any eligible clients for the non-level based notifications too.
3321 * So, we are done.
3322 */
3323
3324 return KERN_FAILURE;
3325 }
3326 }
3327
3328 /*
3329 * LD: The block of code below used to be invoked in the older memory notification scheme on embedded every time
3330 * a process was sent a memory pressure notification. The "memorystatus_klist" list was used to hold these
3331 * privileged listeners. We have since moved to the newer scheme and are trying to move away from the extra
3332 * notifications. The code is kept here in case we break compatibility and need to send notifications to the
3333 * privileged apps again.
3334 */
3335 #if 0
3336 #endif /* 0 */
3337
3338 if (memorystatus_manual_testing_on == TRUE) {
3339 /*
3340 * Testing out the pressure notification scheme.
3341 * No need for delays etc.
3342 */
3343 } else {
3344
3345 uint32_t sleep_interval = INTER_NOTIFICATION_DELAY;
3346 #if CONFIG_JETSAM
3347 unsigned int page_delta = 0;
3348 unsigned int skip_delay_page_threshold = 0;
3349
3350 assert(memorystatus_available_pages_pressure >= memorystatus_available_pages_critical_base);
3351
3352 page_delta = (memorystatus_available_pages_pressure - memorystatus_available_pages_critical_base) / 2;
3353 skip_delay_page_threshold = memorystatus_available_pages_pressure - page_delta;
3354
3355 if (memorystatus_available_pages <= skip_delay_page_threshold) {
3356 /*
3357 * We are nearing the critical mark fast and can't afford to wait between
3358 * notifications.
3359 */
3360 sleep_interval = 0;
3361 }
3362 #endif /* CONFIG_JETSAM */
3363
3364 if (sleep_interval) {
3365 delay(sleep_interval);
3366 }
3367 }
3368 }
3369
3370 return KERN_SUCCESS;
3371 }
3372
3373 vm_pressure_level_t
3374 convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);
3375
3376 vm_pressure_level_t
3377 convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level)
3378 {
3379 vm_pressure_level_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
3380
3381 switch (internal_pressure_level) {
3382
3383 case kVMPressureNormal:
3384 {
3385 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
3386 break;
3387 }
3388
3389 case kVMPressureWarning:
3390 case kVMPressureUrgent:
3391 {
3392 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN;
3393 break;
3394 }
3395
3396 case kVMPressureCritical:
3397 {
3398 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
3399 break;
3400 }
3401
3402 default:
3403 break;
3404 }
3405
3406 return dispatch_level;
3407 }
3408
3409 static int
3410 sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
3411 {
3412 #pragma unused(arg1, arg2, oidp)
3413 vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level);
3414
3415 return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level));
3416 }
3417
3418 #if DEBUG || DEVELOPMENT
3419
3420 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
3421 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");
3422
3423 #else /* DEBUG || DEVELOPMENT */
3424
3425 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED,
3426 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");
3427
3428 #endif /* DEBUG || DEVELOPMENT */
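/*
 * Illustrative userland read of the sysctl defined above; the returned value
 * is one of the NOTE_MEMORYSTATUS_PRESSURE_* dispatch levels.
 */
#if 0
#include <sys/sysctl.h>

	int level = 0;
	size_t len = sizeof(level);

	if (sysctlbyname("kern.memorystatus_vm_pressure_level", &level, &len, NULL, 0) == 0) {
		/* level == NOTE_MEMORYSTATUS_PRESSURE_{NORMAL,WARN,CRITICAL} */
	}
#endif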
3429
3430 extern int memorystatus_purge_on_warning;
3431 extern int memorystatus_purge_on_critical;
3432
3433 static int
3434 sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS
3435 {
3436 #pragma unused(arg1, arg2)
3437
3438 int level = 0;
3439 int error = 0;
3440 int pressure_level = 0;
3441 int trigger_request = 0;
3442 int force_purge;
3443
3444 error = sysctl_handle_int(oidp, &level, 0, req);
3445 if (error || !req->newptr) {
3446 return (error);
3447 }
3448
3449 memorystatus_manual_testing_on = TRUE;
3450
3451 trigger_request = (level >> 16) & 0xFFFF;
3452 pressure_level = (level & 0xFFFF);
3453
3454 if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE ||
3455 trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) {
3456 return EINVAL;
3457 }
3458 switch (pressure_level) {
3459 case NOTE_MEMORYSTATUS_PRESSURE_NORMAL:
3460 case NOTE_MEMORYSTATUS_PRESSURE_WARN:
3461 case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL:
3462 break;
3463 default:
3464 return EINVAL;
3465 }
3466
3467 /*
3468 * The pressure level is being set from user-space,
3469 * which uses the constants in sys/event.h.
3470 * So we translate those events to our internal levels here.
3471 */
3472 if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
3473
3474 memorystatus_manual_testing_level = kVMPressureNormal;
3475 force_purge = 0;
3476
3477 } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) {
3478
3479 memorystatus_manual_testing_level = kVMPressureWarning;
3480 force_purge = memorystatus_purge_on_warning;
3481
3482 } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
3483
3484 memorystatus_manual_testing_level = kVMPressureCritical;
3485 force_purge = memorystatus_purge_on_critical;
3486 }
3487
3488 memorystatus_vm_pressure_level = memorystatus_manual_testing_level;
3489
3490 /* purge according to the new pressure level */
3491 switch (trigger_request) {
3492 case TEST_PURGEABLE_TRIGGER_ONE:
3493 case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE:
3494 if (force_purge == 0) {
3495 /* no purging requested */
3496 break;
3497 }
3498 vm_purgeable_object_purge_one_unlocked(force_purge);
3499 break;
3500 case TEST_PURGEABLE_TRIGGER_ALL:
3501 case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL:
3502 if (force_purge == 0) {
3503 /* no purging requested */
3504 break;
3505 }
3506 		while (vm_purgeable_object_purge_one_unlocked(force_purge)) {
			/* keep purging until no purgeable objects remain */
		}
3507 break;
3508 }
3509
3510 if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) ||
3511 (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) {
3512
3513 memorystatus_update_vm_pressure(TRUE);
3514 }
3515
3516 if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) ||
3517 (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) {
3518
3519 while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) {
3520 continue;
3521 }
3522 }
3523
3524 if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
3525 memorystatus_manual_testing_on = FALSE;
3526
3527 vm_pressure_klist_lock();
3528 vm_reset_active_list();
3529 vm_pressure_klist_unlock();
3530 } else {
3531
3532 vm_pressure_klist_lock();
3533 vm_pressure_notification_without_levels(FALSE);
3534 vm_pressure_klist_unlock();
3535 }
3536
3537 return 0;
3538 }
3539
3540 SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
3541 0, 0, &sysctl_memorypressure_manual_trigger, "I", "");
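
/*
 * Usage sketch (user space; illustrative only): the value written to
 * kern.memorypressure_manual_trigger packs a TEST_* trigger request into the
 * high 16 bits and a NOTE_MEMORYSTATUS_PRESSURE_* level into the low 16 bits,
 * matching the decoding in the handler above. The TEST_* constants are
 * assumed to come from sys/kern_memorystatus.h (private header).
 *
 *	#include <sys/sysctl.h>
 *	#include <sys/event.h>
 *	#include <sys/kern_memorystatus.h>
 *
 *	int
 *	simulate_warn_pressure(int trigger_request)
 *	{
 *		int level = ((trigger_request & 0xFFFF) << 16) |
 *		    (NOTE_MEMORYSTATUS_PRESSURE_WARN & 0xFFFF);
 *
 *		return sysctlbyname("kern.memorypressure_manual_trigger",
 *		    NULL, NULL, &level, sizeof(level));
 *	}
 */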
3542
3543
3545 extern int memorystatus_purge_on_urgent;

3548 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_warning, 0, "");
3549 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_urgent, 0, "");
3550 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, "");
3551
3552
3553 #endif /* VM_PRESSURE_EVENTS */
3554
3555 /* Return both allocated and actual size, since there's a race between allocation and list compilation */
3556 static int
3557 memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only)
3558 {
3559 uint32_t list_count, i = 0;
3560 memorystatus_priority_entry_t *list_entry;
3561 proc_t p;
3562
3563 list_count = memorystatus_list_count;
3564 *list_size = sizeof(memorystatus_priority_entry_t) * list_count;
3565
3566 /* Just a size check? */
3567 if (size_only) {
3568 return 0;
3569 }
3570
3571 /* Otherwise, validate the size of the buffer */
3572 if (*buffer_size < *list_size) {
3573 return EINVAL;
3574 }
3575
3576 *list_ptr = (memorystatus_priority_entry_t*)kalloc(*list_size);
3577 	if (!*list_ptr) {
3578 return ENOMEM;
3579 }
3580
3581 memset(*list_ptr, 0, *list_size);
3582
3583 *buffer_size = *list_size;
3584 *list_size = 0;
3585
3586 list_entry = *list_ptr;
3587
3588 proc_list_lock();
3589
3590 p = memorystatus_get_first_proc_locked(&i, TRUE);
3591 while (p && (*list_size < *buffer_size)) {
3592 list_entry->pid = p->p_pid;
3593 list_entry->priority = p->p_memstat_effectivepriority;
3594 list_entry->user_data = p->p_memstat_userdata;
3595 #if LEGACY_HIWATER
3596 if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) ||
3597 (p->p_memstat_memlimit <= 0)) {
3598 task_get_phys_footprint_limit(p->task, &list_entry->limit);
3599 } else {
3600 list_entry->limit = p->p_memstat_memlimit;
3601 }
3602 #else
3603 task_get_phys_footprint_limit(p->task, &list_entry->limit);
3604 #endif
3605 list_entry->state = memorystatus_build_state(p);
3606 list_entry++;
3607
3608 *list_size += sizeof(memorystatus_priority_entry_t);
3609
3610 p = memorystatus_get_next_proc_locked(&i, p, TRUE);
3611 }
3612
3613 proc_list_unlock();
3614
3615 MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size);
3616
3617 return 0;
3618 }
3619
3620 static int
3621 memorystatus_cmd_get_priority_list(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
3622 int error = EINVAL;
3623 boolean_t size_only;
3624 memorystatus_priority_entry_t *list = NULL;
3625 size_t list_size;
3626
3627 	size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);
3628
3629 error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only);
3630 if (error) {
3631 goto out;
3632 }
3633
3634 if (!size_only) {
3635 error = copyout(list, buffer, list_size);
3636 }
3637
3638 if (error == 0) {
3639 *retval = list_size;
3640 }
3641 out:
3642
3643 if (list) {
3644 kfree(list, buffer_size);
3645 }
3646
3647 return error;
3648 }
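
/*
 * Usage sketch (user space; illustrative only): the USER_ADDR_NULL size-query
 * convention above supports a two-pass caller. This assumes the private
 * libsystem wrapper
 *	memorystatus_control(command, pid, flags, buffer, buffersize)
 * whose non-negative return value carries *retval (here, the list size in
 * bytes). Note the race mentioned above: if the list grows between the two
 * calls, the second call fails with EINVAL and the caller should retry.
 *
 *	#include <stdlib.h>
 *	#include <sys/kern_memorystatus.h>
 *
 *	memorystatus_priority_entry_t *
 *	copy_priority_list(int *count)
 *	{
 *		memorystatus_priority_entry_t *list;
 *		int size;
 *
 *		size = memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, NULL, 0);
 *		if (size <= 0 || (list = malloc((size_t)size)) == NULL) {
 *			return NULL;
 *		}
 *		if (memorystatus_control(MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, list, (size_t)size) < 0) {
 *			free(list);
 *			return NULL;
 *		}
 *		*count = size / (int)sizeof(memorystatus_priority_entry_t);
 *		return list;
 *	}
 */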
3649
3650 #if CONFIG_JETSAM
3651
3652 static void
3653 memorystatus_clear_errors(void)
3654 {
3655 proc_t p;
3656 unsigned int i = 0;
3657
3658 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0);
3659
3660 proc_list_lock();
3661
3662 p = memorystatus_get_first_proc_locked(&i, TRUE);
3663 while (p) {
3664 if (p->p_memstat_state & P_MEMSTAT_ERROR) {
3665 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
3666 }
3667 p = memorystatus_get_next_proc_locked(&i, p, TRUE);
3668 }
3669
3670 proc_list_unlock();
3671
3672 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0);
3673 }
3674
3675 static void
3676 memorystatus_update_levels_locked(boolean_t critical_only) {
3677
3678 memorystatus_available_pages_critical = memorystatus_available_pages_critical_base;
3679
3680 /*
3681 * If there's an entry in the first bucket, we have idle processes.
3682 */
3683 memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
3684 if (first_bucket->count) {
3685 memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset;
3686
3687 		if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure) {
3688 /*
3689 * The critical threshold must never exceed the pressure threshold
3690 */
3691 memorystatus_available_pages_critical = memorystatus_available_pages_pressure;
3692 }
3693 }
3694
3695 #if DEBUG || DEVELOPMENT
3696 if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
3697 memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic;
3698
3699 		if (memorystatus_available_pages_critical > memorystatus_available_pages_pressure) {
3700 /*
3701 * The critical threshold must never exceed the pressure threshold
3702 */
3703 memorystatus_available_pages_critical = memorystatus_available_pages_pressure;
3704 }
3705 }
3706 #endif
3707
3708 if (critical_only) {
3709 return;
3710 }
3711
3712 #if VM_PRESSURE_EVENTS
3713 memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
3714 #if DEBUG || DEVELOPMENT
3715 if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
3716 memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic;
3717 }
3718 #endif
3719 #endif
3720 }
3721
3722 static int
3723 memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
3724 size_t input_size = *snapshot_size;
3725
3726 if (memorystatus_jetsam_snapshot_count > 0) {
3727 *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count));
3728 } else {
3729 *snapshot_size = 0;
3730 }
3731
3732 if (size_only) {
3733 return 0;
3734 }
3735
3736 if (input_size < *snapshot_size) {
3737 return EINVAL;
3738 }
3739
3740 *snapshot = memorystatus_jetsam_snapshot;
3741
3742 MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size);
3743
3744 return 0;
3745 }
3746
3747
3748 static int
3749 memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
3750 int error = EINVAL;
3751 boolean_t size_only;
3752 memorystatus_jetsam_snapshot_t *snapshot;
3753
3754 size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);
3755
3756 error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only);
3757 if (error) {
3758 goto out;
3759 }
3760
3761 /* Copy out and reset */
3762 if (!size_only) {
3763 if ((error = copyout(snapshot, buffer, buffer_size)) == 0) {
3764 snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
3765 }
3766 }
3767
3768 if (error == 0) {
3769 *retval = buffer_size;
3770 }
3771 out:
3772 return error;
3773 }
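
/*
 * Usage sketch (user space; illustrative only): the snapshot follows the same
 * size-query convention, but note that a successful copyout resets
 * entry_count above, so reading the snapshot is destructive. Assumes the
 * private memorystatus_control() wrapper described earlier.
 *
 *	#include <stdlib.h>
 *	#include <sys/kern_memorystatus.h>
 *
 *	memorystatus_jetsam_snapshot_t *
 *	copy_jetsam_snapshot(void)
 *	{
 *		memorystatus_jetsam_snapshot_t *snapshot;
 *		int size;
 *
 *		size = memorystatus_control(MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT, 0, 0, NULL, 0);
 *		if (size <= 0 || (snapshot = malloc((size_t)size)) == NULL) {
 *			return NULL;
 *		}
 *		if (memorystatus_control(MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT, 0, 0, snapshot, (size_t)size) < 0) {
 *			free(snapshot);
 *			return NULL;
 *		}
 *		return snapshot;
 *	}
 *
 * The caller owns the returned buffer and frees it with free(3).
 */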
3774
3775 /*
3776 * Routine: memorystatus_cmd_grp_set_properties
3777 * Purpose: Update properties for a group of processes.
3778 *
3779 * Supported Properties:
3780 * [priority]
3781 * Move each process out of its effective priority
3782 * band and into a new priority band.
3783 * Maintains relative order from lowest to highest priority.
3784 * In single band, maintains relative order from head to tail.
3785 *
3786 * eg: before [effectivepriority | pid]
3787 * [18 | p101 ]
3788 * [17 | p55, p67, p19 ]
3789 * [12 | p103 p10 ]
3790 * [ 7 | p25 ]
3791 * [ 0 | p71, p82, ]
3792 *
3793 * after [ new band | pid]
3794 * [ xxx | p71, p82, p25, p103, p10, p55, p67, p19, p101]
3795 *
3796 * Returns: 0 on success, else non-zero.
3797 *
3798 * Caveat: We know there is a race window regarding recycled pids.
3799 * A process could be killed before the kernel can act on it here.
3800 * If a pid cannot be found in any of the jetsam priority bands,
3801 * then we simply ignore it. No harm.
3802 * But, if the pid has been recycled then it could be an issue.
3803 * In that scenario, we might move an unsuspecting process to the new
3804 * priority band. It's not clear how the kernel can safeguard
3805 * against this, but it would be an extremely rare case anyway.
3806 * The caller of this api might avoid such race conditions by
3807 * ensuring that the processes passed in the pid list are suspended.
3808 */
3809
3810
3811 /* This internal structure can expand when we add support for more properties */
3812 typedef struct memorystatus_internal_properties
3813 {
3814 proc_t proc;
3815 	int32_t priority;	/* see memorystatus_priority_entry_t : priority */
3816 } memorystatus_internal_properties_t;
3817
3818
3819 static int
3820 memorystatus_cmd_grp_set_properties(int32_t flags, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
3821
3822 #pragma unused (flags)
3823
3824 	/*
3825 	 * We currently only handle setting the priority property,
3826 	 * per process.
3827 	 */
3828
3829 int error = 0;
3830 memorystatus_priority_entry_t *entries = NULL;
3831 uint32_t entry_count = 0;
3832
3833 /* This will be the ordered proc list */
3834 memorystatus_internal_properties_t *table = NULL;
3835 size_t table_size = 0;
3836 uint32_t table_count = 0;
3837
3838 uint32_t i = 0;
3839 uint32_t bucket_index = 0;
3840 boolean_t head_insert;
3841 int32_t new_priority;
3842
3843 proc_t p;
3844
3845 /* Verify inputs */
3846 if ((buffer == USER_ADDR_NULL) || (buffer_size == 0) || ((buffer_size % sizeof(memorystatus_priority_entry_t)) != 0)) {
3847 error = EINVAL;
3848 goto out;
3849 }
3850
3851 entry_count = (buffer_size / sizeof(memorystatus_priority_entry_t));
3852 if ((entries = (memorystatus_priority_entry_t *)kalloc(buffer_size)) == NULL) {
3853 error = ENOMEM;
3854 goto out;
3855 }
3856
3857 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_START, entry_count, 0, 0, 0, 0);
3858
3859 if ((error = copyin(buffer, entries, buffer_size)) != 0) {
3860 goto out;
3861 }
3862
3863 /* Verify sanity of input priorities */
3864 for (i=0; i < entry_count; i++) {
3865 if (entries[i].priority == -1) {
3866 /* Use as shorthand for default priority */
3867 entries[i].priority = JETSAM_PRIORITY_DEFAULT;
3868 } else if (entries[i].priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
3869 /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use;
3870 * if requested, adjust to JETSAM_PRIORITY_IDLE. */
3871 entries[i].priority = JETSAM_PRIORITY_IDLE;
3872 } else if (entries[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
3873 /* JETSAM_PRIORITY_IDLE_HEAD inserts at the head of the idle
3874 * queue */
3875 /* Deal with this later */
3876 } else if ((entries[i].priority < 0) || (entries[i].priority >= MEMSTAT_BUCKET_COUNT)) {
3877 /* Sanity check */
3878 error = EINVAL;
3879 goto out;
3880 }
3881 }
3882
3883 table_size = sizeof(memorystatus_internal_properties_t) * entry_count;
3884 if ( (table = (memorystatus_internal_properties_t *)kalloc(table_size)) == NULL) {
3885 error = ENOMEM;
3886 goto out;
3887 }
3888 memset(table, 0, table_size);
3889
3890
3891 /*
3892 * For each jetsam bucket entry, spin through the input property list.
3893 * When a matching pid is found, populate an adjacent table with the
3894 * appropriate proc pointer and new property values.
3895 * This traversal automatically preserves order from lowest
3896 * to highest priority.
3897 */
3898
3899 bucket_index=0;
3900
3901 proc_list_lock();
3902
3903 /* Create the ordered table */
3904 p = memorystatus_get_first_proc_locked(&bucket_index, TRUE);
3905 while (p && (table_count < entry_count)) {
3906 for (i=0; i < entry_count; i++ ) {
3907 if (p->p_pid == entries[i].pid) {
3908 /* Build the table data */
3909 table[table_count].proc = p;
3910 table[table_count].priority = entries[i].priority;
3911 table_count++;
3912 break;
3913 }
3914 }
3915 p = memorystatus_get_next_proc_locked(&bucket_index, p, TRUE);
3916 }
3917
3918 	/* We now have an ordered list of procs ready to move */
3919 for (i=0; i < table_count; i++) {
3920 p = table[i].proc;
3921 assert(p != NULL);
3922
3923 		/* Allow head inserts -- but relative order among head-inserted entries is no longer preserved */
3924 if (table[i].priority == JETSAM_PRIORITY_IDLE_HEAD) {
3925 new_priority = JETSAM_PRIORITY_IDLE;
3926 head_insert = true;
3927 } else {
3928 new_priority = table[i].priority;
3929 head_insert = false;
3930 }
3931
3932 		/* Not allowed: P_MEMSTAT_INTERNAL processes may not be moved */
3933 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
3934 continue;
3935 }
3936
3937 /*
3938 * Take appropriate steps if moving proc out of the
3939 * JETSAM_PRIORITY_IDLE_DEFERRED band.
3940 */
3941 if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
3942 memorystatus_invalidate_idle_demotion_locked(p, TRUE);
3943 }
3944
3945 memorystatus_update_priority_locked(p, new_priority, head_insert);
3946 }
3947
3948 proc_list_unlock();
3949
3950 /*
3951 * if (table_count != entry_count)
3952 * then some pids were not found in a jetsam band.
3953 * harmless but interesting...
3954 */
3955 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_GRP_SET_PROP) | DBG_FUNC_END, entry_count, table_count, 0, 0, 0);
3956
3957 out:
3958 if (entries)
3959 kfree(entries, buffer_size);
3960 if (table)
3961 kfree(table, table_size);
3962
3963 return (error);
3964 }
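
/*
 * Usage sketch (user space; illustrative only): a caller builds an array of
 * memorystatus_priority_entry_t and hands it to
 * MEMORYSTATUS_CMD_GRP_SET_PROPERTIES; the kernel re-bands all listed pids
 * while preserving their relative order, as documented above. Assumes the
 * private memorystatus_control() wrapper described earlier.
 *
 *	#include <string.h>
 *	#include <sys/kern_memorystatus.h>
 *
 *	int
 *	move_pids_to_band(const pid_t *pids, int n, int32_t band)
 *	{
 *		memorystatus_priority_entry_t entries[n];
 *
 *		memset(entries, 0, sizeof(entries));
 *		for (int i = 0; i < n; i++) {
 *			entries[i].pid = pids[i];
 *			entries[i].priority = band;
 *		}
 *		return memorystatus_control(MEMORYSTATUS_CMD_GRP_SET_PROPERTIES, 0, 0,
 *		    entries, n * sizeof(memorystatus_priority_entry_t));
 *	}
 */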
3965
3966
3967 /*
3968  * This routine is meant solely for adjusting jetsam priorities and bands.
3969  * It is _not_ meant to be used for setting memory limits, especially since
3970  * we can't tell whether the memory limit being set is fatal or not.
3971  *
3972  * So the last five arguments to the memorystatus_update() call below, all of which relate to memory limits, are 0 or FALSE.
3973  */
3974
3975 static int
3976 memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
3977 const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */
3978
3979 int error;
3980 uint32_t i;
3981 uint32_t entry_count;
3982 memorystatus_priority_properties_t *entries;
3983
3984 /* Validate inputs */
3985 if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3986 return EINVAL;
3987 }
3988
3989 /* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */
3990 entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t));
3991 if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) {
3992 return EINVAL;
3993 }
3994
3995 	entries = (memorystatus_priority_properties_t *)kalloc(buffer_size);
	if (entries == NULL) {
		return ENOMEM;
	}

3997 	error = copyin(buffer, entries, buffer_size);
3998
3999 for (i = 0; i < entry_count; i++) {
4000 proc_t p;
4001
4002 if (error) {
4003 break;
4004 }
4005
4006 p = proc_find(pid);
4007 if (!p) {
4008 error = ESRCH;
4009 break;
4010 }
4011
4012 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
4013 error = EPERM;
4014 proc_rele(p);
4015 break;
4016 }
4017
4018 error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0, FALSE);
4019 proc_rele(p);
4020 }
4021
4022 kfree(entries, buffer_size);
4023
4024 return error;
4025 }
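
/*
 * Usage sketch (user space; illustrative only): a single-entry buffer is the
 * common case for MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES (the entry count
 * is capped at 2 above). Assumes the private memorystatus_control() wrapper;
 * field types per sys/kern_memorystatus.h.
 *
 *	#include <sys/kern_memorystatus.h>
 *
 *	int
 *	set_priority(pid_t pid, int32_t priority, uint64_t user_data)
 *	{
 *		memorystatus_priority_properties_t props;
 *
 *		props.priority = priority;
 *		props.user_data = user_data;
 *		return memorystatus_control(MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES,
 *		    pid, 0, &props, sizeof(props));
 *	}
 */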
4026
4027 static int
4028 memorystatus_cmd_get_pressure_status(int32_t *retval) {
4029 int error;
4030
4031 /* Need privilege for check */
4032 error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
4033 if (error) {
4034 return (error);
4035 }
4036
4037 /* Inherently racy, so it's not worth taking a lock here */
4038 *retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;
4039
4040 return error;
4041 }
4042
4043 /*
4044 * Every process, including a P_MEMSTAT_INTERNAL process (currently only pid 1), is allowed to set a HWM.
4045 */
4046
4047 static int
4048 memorystatus_cmd_set_jetsam_memory_limit(pid_t pid, int32_t high_water_mark, __unused int32_t *retval, boolean_t is_fatal_limit) {
4049 int error = 0;
4050
4051 proc_t p = proc_find(pid);
4052 if (!p) {
4053 return ESRCH;
4054 }
4055
4056 if (high_water_mark <= 0) {
4057 high_water_mark = -1; /* Disable */
4058 }
4059
4060 proc_list_lock();
4061
4062 p->p_memstat_memlimit = high_water_mark;
4063 if (memorystatus_highwater_enabled) {
4064 if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) {
4065
4066 memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority, false);
4067
4068 /*
4069 * The update priority call above takes care to set/reset the fatal memory limit state
4070 * IF the process is transitioning between foreground <-> background and has a background
4071 * memory limit.
4072 * Here, however, the process won't be doing any such transitions and so we explicitly tackle
4073 * the fatal limit state.
4074 */
4075 is_fatal_limit = FALSE;
4076
4077 } else {
4078 error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL;
4079 }
4080 }
4081
4082 if (error == 0) {
4083 if (is_fatal_limit == TRUE) {
4084 p->p_memstat_state |= P_MEMSTAT_FATAL_MEMLIMIT;
4085 } else {
4086 p->p_memstat_state &= ~P_MEMSTAT_FATAL_MEMLIMIT;
4087 }
4088 }
4089
4090 proc_list_unlock();
4091 proc_rele(p);
4092
4093 return error;
4094 }
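
/*
 * Usage sketch (user space; illustrative only): the high water mark is
 * carried in args->flags and handed to task_set_phys_footprint_limit_internal(),
 * whose limit parameter follows an MB convention (an assumption worth
 * verifying against the task code); a value <= 0 disables the limit, as
 * handled above. Assumes the private memorystatus_control() wrapper.
 *
 *	#include <sys/kern_memorystatus.h>
 *
 *	int
 *	set_high_water_mark(pid_t pid, int32_t limit_mb, int fatal)
 *	{
 *		uint32_t cmd = fatal ? MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT
 *		    : MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK;
 *
 *		return memorystatus_control(cmd, pid, (uint32_t)limit_mb, NULL, 0);
 *	}
 */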
4095
4096 /*
4097 * Returns the jetsam priority (effective or requested) of the process
4098 * associated with this task.
4099 */
4100 int
4101 proc_get_memstat_priority(proc_t p, boolean_t effective_priority)
4102 {
4103 if (p) {
4104 if (effective_priority) {
4105 return p->p_memstat_effectivepriority;
4106 } else {
4107 return p->p_memstat_requestedpriority;
4108 }
4109 }
4110 return 0;
4111 }
4112 #endif /* CONFIG_JETSAM */
4113
4114 int
4115 memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) {
4116 int error = EINVAL;
4117
4118 #if !CONFIG_JETSAM
4119 #pragma unused(ret)
4120 #endif
4121
4122 /* Root only for now */
4123 if (!kauth_cred_issuser(kauth_cred_get())) {
4124 error = EPERM;
4125 goto out;
4126 }
4127
4128 /* Sanity check */
4129 if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) {
4130 error = EINVAL;
4131 goto out;
4132 }
4133
4134 switch (args->command) {
4135 case MEMORYSTATUS_CMD_GET_PRIORITY_LIST:
4136 error = memorystatus_cmd_get_priority_list(args->buffer, args->buffersize, ret);
4137 break;
4138 #if CONFIG_JETSAM
4139 case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES:
4140 error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret);
4141 break;
4142 case MEMORYSTATUS_CMD_GRP_SET_PROPERTIES:
4143 error = memorystatus_cmd_grp_set_properties((int32_t)args->flags, args->buffer, args->buffersize, ret);
4144 break;
4145 case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT:
4146 error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret);
4147 break;
4148 case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS:
4149 error = memorystatus_cmd_get_pressure_status(ret);
4150 break;
4151 case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK:
4152 error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, FALSE);
4153 break;
4154 case MEMORYSTATUS_CMD_SET_JETSAM_TASK_LIMIT:
4155 error = memorystatus_cmd_set_jetsam_memory_limit(args->pid, (int32_t)args->flags, ret, TRUE);
4156 break;
4157 /* Test commands */
4158 #if DEVELOPMENT || DEBUG
4159 case MEMORYSTATUS_CMD_TEST_JETSAM:
4160 error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 0 : EINVAL;
4161 break;
4162 case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS:
4163 error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize);
4164 break;
4165 #endif /* DEVELOPMENT || DEBUG */
4166 #endif /* CONFIG_JETSAM */
4167 default:
4168 break;
4169 }
4170
4171 out:
4172 return error;
4173 }
4174
4175
4176 static int
4177 filt_memorystatusattach(struct knote *kn)
4178 {
4179 kn->kn_flags |= EV_CLEAR;
4180 return memorystatus_knote_register(kn);
4181 }
4182
4183 static void
4184 filt_memorystatusdetach(struct knote *kn)
4185 {
4186 memorystatus_knote_unregister(kn);
4187 }
4188
4189 static int
4190 filt_memorystatus(struct knote *kn, long hint)
4191 {
4192 if (hint) {
4193 switch (hint) {
4194 case kMemorystatusNoPressure:
4195 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
4196 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
4197 }
4198 break;
4199 case kMemorystatusPressure:
4200 if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) {
4201 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
4202 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
4203 }
4204 } else if (memorystatus_vm_pressure_level == kVMPressureCritical) {
4205
4206 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
4207 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
4208 }
4209 }
4210 break;
4211 case kMemorystatusLowSwap:
4212 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) {
4213 kn->kn_fflags |= NOTE_MEMORYSTATUS_LOW_SWAP;
4214 }
4215 break;
4216 default:
4217 break;
4218 }
4219 }
4220
4221 return (kn->kn_fflags != 0);
4222 }
4223
4224 static void
4225 memorystatus_klist_lock(void) {
4226 lck_mtx_lock(&memorystatus_klist_mutex);
4227 }
4228
4229 static void
4230 memorystatus_klist_unlock(void) {
4231 lck_mtx_unlock(&memorystatus_klist_mutex);
4232 }
4233
4234 void
4235 memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) {
4236 lck_mtx_init(&memorystatus_klist_mutex, grp, attr);
4237 klist_init(&memorystatus_klist);
4238 }
4239
4240 int
4241 memorystatus_knote_register(struct knote *kn) {
4242 int error = 0;
4243
4244 memorystatus_klist_lock();
4245
4246 if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL | NOTE_MEMORYSTATUS_LOW_SWAP)) {
4247
4248 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) {
4249 error = suser(kauth_cred_get(), 0);
4250 }
4251
4252 if (error == 0) {
4253 KNOTE_ATTACH(&memorystatus_klist, kn);
4254 }
4255 } else {
4256 error = ENOTSUP;
4257 }
4258
4259 memorystatus_klist_unlock();
4260
4261 return error;
4262 }
4263
4264 void
4265 memorystatus_knote_unregister(struct knote *kn __unused) {
4266 memorystatus_klist_lock();
4267 KNOTE_DETACH(&memorystatus_klist, kn);
4268 memorystatus_klist_unlock();
4269 }
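
/*
 * Usage sketch (user space; illustrative only): these notes are delivered
 * through a kqueue EVFILT_MEMORYSTATUS filter. Registering for
 * NOTE_MEMORYSTATUS_LOW_SWAP requires root, per the suser() check in
 * memorystatus_knote_register() above. The final kevent() call blocks until
 * one of the requested pressure notes fires.
 *
 *	#include <sys/event.h>
 *
 *	int
 *	wait_for_pressure_note(void)
 *	{
 *		struct kevent ke;
 *		int kq = kqueue();
 *
 *		if (kq == -1) {
 *			return -1;
 *		}
 *		EV_SET(&ke, 0, EVFILT_MEMORYSTATUS, EV_ADD,
 *		    NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, 0, NULL);
 *		if (kevent(kq, &ke, 1, NULL, 0, NULL) == -1) {
 *			return -1;
 *		}
 *		return kevent(kq, NULL, 0, &ke, 1, NULL);
 *	}
 */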
4270
4271
4272 #if 0
4273 #if CONFIG_JETSAM && VM_PRESSURE_EVENTS
4274 static boolean_t
4275 memorystatus_issue_pressure_kevent(boolean_t pressured) {
4276 memorystatus_klist_lock();
4277 KNOTE(&memorystatus_klist, pressured ? kMemorystatusPressure : kMemorystatusNoPressure);
4278 memorystatus_klist_unlock();
4279 return TRUE;
4280 }
4281 #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
4282 #endif /* 0 */