/* apple/xnu (xnu-2422.90.20): bsd/kern/kern_memorystatus.c */
/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 */

#include <kern/sched_prim.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/host.h>
#include <libkern/libkern.h>
#include <mach/mach_time.h>
#include <mach/task.h>
#include <mach/host_priv.h>
#include <mach/mach_host.h>
#include <pexpert/pexpert.h>
#include <sys/kern_event.h>
#include <sys/proc.h>
#include <sys/proc_info.h>
#include <sys/signal.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/wait.h>
#include <sys/tree.h>
#include <sys/priv.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>

#if CONFIG_FREEZE
#include <vm/vm_map.h>
#endif /* CONFIG_FREEZE */

#include <sys/kern_memorystatus.h>

/* These are very verbose printf()s; enable with
 * MEMORYSTATUS_DEBUG_LOG
 */
#if MEMORYSTATUS_DEBUG_LOG
#define MEMORYSTATUS_DEBUG(cond, format, ...)          \
do {                                                   \
    if (cond) { printf(format, ##__VA_ARGS__); }       \
} while(0)
#else
#define MEMORYSTATUS_DEBUG(cond, format, ...)
#endif
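
/*
 * Illustrative use (a sketch; this compiles away entirely unless the kernel
 * is built with MEMORYSTATUS_DEBUG_LOG defined):
 *
 *    MEMORYSTATUS_DEBUG(1, "memorystatus: pid %d moved to priority %d\n",
 *        p->p_pid, priority);
 */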

/* General tunables */

unsigned long delta_percentage = 5;
unsigned long critical_threshold_percentage = 5;
unsigned long idle_offset_percentage = 5;
unsigned long pressure_threshold_percentage = 15;
unsigned long freeze_threshold_percentage = 50;

/* General memorystatus stuff */

struct klist memorystatus_klist;
static lck_mtx_t memorystatus_klist_mutex;

static void memorystatus_klist_lock(void);
static void memorystatus_klist_unlock(void);

static uint64_t memorystatus_idle_delay_time = 0;

/*
 * Memorystatus kevents
 */

static int filt_memorystatusattach(struct knote *kn);
static void filt_memorystatusdetach(struct knote *kn);
static int filt_memorystatus(struct knote *kn, long hint);

struct filterops memorystatus_filtops = {
    .f_attach = filt_memorystatusattach,
    .f_detach = filt_memorystatusdetach,
    .f_event = filt_memorystatus,
};

enum {
    kMemorystatusNoPressure = 1,
    kMemorystatusPressure = 2
};

/* Idle guard handling */

static int32_t memorystatus_scheduled_idle_demotions = 0;

static thread_call_t memorystatus_idle_demotion_call;

static void memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2);
static void memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state);
static void memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state);
static void memorystatus_reschedule_idle_demotion_locked(void);

static void memorystatus_update_priority_locked(proc_t p, int priority);

int memorystatus_wakeup = 0;

unsigned int memorystatus_level = 0;

static int memorystatus_list_count = 0;

#define MEMSTAT_BUCKET_COUNT (JETSAM_PRIORITY_MAX + 1)

typedef struct memstat_bucket {
    TAILQ_HEAD(, proc) list;
    int count;
} memstat_bucket_t;

memstat_bucket_t memstat_bucket[MEMSTAT_BUCKET_COUNT];

uint64_t memstat_idle_demotion_deadline = 0;

static unsigned int memorystatus_dirty_count = 0;

#if !CONFIG_JETSAM
static boolean_t kill_idle_exit = FALSE;
#endif


int
memorystatus_get_level(__unused struct proc *p, struct memorystatus_get_level_args *args, __unused int *ret)
{
    user_addr_t level = 0;

    level = args->level;

    if (copyout(&memorystatus_level, level, sizeof(memorystatus_level)) != 0) {
        return EFAULT;
    }

    return 0;
}

static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search);
static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search);

static void memorystatus_thread(void *param __unused, wait_result_t wr __unused);

/* Jetsam */

#if CONFIG_JETSAM

/* Kill processes exceeding their limit either under memory pressure (1), or as soon as possible (0) */
#define LEGACY_HIWATER 1

static int memorystatus_highwater_enabled = 1;

extern unsigned int vm_page_free_count;
extern unsigned int vm_page_active_count;
extern unsigned int vm_page_inactive_count;
extern unsigned int vm_page_throttled_count;
extern unsigned int vm_page_purgeable_count;
extern unsigned int vm_page_wire_count;

unsigned int memorystatus_delta = 0;

static unsigned int memorystatus_available_pages = (unsigned int)-1;
static unsigned int memorystatus_available_pages_pressure = 0;
static unsigned int memorystatus_available_pages_critical = 0;
static unsigned int memorystatus_available_pages_critical_base = 0;
static unsigned int memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
#if !LATENCY_JETSAM
static unsigned int memorystatus_available_pages_critical_idle_offset = 0;
#endif

#if DEVELOPMENT || DEBUG
static unsigned int memorystatus_jetsam_panic_debug = 0;

static unsigned int memorystatus_jetsam_policy = kPolicyDefault;
static unsigned int memorystatus_jetsam_policy_offset_pages_diagnostic = 0;
#endif

static boolean_t kill_under_pressure = FALSE;

static memorystatus_jetsam_snapshot_t *memorystatus_jetsam_snapshot;
#define memorystatus_jetsam_snapshot_list memorystatus_jetsam_snapshot->entries

static unsigned int memorystatus_jetsam_snapshot_count = 0;
static unsigned int memorystatus_jetsam_snapshot_max = 0;

static void memorystatus_clear_errors(void);
static void memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint);
static int memorystatus_send_note(int event_code, void *data, size_t data_length);
static uint32_t memorystatus_build_state(proc_t p);
static void memorystatus_update_levels_locked(boolean_t critical_only);
static boolean_t memorystatus_issue_pressure_kevent(boolean_t pressured);

static boolean_t memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors);
#if LEGACY_HIWATER
static boolean_t memorystatus_kill_hiwat_proc(uint32_t *errors);
#endif

static boolean_t memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause);
static boolean_t memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause);

#endif /* CONFIG_JETSAM */

/* VM pressure */

#if VM_PRESSURE_EVENTS

#include "vm_pressure.h"

extern boolean_t memorystatus_warn_process(pid_t pid);

vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal;

#endif /* VM_PRESSURE_EVENTS */

/* Freeze */

#if CONFIG_FREEZE

boolean_t memorystatus_freeze_enabled = FALSE;
int memorystatus_freeze_wakeup = 0;

static inline boolean_t memorystatus_can_freeze_processes(void);
static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);

static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);

/* Thresholds */
static unsigned int memorystatus_freeze_threshold = 0;

static unsigned int memorystatus_freeze_pages_min = FREEZE_PAGES_MIN;
static unsigned int memorystatus_freeze_pages_max = FREEZE_PAGES_MAX;

static unsigned int memorystatus_frozen_count = 0;

static unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;

/* Stats */
static uint64_t memorystatus_freeze_count = 0;
static uint64_t memorystatus_freeze_pageouts = 0;

/* Throttling */
static throttle_interval_t throttle_intervals[] = {
    {      60, 8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */
    { 24 * 60, 1, 0, 0, { 0, 0 }, FALSE }, /* 24 hour long interval, no burst */
};

static uint64_t memorystatus_freeze_throttle_count = 0;

static unsigned int memorystatus_suspended_count = 0;
static unsigned int memorystatus_suspended_footprint_total = 0;

#endif /* CONFIG_FREEZE */

/* Debug */

#if DEVELOPMENT || DEBUG

#if CONFIG_JETSAM

/* Debug aid to help determine the per-process memory limit */

static int
sysctl_memorystatus_highwater_enable SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
    proc_t p;
    unsigned int b = 0;
    int error, enable = 0;
    int32_t memlimit;

    error = SYSCTL_OUT(req, arg1, sizeof(int));
    if (error || !req->newptr) {
        return (error);
    }

    error = SYSCTL_IN(req, &enable, sizeof(int));
    if (error || !req->newptr) {
        return (error);
    }

    if (!(enable == 0 || enable == 1)) {
        return EINVAL;
    }

    proc_list_lock();

    p = memorystatus_get_first_proc_locked(&b, TRUE);
    while (p) {
        if (enable) {
            if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
                memlimit = -1;
            } else {
                memlimit = p->p_memstat_memlimit;
            }
        } else {
            memlimit = -1;
        }
        task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);

        p = memorystatus_get_next_proc_locked(&b, p, TRUE);
    }

    memorystatus_highwater_enabled = enable;

    proc_list_unlock();

    return 0;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_highwater_enabled, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_highwater_enabled, 0, sysctl_memorystatus_highwater_enable, "I", "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_available_pages_critical, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_base, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_base, 0, "");
#if !LATENCY_JETSAM
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_critical_idle_offset, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_critical_idle_offset, 0, "");
#endif

/* Diagnostic code */

enum {
    kJetsamDiagnosticModeNone =              0,
    kJetsamDiagnosticModeAll  =              1,
    kJetsamDiagnosticModeStopAtFirstActive = 2,
    kJetsamDiagnosticModeCount
} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;

static int jetsam_diagnostic_suspended_one_active_proc = 0;

static int
sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    const char *diagnosticStrings[] = {
        "jetsam: diagnostic mode: resetting critical level.",
        "jetsam: diagnostic mode: will examine all processes",
        "jetsam: diagnostic mode: will stop at first active process"
    };

    int error, val = jetsam_diagnostic_mode;
    boolean_t changed = FALSE;

    error = sysctl_handle_int(oidp, &val, 0, req);
    if (error || !req->newptr)
        return (error);
    if ((val < 0) || (val >= kJetsamDiagnosticModeCount)) {
        printf("jetsam: diagnostic mode: invalid value - %d\n", val);
        return EINVAL;
    }

    proc_list_lock();

    if ((unsigned int) val != jetsam_diagnostic_mode) {
        jetsam_diagnostic_mode = val;

        memorystatus_jetsam_policy &= ~kPolicyDiagnoseActive;

        switch (jetsam_diagnostic_mode) {
        case kJetsamDiagnosticModeNone:
            /* Already cleared */
            break;
        case kJetsamDiagnosticModeAll:
            memorystatus_jetsam_policy |= kPolicyDiagnoseAll;
            break;
        case kJetsamDiagnosticModeStopAtFirstActive:
            memorystatus_jetsam_policy |= kPolicyDiagnoseFirst;
            break;
        default:
            /* Already validated */
            break;
        }

        memorystatus_update_levels_locked(FALSE);
        changed = TRUE;
    }

    proc_list_unlock();

    if (changed) {
        printf("%s\n", diagnosticStrings[val]);
    }

    return (0);
}

SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED|CTLFLAG_ANYBODY,
    &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");
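
/*
 * Illustrative use from a shell (DEVELOPMENT || DEBUG kernels only; values
 * correspond to the kJetsamDiagnosticMode* enum above):
 *
 *    $ sysctl debug.jetsam_diagnostic_mode          # read the current mode
 *    $ sysctl -w debug.jetsam_diagnostic_mode=2     # stop at first active process
 */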

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_jetsam_policy_offset_pages_diagnostic, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_jetsam_policy_offset_pages_diagnostic, 0, "");

#if VM_PRESSURE_EVENTS

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_available_pages_pressure, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_available_pages_pressure, 0, "");

static int
sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2, oidp)
    int error = 0;

    error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
    if (error)
        return (error);

    return SYSCTL_OUT(req, &memorystatus_vm_pressure_level, sizeof(memorystatus_vm_pressure_level));
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");

static int
sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    int error, pid = 0;

    error = sysctl_handle_int(oidp, &pid, 0, req);
    if (error || !req->newptr)
        return (error);

    return vm_dispatch_pressure_note_to_pid(pid, FALSE);
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_vm_pressure_send, "I", "");

#endif /* VM_PRESSURE_EVENTS */

#endif /* CONFIG_JETSAM */

#endif /* DEVELOPMENT || DEBUG */

#if CONFIG_FREEZE

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");

SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_count, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_throttle_count, CTLFLAG_RD|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_count, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");

boolean_t memorystatus_freeze_throttle_enabled = TRUE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");

/*
 * Enabled via: <rdar://problem/13248767> Enable the sysctl_memorystatus_freeze/thaw sysctls on Release KC
 *
 * TODO: Manual trigger of freeze and thaw for dev / debug kernels only.
 * <rdar://problem/13248795> Disable/restrict the sysctl_memorystatus_freeze/thaw sysctls on Release KC
 */
static int
sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    int error, pid = 0;
    proc_t p;

    error = sysctl_handle_int(oidp, &pid, 0, req);
    if (error || !req->newptr)
        return (error);

    p = proc_find(pid);
    if (p != NULL) {
        uint32_t purgeable, wired, clean, dirty;
        boolean_t shared;
        uint32_t max_pages = 0;

        if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
            max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
        } else {
            max_pages = UINT32_MAX - 1;
        }
        error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
        proc_rele(p);

        if (error)
            error = EIO;
        return error;
    }
    return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");

static int
sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)

    int error, pid = 0;
    proc_t p;

    error = sysctl_handle_int(oidp, &pid, 0, req);
    if (error || !req->newptr)
        return (error);

    p = proc_find(pid);
    if (p != NULL) {
        error = task_thaw(p->task);
        proc_rele(p);

        if (error)
            error = EIO;
        return error;
    }

    return EINVAL;
}

SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");

#endif /* CONFIG_FREEZE */
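
/*
 * Illustrative manual trigger from a root shell (a sketch; these OIDs are
 * write-only, so the value written is the target pid):
 *
 *    $ sysctl -w kern.memorystatus_freeze=123     # freeze pid 123
 *    $ sysctl -w kern.memorystatus_thaw=123       # thaw pid 123
 */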

extern kern_return_t kernel_thread_start_priority(thread_continue_t continuation,
    void *parameter,
    integer_t priority,
    thread_t *new_thread);

static proc_t memorystatus_get_first_proc_locked(unsigned int *bucket_index, boolean_t search) {
    memstat_bucket_t *current_bucket;
    proc_t next_p;

    if ((*bucket_index) >= MEMSTAT_BUCKET_COUNT) {
        return NULL;
    }

    current_bucket = &memstat_bucket[*bucket_index];
    next_p = TAILQ_FIRST(&current_bucket->list);
    if (!next_p && search) {
        while (!next_p && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
            current_bucket = &memstat_bucket[*bucket_index];
            next_p = TAILQ_FIRST(&current_bucket->list);
        }
    }

    return next_p;
}

static proc_t memorystatus_get_next_proc_locked(unsigned int *bucket_index, proc_t p, boolean_t search) {
    memstat_bucket_t *current_bucket;
    proc_t next_p;

    if (!p || ((*bucket_index) >= MEMSTAT_BUCKET_COUNT)) {
        return NULL;
    }

    next_p = TAILQ_NEXT(p, p_memstat_list);
    while (!next_p && search && (++(*bucket_index) < MEMSTAT_BUCKET_COUNT)) {
        current_bucket = &memstat_bucket[*bucket_index];
        next_p = TAILQ_FIRST(&current_bucket->list);
    }

    return next_p;
}
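
/*
 * Together these implement the band-walk idiom used throughout this file:
 * with the proc list lock held, start at a bucket index and visit every
 * tracked process in ascending priority order. A sketch:
 *
 *    unsigned int b = 0;
 *    proc_t p = memorystatus_get_first_proc_locked(&b, TRUE);
 *    while (p) {
 *        // ... inspect or update p ...
 *        p = memorystatus_get_next_proc_locked(&b, p, TRUE);
 *    }
 *
 * Passing search == FALSE restricts the walk to the single bucket that
 * *bucket_index points at.
 */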

__private_extern__ void
memorystatus_init(void)
{
    thread_t thread = THREAD_NULL;
    kern_return_t result;
    int i;

    nanoseconds_to_absolutetime((uint64_t)DEFERRED_IDLE_EXIT_TIME_SECS * NSEC_PER_SEC, &memorystatus_idle_delay_time);

    /* Init buckets */
    for (i = 0; i < MEMSTAT_BUCKET_COUNT; i++) {
        TAILQ_INIT(&memstat_bucket[i].list);
        memstat_bucket[i].count = 0;
    }

    memorystatus_idle_demotion_call = thread_call_allocate((thread_call_func_t)memorystatus_perform_idle_demotion, NULL);

    /* Apply overrides */
    PE_get_default("kern.jetsam_delta", &delta_percentage, sizeof(delta_percentage));
    assert(delta_percentage < 100);
    PE_get_default("kern.jetsam_critical_threshold", &critical_threshold_percentage, sizeof(critical_threshold_percentage));
    assert(critical_threshold_percentage < 100);
    PE_get_default("kern.jetsam_idle_offset", &idle_offset_percentage, sizeof(idle_offset_percentage));
    assert(idle_offset_percentage < 100);
    PE_get_default("kern.jetsam_pressure_threshold", &pressure_threshold_percentage, sizeof(pressure_threshold_percentage));
    assert(pressure_threshold_percentage < 100);
    PE_get_default("kern.jetsam_freeze_threshold", &freeze_threshold_percentage, sizeof(freeze_threshold_percentage));
    assert(freeze_threshold_percentage < 100);

#if CONFIG_JETSAM
    memorystatus_delta = delta_percentage * atop_64(max_mem) / 100;
#if !LATENCY_JETSAM
    memorystatus_available_pages_critical_idle_offset = idle_offset_percentage * atop_64(max_mem) / 100;
#endif

    memorystatus_available_pages_critical_base = (critical_threshold_percentage / delta_percentage) * memorystatus_delta;

    memorystatus_jetsam_snapshot_max = maxproc;
    memorystatus_jetsam_snapshot =
        (memorystatus_jetsam_snapshot_t*)kalloc(sizeof(memorystatus_jetsam_snapshot_t) +
        sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_max);
    if (!memorystatus_jetsam_snapshot) {
        panic("Could not allocate memorystatus_jetsam_snapshot");
    }

    /* No contention at this point */
    memorystatus_update_levels_locked(FALSE);
#endif

#if CONFIG_FREEZE
    memorystatus_freeze_threshold = (freeze_threshold_percentage / delta_percentage) * memorystatus_delta;
#endif

    result = kernel_thread_start_priority(memorystatus_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread);
    if (result == KERN_SUCCESS) {
        thread_deallocate(thread);
    } else {
        panic("Could not create memorystatus_thread");
    }
}
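
/*
 * Worked example of the threshold arithmetic above (a sketch, assuming the
 * default 5% tunables and a 1 GB device with 4 KB pages, i.e.
 * atop_64(max_mem) == 262144):
 *
 *    memorystatus_delta                         = 5 * 262144 / 100 = 13107 pages
 *    memorystatus_available_pages_critical_base = (5 / 5) * 13107  = 13107 pages
 *
 * Note that (critical_threshold_percentage / delta_percentage) is integer
 * division, so percentages that are not multiples of delta_percentage are
 * rounded down before scaling.
 */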

/* Centralised for the purposes of allowing panic-on-jetsam */
extern void
vm_wake_compactor_swapper(void);

static boolean_t
memorystatus_do_kill(proc_t p, uint32_t cause) {

    int retval = 0;

#if CONFIG_JETSAM && (DEVELOPMENT || DEBUG)
    if (memorystatus_jetsam_panic_debug & (1 << cause)) {
        panic("memorystatus_do_kill(): jetsam debug panic (cause: %d)", cause);
    }
#else
#pragma unused(cause)
#endif
    int jetsam_flags = P_LTERM_JETSAM;
    switch (cause) {
        case kMemorystatusKilledHiwat:           jetsam_flags |= P_JETSAM_HIWAT; break;
        case kMemorystatusKilledVnodes:          jetsam_flags |= P_JETSAM_VNODE; break;
        case kMemorystatusKilledVMPageShortage:  jetsam_flags |= P_JETSAM_VMPAGESHORTAGE; break;
        case kMemorystatusKilledVMThrashing:     jetsam_flags |= P_JETSAM_VMTHRASHING; break;
        case kMemorystatusKilledPerProcessLimit: jetsam_flags |= P_JETSAM_PID; break;
        case kMemorystatusKilledIdleExit:        jetsam_flags |= P_JETSAM_IDLEEXIT; break;
    }
    retval = exit1_internal(p, W_EXITCODE(0, SIGKILL), (int *)NULL, FALSE, FALSE, jetsam_flags);

    if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
        vm_wake_compactor_swapper();
    }

    return (retval == 0);
}

/*
 * Node manipulation
 */

static void
memorystatus_check_levels_locked(void) {
#if CONFIG_JETSAM
    /* Update levels */
    memorystatus_update_levels_locked(TRUE);
#endif
}

static void
memorystatus_perform_idle_demotion(__unused void *spare1, __unused void *spare2)
{
    proc_t p;
    uint64_t current_time;
    memstat_bucket_t *demotion_bucket;

    MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion()\n");

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_START, 0, 0, 0, 0, 0);

    current_time = mach_absolute_time();

    proc_list_lock();

    demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
    p = TAILQ_FIRST(&demotion_bucket->list);

    while (p) {
        MEMORYSTATUS_DEBUG(1, "memorystatus_perform_idle_demotion() found %d\n", p->p_pid);

        assert(p->p_memstat_idledeadline);
        assert(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS);
        assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED);

        if (current_time >= p->p_memstat_idledeadline) {
#if DEBUG || DEVELOPMENT
            if (!(p->p_memstat_dirty & P_DIRTY_MARKED)) {
                printf("memorystatus_perform_idle_demotion: moving process %d to idle band, but never dirtied (0x%x)!\n", p->p_pid, p->p_memstat_dirty);
            }
#endif
            memorystatus_invalidate_idle_demotion_locked(p, TRUE);
            memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE);

            /* The prior process has moved out of the demotion bucket, so grab the new head and continue */
            p = TAILQ_FIRST(&demotion_bucket->list);
            continue;
        }

        /* No further candidates */
        break;
    }

    memorystatus_reschedule_idle_demotion_locked();

    proc_list_unlock();

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_IDLE_DEMOTE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}

static void
memorystatus_schedule_idle_demotion_locked(proc_t p, boolean_t set_state)
{
    MEMORYSTATUS_DEBUG(1, "memorystatus_schedule_idle_demotion_locked: scheduling demotion to idle band for process %d (dirty:0x%x, set_state %d, demotions %d).\n",
        p->p_pid, p->p_memstat_dirty, set_state, memorystatus_scheduled_idle_demotions);

    assert((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) == (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS));

    if (set_state) {
        assert(p->p_memstat_idledeadline == 0);
        p->p_memstat_idledeadline = mach_absolute_time() + memorystatus_idle_delay_time;
    }

    assert(p->p_memstat_idledeadline);

    memorystatus_scheduled_idle_demotions++;
}

static void
memorystatus_invalidate_idle_demotion_locked(proc_t p, boolean_t clear_state)
{
    MEMORYSTATUS_DEBUG(1, "memorystatus_invalidate_idle_demotion(): invalidating demotion to idle band for process %d (clear_state %d, demotions %d).\n",
        p->p_pid, clear_state, memorystatus_scheduled_idle_demotions);

    assert(p->p_memstat_idledeadline);

    if (clear_state) {
        p->p_memstat_idledeadline = 0;
        p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
    }

    memorystatus_scheduled_idle_demotions--;
    assert(memorystatus_scheduled_idle_demotions >= 0);
}

static void
memorystatus_reschedule_idle_demotion_locked(void) {
    if (0 == memorystatus_scheduled_idle_demotions) {
        if (memstat_idle_demotion_deadline) {
            /* Transitioned 1->0, so cancel next call */
            thread_call_cancel(memorystatus_idle_demotion_call);
            memstat_idle_demotion_deadline = 0;
        }
    } else {
        memstat_bucket_t *demotion_bucket;
        proc_t p;
        demotion_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE_DEFERRED];
        p = TAILQ_FIRST(&demotion_bucket->list);
        assert(p && p->p_memstat_idledeadline);

        if (memstat_idle_demotion_deadline != p->p_memstat_idledeadline) {
            thread_call_enter_delayed(memorystatus_idle_demotion_call, p->p_memstat_idledeadline);
            memstat_idle_demotion_deadline = p->p_memstat_idledeadline;
        }
    }
}

/*
 * List manipulation
 */

int
memorystatus_add(proc_t p, boolean_t locked)
{
    memstat_bucket_t *bucket;

    MEMORYSTATUS_DEBUG(1, "memorystatus_list_add(): adding process %d with priority %d.\n", p->p_pid, p->p_memstat_effectivepriority);

    if (!locked) {
        proc_list_lock();
    }

    /* Processes marked internal do not have priority tracked */
    if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
        goto exit;
    }

    bucket = &memstat_bucket[p->p_memstat_effectivepriority];

    TAILQ_INSERT_TAIL(&bucket->list, p, p_memstat_list);
    bucket->count++;

    memorystatus_list_count++;

    memorystatus_check_levels_locked();

exit:
    if (!locked) {
        proc_list_unlock();
    }

    return 0;
}

static void
memorystatus_update_priority_locked(proc_t p, int priority)
{
    memstat_bucket_t *old_bucket, *new_bucket;

    assert(priority < MEMSTAT_BUCKET_COUNT);

    /* Ensure that exit isn't underway, leaving the proc retained but removed from its bucket */
    if ((p->p_listflag & P_LIST_EXITED) != 0) {
        return;
    }

    MEMORYSTATUS_DEBUG(1, "memorystatus_update_priority_locked(): setting process %d to priority %d\n", p->p_pid, priority);

    old_bucket = &memstat_bucket[p->p_memstat_effectivepriority];
    TAILQ_REMOVE(&old_bucket->list, p, p_memstat_list);
    old_bucket->count--;

    new_bucket = &memstat_bucket[priority];
    TAILQ_INSERT_TAIL(&new_bucket->list, p, p_memstat_list);
    new_bucket->count++;

#if CONFIG_JETSAM
    if (memorystatus_highwater_enabled && (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND)) {
        if (((priority >= JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority < JETSAM_PRIORITY_FOREGROUND)) ||
            ((priority < JETSAM_PRIORITY_FOREGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND))) {
            int32_t memlimit = (priority >= JETSAM_PRIORITY_FOREGROUND) ? -1 : p->p_memstat_memlimit;
            task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);
        }
    }
#endif

    p->p_memstat_effectivepriority = priority;

    memorystatus_check_levels_locked();
}

int
memorystatus_update(proc_t p, int priority, uint64_t user_data, boolean_t effective, boolean_t update_memlimit, int32_t memlimit, boolean_t memlimit_background)
{
    int ret;

#if !CONFIG_JETSAM
#pragma unused(update_memlimit, memlimit, memlimit_background)
#endif

    MEMORYSTATUS_DEBUG(1, "memorystatus_update: changing process %d: priority %d, user_data 0x%llx\n", p->p_pid, priority, user_data);

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_START, p->p_pid, priority, user_data, effective, 0);

    if (priority == -1) {
        /* Use as shorthand for default priority */
        priority = JETSAM_PRIORITY_DEFAULT;
    } else if (priority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        /* JETSAM_PRIORITY_IDLE_DEFERRED is reserved for internal use; if requested, adjust to JETSAM_PRIORITY_IDLE. */
        priority = JETSAM_PRIORITY_IDLE;
    } else if ((priority < 0) || (priority >= MEMSTAT_BUCKET_COUNT)) {
        /* Sanity check */
        ret = EINVAL;
        goto out;
    }

    proc_list_lock();

    assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

    if (effective && (p->p_memstat_state & P_MEMSTAT_PRIORITYUPDATED)) {
        ret = EALREADY;
        proc_list_unlock();
        MEMORYSTATUS_DEBUG(1, "memorystatus_update: effective change specified for pid %d, but change already occurred.\n", p->p_pid);
        goto out;
    }

    p->p_memstat_state |= P_MEMSTAT_PRIORITYUPDATED;
    p->p_memstat_userdata = user_data;
    p->p_memstat_requestedpriority = priority;

#if CONFIG_JETSAM
    if (update_memlimit) {
        p->p_memstat_memlimit = memlimit;
        if (memlimit_background) {
            /* Will be set as priority is updated */
            p->p_memstat_state |= P_MEMSTAT_MEMLIMIT_BACKGROUND;
        } else {
            /* Otherwise, apply now */
            if (memorystatus_highwater_enabled) {
                task_set_phys_footprint_limit_internal(p->task, (memlimit > 0) ? memlimit : -1, NULL, TRUE);
            }
        }
    }
#endif

    memorystatus_update_priority_locked(p, priority);

    proc_list_unlock();
    ret = 0;

out:
    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_UPDATE) | DBG_FUNC_END, ret, 0, 0, 0, 0);

    return ret;
}

int
memorystatus_remove(proc_t p, boolean_t locked)
{
    int ret;
    memstat_bucket_t *bucket;

    MEMORYSTATUS_DEBUG(1, "memorystatus_list_remove: removing process %d\n", p->p_pid);

    if (!locked) {
        proc_list_lock();
    }

    assert(!(p->p_memstat_state & P_MEMSTAT_INTERNAL));

    bucket = &memstat_bucket[p->p_memstat_effectivepriority];
    TAILQ_REMOVE(&bucket->list, p, p_memstat_list);
    bucket->count--;

    memorystatus_list_count--;

    /* If awaiting demotion to the idle band, clean up */
    if (p->p_memstat_effectivepriority == JETSAM_PRIORITY_IDLE_DEFERRED) {
        memorystatus_invalidate_idle_demotion_locked(p, TRUE);
        memorystatus_reschedule_idle_demotion_locked();
    }

    memorystatus_check_levels_locked();

#if CONFIG_FREEZE
    if (p->p_memstat_state & (P_MEMSTAT_FROZEN)) {
        memorystatus_frozen_count--;
    }

    if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
        memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint;
        memorystatus_suspended_count--;
    }
#endif

    if (!locked) {
        proc_list_unlock();
    }

    if (p) {
        ret = 0;
    } else {
        ret = ESRCH;
    }

    return ret;
}

static boolean_t
memorystatus_validate_track_flags(struct proc *target_p, uint32_t pcontrol) {
    /* See that the process isn't marked for termination */
    if (target_p->p_memstat_dirty & P_DIRTY_TERMINATED) {
        return FALSE;
    }

    /* Idle exit requires that process be tracked */
    if ((pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) &&
        !(pcontrol & PROC_DIRTY_TRACK)) {
        return FALSE;
    }

    /* Deferral is only relevant if idle exit is specified */
    if ((pcontrol & PROC_DIRTY_DEFER) &&
        !(pcontrol & PROC_DIRTY_ALLOWS_IDLE_EXIT)) {
        return FALSE;
    }

    return TRUE;
}

static void
memorystatus_update_idle_priority_locked(proc_t p) {
    int32_t priority;

    MEMORYSTATUS_DEBUG(1, "memorystatus_update_idle_priority_locked(): pid %d dirty 0x%X\n", p->p_pid, p->p_memstat_dirty);

    if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_IS_DIRTY)) == P_DIRTY_IDLE_EXIT_ENABLED) {
        priority = (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) ? JETSAM_PRIORITY_IDLE_DEFERRED : JETSAM_PRIORITY_IDLE;
    } else {
        priority = p->p_memstat_requestedpriority;
    }

    memorystatus_update_priority_locked(p, priority);
}

/*
 * Processes can opt to have their state tracked by the kernel, indicating when they are busy (dirty) or idle
 * (clean). They may also indicate that they support termination when idle, with the result that they are promoted
 * to their desired, higher, jetsam priority when dirty (and are therefore killed later), and demoted to the low
 * priority idle band when clean (and killed earlier, protecting higher priority processes).
 *
 * If the deferral flag is set, then newly tracked processes will be protected for an initial period (as determined by
 * memorystatus_idle_delay_time); if they go clean during this time, then they will be moved to a deferred-idle band
 * with a slightly higher priority, guarding against immediate termination under memory pressure and being unable to
 * make forward progress. Finally, when the guard expires, they will be moved to the standard, lowest-priority, idle
 * band. The deferral can be cleared early by clearing the appropriate flag.
 *
 * The deferral timer is active only for the duration that the process is marked as guarded and clean; if the process
 * is marked dirty, the timer will be cancelled. Upon being subsequently marked clean, the deferment will either be
 * re-enabled or the guard state cleared, depending on whether the guard deadline has passed.
 */
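
/*
 * A sketch of the expected lifecycle, in terms of the entry points below
 * (userspace typically reaches these via the proc_info dirty-control
 * interface; the direct calls here are purely illustrative):
 *
 *    // Opt in: track state, allow idle exit, and request the initial guard.
 *    memorystatus_dirty_track(p, PROC_DIRTY_TRACK | PROC_DIRTY_ALLOW_IDLE_EXIT | PROC_DIRTY_DEFER);
 *
 *    memorystatus_dirty_set(p, TRUE, 1);    // mark dirty: promoted to the requested priority
 *    memorystatus_dirty_set(p, TRUE, 0);    // mark clean: demoted to the (deferred-)idle band
 */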

int
memorystatus_dirty_track(proc_t p, uint32_t pcontrol) {
    unsigned int old_dirty;
    boolean_t reschedule = FALSE;
    int ret;

    proc_list_lock();

    if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
        ret = EPERM;
        goto exit;
    }

    if (!memorystatus_validate_track_flags(p, pcontrol)) {
        ret = EINVAL;
        goto exit;
    }

    old_dirty = p->p_memstat_dirty;

    /* These bits are cumulative, as per <rdar://problem/11159924> */
    if (pcontrol & PROC_DIRTY_TRACK) {
        p->p_memstat_dirty |= P_DIRTY_TRACK;
    }

    if (pcontrol & PROC_DIRTY_ALLOW_IDLE_EXIT) {
        p->p_memstat_dirty |= P_DIRTY_ALLOW_IDLE_EXIT;
    }

    /* This can be set and cleared exactly once. */
    if ((pcontrol & PROC_DIRTY_DEFER) && !(old_dirty & P_DIRTY_DEFER)) {
        p->p_memstat_dirty |= (P_DIRTY_DEFER|P_DIRTY_DEFER_IN_PROGRESS);
    } else {
        p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
    }

    MEMORYSTATUS_DEBUG(1, "memorystatus_on_track_dirty(): set idle-exit %s / deferred %s / dirty %s for process %d\n",
        ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) ? "Y" : "N",
        p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS ? "Y" : "N",
        p->p_memstat_dirty & P_DIRTY ? "Y" : "N",
        p->p_pid);

    /* Kick off or invalidate the idle exit deferment if there's a state transition. */
    if (!(p->p_memstat_dirty & P_DIRTY_IS_DIRTY)) {
        if (((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) &&
            (p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && !(old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) {
            memorystatus_schedule_idle_demotion_locked(p, TRUE);
            reschedule = TRUE;
        } else if (!(p->p_memstat_dirty & P_DIRTY_DEFER_IN_PROGRESS) && (old_dirty & P_DIRTY_DEFER_IN_PROGRESS)) {
            memorystatus_invalidate_idle_demotion_locked(p, TRUE);
            reschedule = TRUE;
        }
    }

    memorystatus_update_idle_priority_locked(p);

    if (reschedule) {
        memorystatus_reschedule_idle_demotion_locked();
    }

    ret = 0;

exit:
    proc_list_unlock();

    return ret;
}

int
memorystatus_dirty_set(proc_t p, boolean_t self, uint32_t pcontrol) {
    int ret;
    boolean_t kill = false;
    boolean_t reschedule = FALSE;
    boolean_t was_dirty = FALSE;
    boolean_t now_dirty = FALSE;

    MEMORYSTATUS_DEBUG(1, "memorystatus_dirty_set(): %d %d 0x%x 0x%x\n", self, p->p_pid, pcontrol, p->p_memstat_dirty);

    proc_list_lock();

    if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
        ret = EPERM;
        goto exit;
    }

    if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
        was_dirty = TRUE;

    if (!(p->p_memstat_dirty & P_DIRTY_TRACK)) {
        /* Dirty tracking not enabled */
        ret = EINVAL;
    } else if (pcontrol && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
        /*
         * Process is set to be terminated and we're attempting to mark it dirty.
         * Set for termination and marking as clean is OK - see <rdar://problem/10594349>.
         */
        ret = EBUSY;
    } else {
        int flag = (self == TRUE) ? P_DIRTY : P_DIRTY_SHUTDOWN;
        if (pcontrol && !(p->p_memstat_dirty & flag)) {
            /* Mark the process as having been dirtied at some point */
            p->p_memstat_dirty |= (flag | P_DIRTY_MARKED);
            memorystatus_dirty_count++;
            ret = 0;
        } else if ((pcontrol == 0) && (p->p_memstat_dirty & flag)) {
            if ((flag == P_DIRTY_SHUTDOWN) && !(p->p_memstat_dirty & P_DIRTY)) {
                /* Clearing the dirty shutdown flag, and the process is otherwise clean - kill */
                p->p_memstat_dirty |= P_DIRTY_TERMINATED;
                kill = true;
            } else if ((flag == P_DIRTY) && (p->p_memstat_dirty & P_DIRTY_TERMINATED)) {
                /* Kill previously terminated processes if set clean */
                kill = true;
            }
            p->p_memstat_dirty &= ~flag;
            memorystatus_dirty_count--;
            ret = 0;
        } else {
            /* Already set */
            ret = EALREADY;
        }
    }

    if (ret != 0) {
        goto exit;
    }

    if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY)
        now_dirty = TRUE;

    if ((was_dirty == TRUE && now_dirty == FALSE) ||
        (was_dirty == FALSE && now_dirty == TRUE)) {

        /* Manage idle exit deferral, if applied */
        if ((p->p_memstat_dirty & (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) ==
            (P_DIRTY_IDLE_EXIT_ENABLED|P_DIRTY_DEFER_IN_PROGRESS)) {
            if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
                memorystatus_invalidate_idle_demotion_locked(p, FALSE);
                reschedule = TRUE;
            } else {
                /* We evaluate lazily, so reset the idle-deadline if it's expired by the time the process becomes clean. */
                if (mach_absolute_time() >= p->p_memstat_idledeadline) {
                    p->p_memstat_idledeadline = 0;
                    p->p_memstat_dirty &= ~P_DIRTY_DEFER_IN_PROGRESS;
                } else {
                    memorystatus_schedule_idle_demotion_locked(p, FALSE);
                    reschedule = TRUE;
                }
            }
        }

        memorystatus_update_idle_priority_locked(p);

        /* If the deferral state changed, reschedule the demotion timer */
        if (reschedule) {
            memorystatus_reschedule_idle_demotion_locked();
        }
    }

    if (kill) {
        psignal(p, SIGKILL);
    }

exit:
    proc_list_unlock();

    return ret;
}

int
memorystatus_dirty_get(proc_t p) {
    int ret = 0;

    proc_list_lock();

    if (p->p_memstat_dirty & P_DIRTY_TRACK) {
        ret |= PROC_DIRTY_TRACKED;
        if (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) {
            ret |= PROC_DIRTY_ALLOWS_IDLE_EXIT;
        }
        if (p->p_memstat_dirty & P_DIRTY) {
            ret |= PROC_DIRTY_IS_DIRTY;
        }
    }

    proc_list_unlock();

    return ret;
}

int
memorystatus_on_terminate(proc_t p) {
    int sig;

    proc_list_lock();

    p->p_memstat_dirty |= P_DIRTY_TERMINATED;

    if ((p->p_memstat_dirty & (P_DIRTY_TRACK|P_DIRTY_IS_DIRTY)) == P_DIRTY_TRACK) {
        /* Clean; mark as terminated and issue SIGKILL */
        sig = SIGKILL;
    } else {
        /* Dirty, terminated, or state tracking is unsupported; issue SIGTERM to allow cleanup */
        sig = SIGTERM;
    }

    proc_list_unlock();

    return sig;
}

void
memorystatus_on_suspend(proc_t p)
{
#if CONFIG_FREEZE
    uint32_t pages;
    memorystatus_get_task_page_counts(p->task, &pages, NULL);
#endif
    proc_list_lock();
#if CONFIG_FREEZE
    p->p_memstat_suspendedfootprint = pages;
    memorystatus_suspended_footprint_total += pages;
    memorystatus_suspended_count++;
#endif
    p->p_memstat_state |= P_MEMSTAT_SUSPENDED;
    proc_list_unlock();
}

void
memorystatus_on_resume(proc_t p)
{
#if CONFIG_FREEZE
    boolean_t frozen;
    pid_t pid;
#endif

    proc_list_lock();

#if CONFIG_FREEZE
    frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN);
    if (frozen) {
        memorystatus_frozen_count--;
        p->p_memstat_state |= P_MEMSTAT_PRIOR_THAW;
    }

    memorystatus_suspended_footprint_total -= p->p_memstat_suspendedfootprint;
    memorystatus_suspended_count--;

    pid = p->p_pid;
#endif

    p->p_memstat_state &= ~(P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN);

    proc_list_unlock();

#if CONFIG_FREEZE
    if (frozen) {
        memorystatus_freeze_entry_t data = { pid, FALSE, 0 };
        memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
    }
#endif
}

void
memorystatus_on_inactivity(proc_t p)
{
#pragma unused(p)
#if CONFIG_FREEZE
    /* Wake the freeze thread */
    thread_wakeup((event_t)&memorystatus_freeze_wakeup);
#endif
}

static uint32_t
memorystatus_build_state(proc_t p) {
    uint32_t snapshot_state = 0;

    /* General */
    if (p->p_memstat_state & P_MEMSTAT_SUSPENDED) {
        snapshot_state |= kMemorystatusSuspended;
    }
    if (p->p_memstat_state & P_MEMSTAT_FROZEN) {
        snapshot_state |= kMemorystatusFrozen;
    }
    if (p->p_memstat_state & P_MEMSTAT_PRIOR_THAW) {
        snapshot_state |= kMemorystatusWasThawed;
    }

    /* Tracking */
    if (p->p_memstat_dirty & P_DIRTY_TRACK) {
        snapshot_state |= kMemorystatusTracked;
    }
    if ((p->p_memstat_dirty & P_DIRTY_IDLE_EXIT_ENABLED) == P_DIRTY_IDLE_EXIT_ENABLED) {
        snapshot_state |= kMemorystatusSupportsIdleExit;
    }
    if (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) {
        snapshot_state |= kMemorystatusDirty;
    }

    return snapshot_state;
}

#if !CONFIG_JETSAM

static boolean_t
kill_idle_exit_proc(void)
{
    proc_t p, victim_p = PROC_NULL;
    uint64_t current_time;
    boolean_t killed = FALSE;
    unsigned int i = 0;

    /* Pick next idle exit victim. */
    current_time = mach_absolute_time();

    proc_list_lock();

    p = memorystatus_get_first_proc_locked(&i, FALSE);
    while (p) {
        /* No need to look beyond the idle band */
        if (p->p_memstat_effectivepriority != JETSAM_PRIORITY_IDLE) {
            break;
        }

        if ((p->p_memstat_dirty & (P_DIRTY_ALLOW_IDLE_EXIT|P_DIRTY_IS_DIRTY|P_DIRTY_TERMINATED)) == (P_DIRTY_ALLOW_IDLE_EXIT)) {
            if (current_time >= p->p_memstat_idledeadline) {
                p->p_memstat_dirty |= P_DIRTY_TERMINATED;
                victim_p = proc_ref_locked(p);
                break;
            }
        }

        p = memorystatus_get_next_proc_locked(&i, p, FALSE);
    }

    proc_list_unlock();

    if (victim_p) {
        printf("memorystatus_thread: idle exiting pid %d [%s]\n", victim_p->p_pid, (victim_p->p_comm ? victim_p->p_comm : "(unknown)"));
        killed = memorystatus_do_kill(victim_p, kMemorystatusKilledIdleExit);
        proc_rele(victim_p);
    }

    return killed;
}
#endif

static void
memorystatus_thread_wake(void) {
    thread_wakeup((event_t)&memorystatus_wakeup);
}

static int
memorystatus_thread_block(uint32_t interval_ms, thread_continue_t continuation)
{
    if (interval_ms) {
        assert_wait_timeout(&memorystatus_wakeup, THREAD_UNINT, interval_ms, 1000 * NSEC_PER_USEC);
    } else {
        assert_wait(&memorystatus_wakeup, THREAD_UNINT);
    }

    return thread_block(continuation);
}

extern boolean_t vm_compressor_thrashing_detected;
extern uint64_t vm_compressor_total_compressions(void);

static void
memorystatus_thread(void *param __unused, wait_result_t wr __unused)
{
    static boolean_t is_vm_privileged = FALSE;
#if CONFIG_JETSAM
    boolean_t post_snapshot = FALSE;
    uint32_t errors = 0;
#endif

    if (is_vm_privileged == FALSE) {
        /*
         * It's the first time the thread has run, so just mark the thread as privileged and block.
         * This avoids a spurious pass with unset variables, as set out in <rdar://problem/9609402>.
         */
        thread_wire(host_priv_self(), current_thread(), TRUE);
        is_vm_privileged = TRUE;

        memorystatus_thread_block(0, memorystatus_thread);
    }

#if CONFIG_JETSAM

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_START,
        memorystatus_available_pages, 0, 0, 0, 0);

    uint32_t cause = vm_compressor_thrashing_detected ? kMemorystatusKilledVMThrashing : kMemorystatusKilledVMPageShortage;

    /* Jetsam-aware version.
     *
     * If woken under pressure, go down the path of killing:
     *
     * - processes exceeding their highwater mark if no clean victims available
     * - the least recently used process if no highwater mark victims available
     */
#if !LATENCY_JETSAM
    while (vm_compressor_thrashing_detected || memorystatus_available_pages <= memorystatus_available_pages_critical) {
#else
    while (kill_under_pressure) {
        const uint32_t SNAPSHOT_WAIT_TIMEOUT_MS = 100;
        wait_result_t wait_result;
#endif
        boolean_t killed;
        int32_t priority;

#if LEGACY_HIWATER
        /* Highwater */
        killed = memorystatus_kill_hiwat_proc(&errors);
        if (killed) {
            post_snapshot = TRUE;
            goto done;
        }
#endif

        /* LRU */
        killed = memorystatus_kill_top_process(TRUE, cause, &priority, &errors);
        if (killed) {
            if (!kill_under_pressure && (priority != JETSAM_PRIORITY_IDLE)) {
                /* Don't generate logs for steady-state idle-exit kills */
                post_snapshot = TRUE;
            }
            goto done;
        }

        /* Under pressure and unable to kill a process - panic */
        panic("memorystatus_jetsam_thread: no victim! available pages:%d\n", memorystatus_available_pages);

done:
        kill_under_pressure = FALSE;
        vm_compressor_thrashing_detected = FALSE;

#if LATENCY_JETSAM
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_START,
            memorystatus_available_pages, 0, 0, 0, 0);
        thread_wakeup((event_t)&latency_jetsam_wakeup);
        /*
         * Coalesce snapshot reports in the face of repeated jetsams by blocking here with a timeout.
         * If the wait expires, issue the note.
         */
        wait_result = memorystatus_thread_block(SNAPSHOT_WAIT_TIMEOUT_MS, THREAD_CONTINUE_NULL);
        KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_LATENCY_COALESCE) | DBG_FUNC_END,
            memorystatus_available_pages, 0, 0, 0, 0);
        if (wait_result != THREAD_AWAKENED) {
            /* Catch-all */
            break;
        }
#endif
    }

    if (errors) {
        memorystatus_clear_errors();
    }

#if VM_PRESSURE_EVENTS
    memorystatus_update_vm_pressure(TRUE);
#endif

    if (post_snapshot) {
        size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
            sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
        memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
        memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
    }

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_SCAN) | DBG_FUNC_END,
        memorystatus_available_pages, 0, 0, 0, 0);

#else /* CONFIG_JETSAM */

    /* Simple version.
     *
     * Jetsam not enabled, so just kill the first suitable clean process
     * and sleep.
     */

    if (kill_idle_exit) {
        kill_idle_exit_proc();
        kill_idle_exit = FALSE;
    }

#endif /* CONFIG_JETSAM */

    memorystatus_thread_block(0, memorystatus_thread);
}

#if !CONFIG_JETSAM
boolean_t memorystatus_idle_exit_from_VM(void) {
    kill_idle_exit = TRUE;
    memorystatus_thread_wake();
    return TRUE;
}
#endif

#if CONFIG_JETSAM

/*
 * Callback invoked when the allowable physical memory footprint
 * (dirty pages + IOKit mappings) is exceeded.
 *
 * This is invoked for both advisory, non-fatal per-task high watermarks,
 * as well as the fatal system-wide task memory limit.
 */
void
memorystatus_on_ledger_footprint_exceeded(boolean_t warning, const int max_footprint_mb)
{
    proc_t p = current_proc();

    printf("process %d (%s) %s physical memory footprint limit of %d MB\n",
        p->p_pid, p->p_comm,
        warning ? "approaching" : "exceeded",
        max_footprint_mb);

#if VM_PRESSURE_EVENTS
    if (warning == TRUE) {
        if (memorystatus_warn_process(p->p_pid) != TRUE) {
            /* Print warning, since it's possible that the task has not registered for pressure notifications */
            printf("task_exceeded_footprint: failed to warn the current task (exiting?).\n");
        }
        return;
    }
#endif /* VM_PRESSURE_EVENTS */

    if (p->p_memstat_memlimit <= 0) {
        /*
         * If this process has no high watermark, then we have been invoked because the task
         * has violated the system-wide per-task memory limit.
         */
        if (memorystatus_kill_process_sync(p->p_pid, kMemorystatusKilledPerProcessLimit) != TRUE) {
            printf("task_exceeded_footprint: failed to kill the current task (exiting?).\n");
        }
    }
}

static void
memorystatus_get_task_page_counts(task_t task, uint32_t *footprint, uint32_t *max_footprint)
{
    assert(task);
    assert(footprint);

    *footprint = (uint32_t)(get_task_phys_footprint(task) / PAGE_SIZE_64);
    if (max_footprint) {
        *max_footprint = (uint32_t)(get_task_phys_footprint_max(task) / PAGE_SIZE_64);
    }
}

static int
memorystatus_send_note(int event_code, void *data, size_t data_length) {
    int ret;
    struct kev_msg ev_msg;

    ev_msg.vendor_code  = KEV_VENDOR_APPLE;
    ev_msg.kev_class    = KEV_SYSTEM_CLASS;
    ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS;

    ev_msg.event_code   = event_code;

    ev_msg.dv[0].data_length = data_length;
    ev_msg.dv[0].data_ptr = data;
    ev_msg.dv[1].data_length = 0;

    ret = kev_post_msg(&ev_msg);
    if (ret) {
        printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
    }

    return ret;
}
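
/*
 * Notes posted here are delivered to userspace over a kernel event socket.
 * A sketch of a listener (userspace, error handling elided; see
 * <sys/kern_event.h> for the interfaces assumed here):
 *
 *    int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *    struct kev_request req = {
 *        .vendor_code  = KEV_VENDOR_APPLE,
 *        .kev_class    = KEV_SYSTEM_CLASS,
 *        .kev_subclass = KEV_MEMORYSTATUS_SUBCLASS,
 *    };
 *    ioctl(fd, SIOCSKEVFILT, &req);      // filter to memorystatus events
 *
 *    char buf[1024];
 *    read(fd, buf, sizeof(buf));         // each read yields one struct kern_event_msg
 */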

static void
memorystatus_update_snapshot_locked(proc_t p, uint32_t kill_cause)
{
    unsigned int i;

    for (i = 0; i < memorystatus_jetsam_snapshot_count; i++) {
        if (memorystatus_jetsam_snapshot_list[i].pid == p->p_pid) {
            /* Update if the priority has changed since the snapshot was taken */
            if (memorystatus_jetsam_snapshot_list[i].priority != p->p_memstat_effectivepriority) {
                memorystatus_jetsam_snapshot_list[i].priority = p->p_memstat_effectivepriority;
                strlcpy(memorystatus_jetsam_snapshot_list[i].name, p->p_comm, MAXCOMLEN+1);
                memorystatus_jetsam_snapshot_list[i].state = memorystatus_build_state(p);
                memorystatus_jetsam_snapshot_list[i].user_data = p->p_memstat_userdata;
                memorystatus_jetsam_snapshot_list[i].fds = p->p_fd->fd_nfiles;
            }
            memorystatus_jetsam_snapshot_list[i].killed = kill_cause;
            return;
        }
    }
}

void memorystatus_pages_update(unsigned int pages_avail)
{
    boolean_t critical, delta;

    if (!memorystatus_delta) {
        return;
    }

    critical = (pages_avail < memorystatus_available_pages_critical) ? TRUE : FALSE;
    delta = ((pages_avail >= (memorystatus_available_pages + memorystatus_delta))
        || (memorystatus_available_pages >= (pages_avail + memorystatus_delta))) ? TRUE : FALSE;

    if (critical || delta) {
        memorystatus_available_pages = pages_avail;
        memorystatus_level = memorystatus_available_pages * 100 / atop_64(max_mem);

#if LATENCY_JETSAM
        /* Bail early to avoid excessive wake-ups */
        if (critical) {
            return;
        }
#endif

        memorystatus_thread_wake();
    }
}
1669
1670 static boolean_t
1671 memorystatus_get_snapshot_properties_for_proc_locked(proc_t p, memorystatus_jetsam_snapshot_entry_t *entry)
1672 {
1673 memset(entry, 0, sizeof(memorystatus_jetsam_snapshot_entry_t));
1674
1675 entry->pid = p->p_pid;
1676 strlcpy(&entry->name[0], p->p_comm, MAXCOMLEN+1);
1677 entry->priority = p->p_memstat_effectivepriority;
1678 memorystatus_get_task_page_counts(p->task, &entry->pages, &entry->max_pages);
1679 entry->state = memorystatus_build_state(p);
1680 entry->user_data = p->p_memstat_userdata;
1681 memcpy(&entry->uuid[0], &p->p_uuid[0], sizeof(p->p_uuid));
1682
1683 return TRUE;
1684 }
1685
1686 static void
1687 memorystatus_jetsam_snapshot_procs_locked(void)
1688 {
1689 proc_t p, next_p;
1690 unsigned int b = 0, i = 0;
1691 kern_return_t kr = KERN_SUCCESS;
1692
1693 mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
1694 vm_statistics64_data_t vm_stat;
1695
1696 if ((kr = host_statistics64(host_self(), HOST_VM_INFO64, (host_info64_t)&vm_stat, &count) != KERN_SUCCESS)) {
1697 printf("memorystatus_jetsam_snapshot_procs_locked: host_statistics64 failed with %d\n", kr);
1698 memset(&memorystatus_jetsam_snapshot->stats, 0, sizeof(memorystatus_jetsam_snapshot->stats));
1699 } else {
1700 memorystatus_jetsam_snapshot->stats.free_pages = vm_stat.free_count;
1701 memorystatus_jetsam_snapshot->stats.active_pages = vm_stat.active_count;
1702 memorystatus_jetsam_snapshot->stats.inactive_pages = vm_stat.inactive_count;
1703 memorystatus_jetsam_snapshot->stats.throttled_pages = vm_stat.throttled_count;
1704 memorystatus_jetsam_snapshot->stats.purgeable_pages = vm_stat.purgeable_count;
1705 memorystatus_jetsam_snapshot->stats.wired_pages = vm_stat.wire_count;
1706
1707 memorystatus_jetsam_snapshot->stats.speculative_pages = vm_stat.speculative_count;
1708 memorystatus_jetsam_snapshot->stats.filebacked_pages = vm_stat.external_page_count;
1709 memorystatus_jetsam_snapshot->stats.anonymous_pages = vm_stat.internal_page_count;
1710 memorystatus_jetsam_snapshot->stats.compressions = vm_stat.compressions;
1711 memorystatus_jetsam_snapshot->stats.decompressions = vm_stat.decompressions;
1712 memorystatus_jetsam_snapshot->stats.compressor_pages = vm_stat.compressor_page_count;
1713 memorystatus_jetsam_snapshot->stats.total_uncompressed_pages_in_compressor = vm_stat.total_uncompressed_pages_in_compressor;
1714 }
1715
1716 next_p = memorystatus_get_first_proc_locked(&b, TRUE);
1717 while (next_p) {
1718 p = next_p;
1719 next_p = memorystatus_get_next_proc_locked(&b, p, TRUE);
1720
1721 if (FALSE == memorystatus_get_snapshot_properties_for_proc_locked(p, &memorystatus_jetsam_snapshot_list[i])) {
1722 continue;
1723 }
1724
1725 MEMORYSTATUS_DEBUG(0, "jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
1726 p->p_pid,
1727 p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7],
1728 p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]);
1729
1730 if (++i == memorystatus_jetsam_snapshot_max) {
1731 break;
1732 }
1733 }
1734
1735 memorystatus_jetsam_snapshot->snapshot_time = mach_absolute_time();
1736 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = i;
1737 }
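/*
 * Editor's sketch: the VM counters captured above come from the same Mach
 * interface that is available to user space (via mach_host_self() rather
 * than the kernel's host_self()). A minimal, self-contained sampler:
 *
 *	#include <stdio.h>
 *	#include <mach/mach.h>
 *
 *	int
 *	main(void)
 *	{
 *		vm_statistics64_data_t vm_stat;
 *		mach_msg_type_number_t count = HOST_VM_INFO64_COUNT;
 *		kern_return_t kr;
 *
 *		kr = host_statistics64(mach_host_self(), HOST_VM_INFO64,
 *		    (host_info64_t)&vm_stat, &count);
 *		if (kr != KERN_SUCCESS) {
 *			fprintf(stderr, "host_statistics64: %d\n", kr);
 *			return 1;
 *		}
 *		printf("free %u active %u inactive %u wired %u\n",
 *		    vm_stat.free_count, vm_stat.active_count,
 *		    vm_stat.inactive_count, vm_stat.wire_count);
 *		return 0;
 *	}
 */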
1738
1739 #if DEVELOPMENT || DEBUG
1740
1741 static int
1742 memorystatus_cmd_set_panic_bits(user_addr_t buffer, uint32_t buffer_size) {
1743 int ret;
1744 memorystatus_jetsam_panic_options_t debug;
1745
1746 if (buffer_size != sizeof(memorystatus_jetsam_panic_options_t)) {
1747 return EINVAL;
1748 }
1749
1750 ret = copyin(buffer, &debug, buffer_size);
1751 if (ret) {
1752 return ret;
1753 }
1754
1755 /* Panic bits match kMemorystatusKilled* enum */
1756 memorystatus_jetsam_panic_debug = (memorystatus_jetsam_panic_debug & ~debug.mask) | (debug.data & debug.mask);
1757
1758 /* Copyout new value */
1759 debug.data = memorystatus_jetsam_panic_debug;
1760 ret = copyout(&debug, buffer, sizeof(memorystatus_jetsam_panic_options_t));
1761
1762 return ret;
1763 }
1764
1765 #endif
1766
1767 /*
1768 * Jetsam a specific process.
1769 */
1770 static boolean_t
1771 memorystatus_kill_specific_process(pid_t victim_pid, uint32_t cause) {
1772 boolean_t killed;
1773 proc_t p;
1774
1775 /* TODO - add a victim queue and push this into the main jetsam thread */
1776
1777 p = proc_find(victim_pid);
1778 if (!p) {
1779 return FALSE;
1780 }
1781
1782 printf("memorystatus: specifically killing pid %d [%s] - memorystatus_available_pages: %d\n",
1783 victim_pid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
1784
1785 proc_list_lock();
1786
1787 if (memorystatus_jetsam_snapshot_count == 0) {
1788 memorystatus_jetsam_snapshot_procs_locked();
1789 }
1790
1791 memorystatus_update_snapshot_locked(p, cause);
1792 proc_list_unlock();
1793
1794 killed = memorystatus_do_kill(p, cause);
1795 proc_rele(p);
1796
1797 return killed;
1798 }
1799
1800 /*
1801 * Jetsam the first process in the queue.
1802 */
1803 static boolean_t
1804 memorystatus_kill_top_process(boolean_t any, uint32_t cause, int32_t *priority, uint32_t *errors)
1805 {
1806 pid_t aPid;
1807 proc_t p = PROC_NULL, next_p = PROC_NULL;
1808 boolean_t new_snapshot = FALSE, killed = FALSE;
1809 unsigned int i = 0;
1810
1811 #ifndef CONFIG_FREEZE
1812 #pragma unused(any)
1813 #endif
1814
1815 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_START,
1816 memorystatus_available_pages, 0, 0, 0, 0);
1817
1818 proc_list_lock();
1819
1820 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
1821 while (next_p) {
1822 #if DEVELOPMENT || DEBUG
1823 int activeProcess;
1824 int procSuspendedForDiagnosis;
1825 #endif /* DEVELOPMENT || DEBUG */
1826
1827 p = next_p;
1828 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
1829
1830 #if DEVELOPMENT || DEBUG
1831 activeProcess = p->p_memstat_state & P_MEMSTAT_FOREGROUND;
1832 procSuspendedForDiagnosis = p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED;
1833 #endif /* DEVELOPMENT || DEBUG */
1834
1835 aPid = p->p_pid;
1836
1837 if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
1838 continue;
1839 }
1840
1841 #if DEVELOPMENT || DEBUG
1842 if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && procSuspendedForDiagnosis) {
1843 printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
1844 continue;
1845 }
1846 #endif /* DEVELOPMENT || DEBUG */
1847
1848 #if CONFIG_FREEZE
1849 boolean_t skip;
1850 boolean_t reclaim_proc = !(p->p_memstat_state & (P_MEMSTAT_LOCKED | P_MEMSTAT_NORECLAIM));
1851 if (any || reclaim_proc) {
1852 skip = FALSE;
1853 } else {
1854 skip = TRUE;
1855 }
1856
1857 if (skip) {
1858 continue;
1859 } else
1860 #endif
1861 {
1862 if (priority) {
1863 *priority = p->p_memstat_effectivepriority;
1864 }
1865
1866 /*
1867 * Capture a snapshot if none exists and:
1868 * - priority was not requested (this is something other than an ambient kill)
1869 * - the priority was requested *and* the targeted process is not at idle priority
1870 */
1871 if ((memorystatus_jetsam_snapshot_count == 0) &&
1872 ((priority == NULL) || (*priority != JETSAM_PRIORITY_IDLE))) {
1873 memorystatus_jetsam_snapshot_procs_locked();
1874 new_snapshot = TRUE;
1875 }
1876
1877 /*
1878 * Mark as terminated so that if exit1() indicates success, but the process (for example)
1879 * is blocked in task_exception_notify(), it'll be skipped if encountered again - see
1880 * <rdar://problem/13553476>. This is cheaper than examining P_LEXIT, which requires the
1881 * acquisition of the proc lock.
1882 */
1883 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
1884
1885 #if DEVELOPMENT || DEBUG
1886 if ((memorystatus_jetsam_policy & kPolicyDiagnoseActive) && activeProcess) {
1887 MEMORYSTATUS_DEBUG(1, "jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
1888 aPid, (p->p_comm ? p->p_comm: "(unknown)"), memorystatus_level);
1889 memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
1890 p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
1891 if (memorystatus_jetsam_policy & kPolicyDiagnoseFirst) {
1892 jetsam_diagnostic_suspended_one_active_proc = 1;
1893 printf("jetsam: returning after suspending first active proc - %d\n", aPid);
1894 }
1895
1896 p = proc_ref_locked(p);
1897 proc_list_unlock();
1898 if (p) {
1899 task_suspend(p->task);
1900 proc_rele(p);
1901 killed = TRUE;
1902 }
1903
1904 goto exit;
1905 } else
1906 #endif /* DEVELOPMENT || DEBUG */
1907 {
1908 /* Shift queue, update stats */
1909 memorystatus_update_snapshot_locked(p, cause);
1910
1911 p = proc_ref_locked(p);
1912 proc_list_unlock();
1913 if (p) {
1914 printf("memorystatus: jetsam killing pid %d [%s] - memorystatus_available_pages: %d\n",
1915 aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
1916 killed = memorystatus_do_kill(p, cause);
1917 }
1918
1919 /* Success? */
1920 if (killed) {
1921 proc_rele(p);
1922 goto exit;
1923 }
1924
1925 /* Failure - unwind and restart. */
1926 proc_list_lock();
1927 proc_rele_locked(p);
1928 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
1929 p->p_memstat_state |= P_MEMSTAT_ERROR;
1930 *errors += 1;
1931 i = 0;
1932 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
1933 }
1934 }
1935 }
1936
1937 proc_list_unlock();
1938
1939 exit:
1940 /* Clear snapshot if freshly captured and no target was found */
1941 if (new_snapshot && !killed) {
1942 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
1943 }
1944
1945 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM) | DBG_FUNC_END,
1946 memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);
1947
1948 return killed;
1949 }
1950
1951 #if LEGACY_HIWATER
1952
1953 static boolean_t
1954 memorystatus_kill_hiwat_proc(uint32_t *errors)
1955 {
1956 pid_t aPid = 0;
1957 proc_t p = PROC_NULL, next_p = PROC_NULL;
1958 boolean_t new_snapshot = FALSE, killed = FALSE;
1959 unsigned int i = 0;
1960
1961 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_START,
1962 memorystatus_available_pages, 0, 0, 0, 0);
1963
1964 proc_list_lock();
1965
1966 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
1967 while (next_p) {
1968 uint32_t footprint;
1969 boolean_t skip;
1970
1971 p = next_p;
1972 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
1973
1974 aPid = p->p_pid;
1975
1976 if (p->p_memstat_state & (P_MEMSTAT_ERROR | P_MEMSTAT_TERMINATED)) {
1977 continue;
1978 }
1979
1980 /* skip if no limit set */
1981 if (p->p_memstat_memlimit <= 0) {
1982 continue;
1983 }
1984
1985 /* skip if a currently inapplicable limit is encountered */
1986 if ((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) {
1987 continue;
1988 }
1989
1990 footprint = (uint32_t)(get_task_phys_footprint(p->task) / (1024 * 1024));
1991 skip = (((int32_t)footprint) <= p->p_memstat_memlimit);
1992 #if DEVELOPMENT || DEBUG
1993 if (!skip && (memorystatus_jetsam_policy & kPolicyDiagnoseActive)) {
1994 if (p->p_memstat_state & P_MEMSTAT_DIAG_SUSPENDED) {
1995 continue;
1996 }
1997 }
1998 #endif /* DEVELOPMENT || DEBUG */
1999
2000 #if CONFIG_FREEZE
2001 if (!skip) {
2002 if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
2003 skip = TRUE;
2004 } else {
2005 skip = FALSE;
2006 }
2007 }
2008 #endif
2009
2010 if (skip) {
2011 continue;
2012 } else {
2013 MEMORYSTATUS_DEBUG(1, "jetsam: %s pid %d [%s] - footprint %d MB over limit %d MB\n",
2014 (memorystatus_jetsam_policy & kPolicyDiagnoseActive) ? "suspending": "killing", aPid, p->p_comm, footprint, p->p_memstat_memlimit);
2015
2016 if (memorystatus_jetsam_snapshot_count == 0) {
2017 memorystatus_jetsam_snapshot_procs_locked();
2018 new_snapshot = TRUE;
2019 }
2020
2021 p->p_memstat_state |= P_MEMSTAT_TERMINATED;
2022
2023 #if DEVELOPMENT || DEBUG
2024 if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
2025 MEMORYSTATUS_DEBUG(1, "jetsam: pid %d suspended for diagnosis - memorystatus_available_pages: %d\n", aPid, memorystatus_available_pages);
2026 memorystatus_update_snapshot_locked(p, kMemorystatusKilledDiagnostic);
2027 p->p_memstat_state |= P_MEMSTAT_DIAG_SUSPENDED;
2028
2029 p = proc_ref_locked(p);
2030 proc_list_unlock();
2031 if (p) {
2032 task_suspend(p->task);
2033 proc_rele(p);
2034 killed = TRUE;
2035 }
2036
2037 goto exit;
2038 } else
2039 #endif /* DEVELOPMENT || DEBUG */
2040 {
2041 memorystatus_update_snapshot_locked(p, kMemorystatusKilledHiwat);
2042
2043 p = proc_ref_locked(p);
2044 proc_list_unlock();
2045 if (p) {
2046 printf("memorystatus: jetsam killing pid %d [%s] (highwater) - memorystatus_available_pages: %d\n",
2047 aPid, (p->p_comm ? p->p_comm : "(unknown)"), memorystatus_available_pages);
2048 killed = memorystatus_do_kill(p, kMemorystatusKilledHiwat);
2049 }
2050
2051 /* Success? */
2052 if (killed) {
2053 proc_rele(p);
2054 goto exit;
2055 }
2056
2057 /* Failure - unwind and restart. */
2058 proc_list_lock();
2059 proc_rele_locked(p);
2060 p->p_memstat_state &= ~P_MEMSTAT_TERMINATED;
2061 p->p_memstat_state |= P_MEMSTAT_ERROR;
2062 *errors += 1;
2063 i = 0;
2064 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
2065 }
2066 }
2067 }
2068
2069 proc_list_unlock();
2070
2071 exit:
2072 /* Clear snapshot if freshly captured and no target was found */
2073 if (new_snapshot && !killed) {
2074 memorystatus_jetsam_snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
2075 }
2076
2077 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_JETSAM_HIWAT) | DBG_FUNC_END,
2078 memorystatus_available_pages, killed ? aPid : 0, 0, 0, 0);
2079
2080 return killed;
2081 }
2082
2083 #endif /* LEGACY_HIWATER */
2084
2085 static boolean_t
2086 memorystatus_kill_process_async(pid_t victim_pid, uint32_t cause) {
2087 /* TODO: allow a general async path */
2088 if ((victim_pid != -1) || ((cause != kMemorystatusKilledVMPageShortage) && (cause != kMemorystatusKilledVMThrashing))) {
2089 return FALSE;
2090 }
2091
2092 kill_under_pressure = TRUE;
2093 memorystatus_thread_wake();
2094 return TRUE;
2095 }
2096
2097 static boolean_t
2098 memorystatus_kill_process_sync(pid_t victim_pid, uint32_t cause) {
2099 boolean_t res;
2100 uint32_t errors = 0;
2101
2102 if (victim_pid == -1) {
2103 /* No pid, so kill first process */
2104 res = memorystatus_kill_top_process(TRUE, cause, NULL, &errors);
2105 } else {
2106 res = memorystatus_kill_specific_process(victim_pid, cause);
2107 }
2108
2109 if (errors) {
2110 memorystatus_clear_errors();
2111 }
2112
2113 if (res == TRUE) {
2114 /* Fire off snapshot notification */
2115 size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
2116 sizeof(memorystatus_jetsam_snapshot_entry_t) * memorystatus_jetsam_snapshot_count;
2117 memorystatus_jetsam_snapshot->notification_time = mach_absolute_time();
2118 memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
2119 }
2120
2121 return res;
2122 }
2123
2124 boolean_t
2125 memorystatus_kill_on_VM_page_shortage(boolean_t async) {
2126 if (async) {
2127 return memorystatus_kill_process_async(-1, kMemorystatusKilledVMPageShortage);
2128 } else {
2129 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMPageShortage);
2130 }
2131 }
2132
2133 boolean_t
2134 memorystatus_kill_on_VM_thrashing(boolean_t async) {
2135 if (async) {
2136 return memorystatus_kill_process_async(-1, kMemorystatusKilledVMThrashing);
2137 } else {
2138 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVMThrashing);
2139 }
2140 }
2141
2142 boolean_t
2143 memorystatus_kill_on_vnode_limit(void) {
2144 return memorystatus_kill_process_sync(-1, kMemorystatusKilledVnodes);
2145 }
2146
2147 #endif /* CONFIG_JETSAM */
2148
2149 #if CONFIG_FREEZE
2150
2151 __private_extern__ void
2152 memorystatus_freeze_init(void)
2153 {
2154 kern_return_t result;
2155 thread_t thread;
2156
2157 result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread);
2158 if (result == KERN_SUCCESS) {
2159 thread_deallocate(thread);
2160 } else {
2161 panic("Could not create memorystatus_freeze_thread");
2162 }
2163 }
2164
2165 static int
2166 memorystatus_freeze_top_process(boolean_t *memorystatus_freeze_swap_low)
2167 {
2168 pid_t aPid = 0;
2169 int ret = -1;
2170 proc_t p = PROC_NULL, next_p = PROC_NULL;
2171 unsigned int i = 0;
2172
2173 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START,
2174 memorystatus_available_pages, 0, 0, 0, 0);
2175
2176 proc_list_lock();
2177
2178 next_p = memorystatus_get_first_proc_locked(&i, TRUE);
2179 while (next_p) {
2180 kern_return_t kr;
2181 uint32_t purgeable, wired, clean, dirty;
2182 boolean_t shared;
2183 uint32_t pages;
2184 uint32_t max_pages = 0;
2185 uint32_t state;
2186
2187 p = next_p;
2188 next_p = memorystatus_get_next_proc_locked(&i, p, TRUE);
2189
2190 aPid = p->p_pid;
2191 state = p->p_memstat_state;
2192
2193 /* Ensure the process is eligible for freezing */
2194 if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FROZEN)) || !(state & P_MEMSTAT_SUSPENDED)) {
2195 continue; // with lock held
2196 }
2197
2198 /* Only freeze processes meeting our minimum resident page criteria */
2199 memorystatus_get_task_page_counts(p->task, &pages, NULL);
2200 if (pages < memorystatus_freeze_pages_min) {
2201 continue; // with lock held
2202 }
2203
2204 if (DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
2205 /* Ensure there's enough free space to freeze this process. */
2206 max_pages = MIN(default_pager_swap_pages_free(), memorystatus_freeze_pages_max);
2207 if (max_pages < memorystatus_freeze_pages_min) {
2208 *memorystatus_freeze_swap_low = TRUE;
2209 proc_list_unlock();
2210 goto exit;
2211 }
2212 } else {
2213 max_pages = UINT32_MAX - 1;
2214 }
2215
2216 /* Mark as locked temporarily to avoid kill */
2217 p->p_memstat_state |= P_MEMSTAT_LOCKED;
2218
2219 p = proc_ref_locked(p);
2220 proc_list_unlock();
2221 if (!p) {
2222 goto exit;
2223 }
2224
2225 kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, FALSE);
2226
2227 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - "
2228 "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n",
2229 (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (p->p_comm ? p->p_comm : "(unknown)"),
2230 memorystatus_available_pages, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());
2231
2232 proc_list_lock();
2233 p->p_memstat_state &= ~P_MEMSTAT_LOCKED;
2234
2235 /* Success? */
2236 if (KERN_SUCCESS == kr) {
2237 memorystatus_freeze_entry_t data = { aPid, TRUE, dirty };
2238
2239 memorystatus_frozen_count++;
2240
2241 p->p_memstat_state |= (P_MEMSTAT_FROZEN | (shared ? 0: P_MEMSTAT_NORECLAIM));
2242
2243 /* Update stats */
2244 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
2245 throttle_intervals[i].pageouts += dirty;
2246 }
2247
2248 memorystatus_freeze_pageouts += dirty;
2249 memorystatus_freeze_count++;
2250
2251 proc_list_unlock();
2252
2253 memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data));
2254
2255 /* Return the number of reclaimed pages */
2256 ret = dirty;
2257
2258 } else {
2259 proc_list_unlock();
2260 }
2261
2262 proc_rele(p);
2263 goto exit;
2264 }
2265
2266 proc_list_unlock();
2267
2268 exit:
2269 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END,
2270 memorystatus_available_pages, aPid, 0, 0, 0);
2271
2272 return ret;
2273 }
2274
2275 static inline boolean_t
2276 memorystatus_can_freeze_processes(void)
2277 {
2278 boolean_t ret;
2279
2280 proc_list_lock();
2281
2282 if (memorystatus_suspended_count) {
2283 uint32_t average_resident_pages, estimated_processes;
2284
2285 /* Estimate the number of suspended processes we can fit */
2286 average_resident_pages = memorystatus_suspended_footprint_total / memorystatus_suspended_count;
2287 estimated_processes = memorystatus_suspended_count +
2288 ((memorystatus_available_pages - memorystatus_available_pages_critical) / average_resident_pages);
2289
2290 /* If it's predicted that no freeze will occur, lower the threshold temporarily */
2291 if (estimated_processes <= FREEZE_SUSPENDED_THRESHOLD_DEFAULT) {
2292 memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_LOW;
2293 } else {
2294 memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
2295 }
2296
2297 MEMORYSTATUS_DEBUG(1, "memorystatus_can_freeze_processes: %d suspended processes, %d average resident pages / process, %d suspended processes estimated\n",
2298 memorystatus_suspended_count, average_resident_pages, estimated_processes);
2299
2300 if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) {
2301 ret = TRUE;
2302 } else {
2303 ret = FALSE;
2304 }
2305 } else {
2306 ret = FALSE;
2307 }
2308
2309 proc_list_unlock();
2310
2311 return ret;
2312 }
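/*
 * Editor's note - worked example with hypothetical numbers: 8 suspended
 * processes with a combined footprint of 16000 pages give
 * average_resident_pages = 2000. With memorystatus_available_pages = 10000
 * and a critical level of 2000, estimated_processes = 8 +
 * (10000 - 2000) / 2000 = 12. Whether 12 clears
 * FREEZE_SUSPENDED_THRESHOLD_DEFAULT decides which suspended-process
 * threshold this pass of the freezer will use.
 */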
2313
2314 static boolean_t
2315 memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low)
2316 {
2317 /* Only freeze if we're sufficiently low on memory; this holds off freezes right
2318 after boot, and is generally a no-op once we've reached steady state. */
2319 if (memorystatus_available_pages > memorystatus_freeze_threshold) {
2320 return FALSE;
2321 }
2322
2323 /* Check minimum suspended process threshold. */
2324 if (!memorystatus_can_freeze_processes()) {
2325 return FALSE;
2326 }
2327
2328 /* Is swap running low? */
2329 if (*memorystatus_freeze_swap_low) {
2330 /* If there's been no movement in free swap pages since we last attempted freeze, return. */
2331 if (default_pager_swap_pages_free() < memorystatus_freeze_pages_min) {
2332 return FALSE;
2333 }
2334
2335 /* Pages have been freed - we can retry. */
2336 *memorystatus_freeze_swap_low = FALSE;
2337 }
2338
2339 /* OK */
2340 return TRUE;
2341 }
2342
2343 static void
2344 memorystatus_freeze_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
2345 {
2346 if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
2347 if (!interval->max_pageouts) {
2348 interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * FREEZE_DAILY_PAGEOUTS_MAX) / (24 * 60)));
2349 } else {
2350 printf("memorystatus_freeze_update_throttle_interval: %d minute throttle timeout, resetting\n", interval->mins);
2351 }
2352 interval->ts.tv_sec = interval->mins * 60;
2353 interval->ts.tv_nsec = 0;
2354 ADD_MACH_TIMESPEC(&interval->ts, ts);
2355 /* Since we update the throttle stats pre-freeze, adjust for overshoot here */
2356 if (interval->pageouts > interval->max_pageouts) {
2357 interval->pageouts -= interval->max_pageouts;
2358 } else {
2359 interval->pageouts = 0;
2360 }
2361 interval->throttle = FALSE;
2362 } else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
2363 printf("memorystatus_freeze_update_throttle_interval: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
2364 interval->throttle = TRUE;
2365 }
2366
2367 MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
2368 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
2369 interval->throttle ? "on" : "off");
2370 }
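/*
 * Editor's note - budget arithmetic, with a hypothetical daily cap: for a
 * 60-minute interval with burst_multiple = 2, and supposing
 * FREEZE_DAILY_PAGEOUTS_MAX were 24000 pages,
 * max_pageouts = 2 * ((60 * 24000) / (24 * 60)) = 2000 pages. The burst
 * multiple deliberately lets short intervals spend more than their pro-rated
 * share of the daily budget, while the longest interval still bounds the
 * overall total.
 */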
2371
2372 static boolean_t
2373 memorystatus_freeze_update_throttle(void)
2374 {
2375 clock_sec_t sec;
2376 clock_nsec_t nsec;
2377 mach_timespec_t ts;
2378 uint32_t i;
2379 boolean_t throttled = FALSE;
2380
2381 #if DEVELOPMENT || DEBUG
2382 if (!memorystatus_freeze_throttle_enabled)
2383 return FALSE;
2384 #endif
2385
2386 clock_get_system_nanotime(&sec, &nsec);
2387 ts.tv_sec = sec;
2388 ts.tv_nsec = nsec;
2389
2390 /* Check freeze pageouts over multiple intervals and throttle if we've exceeded our budget.
2391 *
2392 * This ensures that periods of inactivity can't be used as 'credit' towards freeze if the device has
2393 * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in
2394 * order to allow for bursts of activity.
2395 */
2396 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
2397 memorystatus_freeze_update_throttle_interval(&ts, &throttle_intervals[i]);
2398 if (throttle_intervals[i].throttle == TRUE)
2399 throttled = TRUE;
2400 }
2401
2402 return throttled;
2403 }
2404
2405 static void
2406 memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused)
2407 {
2408 static boolean_t memorystatus_freeze_swap_low = FALSE;
2409
2410 if (memorystatus_freeze_enabled) {
2411 if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) {
2412 /* Only freeze if we've not exceeded our pageout budgets */
2413 if (!memorystatus_freeze_update_throttle()) {
2414 memorystatus_freeze_top_process(&memorystatus_freeze_swap_low);
2415 } else {
2416 printf("memorystatus_freeze_thread: in throttle, ignoring freeze\n");
2417 memorystatus_freeze_throttle_count++; /* Throttled, update stats */
2418 }
2419 }
2420 }
2421
2422 assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT);
2423 thread_block((thread_continue_t) memorystatus_freeze_thread);
2424 }
2425
2426 #endif /* CONFIG_FREEZE */
2427
2428 #if CONFIG_JETSAM && VM_PRESSURE_EVENTS
2429
2430 boolean_t
2431 memorystatus_warn_process(pid_t pid) {
2432 return (vm_dispatch_pressure_note_to_pid(pid, FALSE) == 0);
2433 }
2434
2435 static inline boolean_t
2436 memorystatus_update_pressure_locked(boolean_t *pressured) {
2437 vm_pressure_level_t old_level, new_level;
2438
2439 old_level = memorystatus_vm_pressure_level;
2440
2441 if (memorystatus_available_pages > memorystatus_available_pages_pressure) {
2442 /* Too many free pages */
2443 new_level = kVMPressureNormal;
2444 }
2445 #if CONFIG_FREEZE
2446 else if (memorystatus_frozen_count > 0) {
2447 /* Frozen processes exist */
2448 new_level = kVMPressureNormal;
2449 }
2450 #endif
2451 else if (memorystatus_suspended_count > MEMORYSTATUS_SUSPENDED_THRESHOLD) {
2452 /* Too many suspended processes */
2453 new_level = kVMPressureNormal;
2454 }
2455 else if (memorystatus_suspended_count > 0) {
2456 /* Some suspended processes - warn */
2457 new_level = kVMPressureWarning;
2458 }
2459 else {
2460 /* Otherwise, pressure level is urgent */
2461 new_level = kVMPressureUrgent;
2462 }
2463
2464 *pressured = (new_level != kVMPressureNormal);
2465
2466 /* Did the pressure level change? */
2467 if (old_level != new_level) {
2468 MEMORYSTATUS_DEBUG(1, "memorystatus_update_pressure_locked(): memory pressure changed %d -> %d; memorystatus_available_pages: %d\n",
2469 old_level, new_level, memorystatus_available_pages);
2470 memorystatus_vm_pressure_level = new_level;
2471 return TRUE;
2472 }
2473
2474 return FALSE;
2475 }
2476
2477 kern_return_t
2478 memorystatus_update_vm_pressure(boolean_t target_foreground) {
2479 boolean_t pressure_changed, pressured;
2480 boolean_t warn = FALSE;
2481
2482 /*
2483 * Centralised pressure handling routine. Called from:
2484 * - The main jetsam thread. In this case, we update the pressure level and dispatch warnings to the foreground
2485 * process *only*, each time the available page % drops.
2486 * - The pageout scan path. In this scenario, every other registered process is targeted in footprint order.
2487 *
2488 * This scheme guarantees delivery to the foreground app, while providing for warnings to the remaining processes
2489 * driven by the pageout scan.
2490 */
2491
2492 MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): foreground %d; available %d, critical %d, pressure %d\n",
2493 target_foreground, memorystatus_available_pages, memorystatus_available_pages_critical, memorystatus_available_pages_pressure);
2494
2495 proc_list_lock();
2496
2497 pressure_changed = memorystatus_update_pressure_locked(&pressured);
2498
2499 if (pressured) {
2500 if (target_foreground) {
2501 if (memorystatus_available_pages != memorystatus_last_foreground_pressure_pages) {
2502 if (memorystatus_available_pages < memorystatus_last_foreground_pressure_pages) {
2503 warn = TRUE;
2504 }
2505 memorystatus_last_foreground_pressure_pages = memorystatus_available_pages;
2506 }
2507 } else {
2508 warn = TRUE;
2509 }
2510 } else if (pressure_changed) {
2511 memorystatus_last_foreground_pressure_pages = (unsigned int)-1;
2512 }
2513
2514 proc_list_unlock();
2515
2516 /* Target foreground processes if specified */
2517 if (warn) {
2518 if (target_foreground) {
2519 MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_foreground_candidates()\n");
2520 vm_find_pressure_foreground_candidates();
2521 } else {
2522 MEMORYSTATUS_DEBUG(1, "memorystatus_update_vm_pressure(): invoking vm_find_pressure_candidate()\n");
2523 /* Defer to VM code. This can race with the foreground priority, but
2524 * it's preferable to holding onto locks for an extended period. */
2525 vm_find_pressure_candidate();
2526 }
2527 }
2528
2529 /* Dispatch the global kevent to privileged listeners */
2530 if (pressure_changed) {
2531 memorystatus_issue_pressure_kevent(pressured);
2532 }
2533
2534 return KERN_SUCCESS;
2535 }
2536
2537 int
2538 memorystatus_send_pressure_note(pid_t pid) {
2539 MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid);
2540 return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid));
2541 }
2542
2543 boolean_t
2544 memorystatus_bg_pressure_eligible(proc_t p) {
2545 boolean_t eligible = FALSE;
2546
2547 proc_list_lock();
2548
2549 MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state);
2550
2551 /* Foreground processes have already been dealt with at this point, so just test for eligibility */
2552 if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) {
2553 eligible = TRUE;
2554 }
2555
2556 proc_list_unlock();
2557
2558 return eligible;
2559 }
2560
2561 boolean_t
2562 memorystatus_is_foreground_locked(proc_t p) {
2563 return ((p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) ||
2564 (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT));
2565 }
2566
2567 #else /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
2568
2569 /*
2570 * Trigger levels to test the mechanism.
2571 * Can be used via a sysctl.
2572 */
2573 #define TEST_LOW_MEMORY_TRIGGER_ONE 1
2574 #define TEST_LOW_MEMORY_TRIGGER_ALL 2
2575 #define TEST_PURGEABLE_TRIGGER_ONE 3
2576 #define TEST_PURGEABLE_TRIGGER_ALL 4
2577 #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE 5
2578 #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL 6
2579
2580 boolean_t memorystatus_manual_testing_on = FALSE;
2581 vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal;
2582
2583 extern struct knote *
2584 vm_pressure_select_optimal_candidate_to_notify(struct klist *, int);
2585
2586 extern
2587 kern_return_t vm_pressure_notification_without_levels(void);
2588
2589 extern void vm_pressure_klist_lock(void);
2590 extern void vm_pressure_klist_unlock(void);
2591
2592 extern void vm_reset_active_list(void);
2593
2594 extern void delay(int);
2595
2596 #define INTER_NOTIFICATION_DELAY (250000) /* .25 second */
2597
2598 void memorystatus_on_pageout_scan_end(void) {
2599 /* No-op */
2600 }
2601
2602 /*
2603 * kn_max - knote
2604 *
2605 * knote_pressure_level - to check if the knote is registered for this notification level.
2606 *
2607 * task - task whose bits we'll be modifying
2608 *
2609 * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again.
2610 *
2611 * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately.
2612 *
2613 */
2614 boolean_t
2615 is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t);
2616
2617 boolean_t
2618 is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set)
2619 {
2620 if (kn_max->kn_sfflags & knote_pressure_level) {
2621
2622 if (task_has_been_notified(task, pressure_level_to_clear) == TRUE) {
2623
2624 task_clear_has_been_notified(task, pressure_level_to_clear);
2625 }
2626
2627 task_mark_has_been_notified(task, pressure_level_to_set);
2628 return TRUE;
2629 }
2630
2631 return FALSE;
2632 }
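/*
 * Editor's note - example flow: a client registered for both WARN and
 * CRITICAL first sees WARN, so its task's CRITICAL bit is cleared and the
 * WARN bit set. If pressure later escalates to CRITICAL, the WARN bit is
 * cleared and CRITICAL set; a subsequent drop back to WARN then notifies
 * again instead of being suppressed as a duplicate.
 */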
2633
2634 extern kern_return_t vm_pressure_notify_dispatch_vm_clients(void);
2635
2636 kern_return_t
2637 memorystatus_update_vm_pressure(boolean_t target_best_process)
2638 {
2639 struct knote *kn_max = NULL;
2640 pid_t target_pid = -1;
2641 struct klist dispatch_klist = { NULL };
2642 proc_t target_proc = PROC_NULL;
2643 static vm_pressure_level_t level_snapshot = kVMPressureNormal;
2644 struct task *task = NULL;
2645 boolean_t found_candidate = FALSE;
2646
2647 while (1) {
2648
2649 /*
2650 * There is a race window here. But it's not clear
2651 * how much we benefit from having extra synchronization.
2652 */
2653 level_snapshot = memorystatus_vm_pressure_level;
2654
2655 memorystatus_klist_lock();
2656 kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot);
2657
2658 if (kn_max == NULL) {
2659 memorystatus_klist_unlock();
2660
2661 /*
2662 * No more level-based clients to notify.
2663 * Try the non-level based notification clients.
2664 *
2665 * However, these non-level clients don't understand
2666 * the "return-to-normal" notification.
2667 *
2668 * So don't consider them for those notifications. Just
2669 * return instead.
2670 *
2671 */
2672
2673 if (level_snapshot != kVMPressureNormal) {
2674 goto try_dispatch_vm_clients;
2675 } else {
2676 return KERN_FAILURE;
2677 }
2678 }
2679
2680 target_proc = kn_max->kn_kq->kq_p;
2681
2682 proc_list_lock();
2683 if (target_proc != proc_ref_locked(target_proc)) {
2684 target_proc = PROC_NULL;
2685 proc_list_unlock();
2686 memorystatus_klist_unlock();
2687 continue;
2688 }
2689 proc_list_unlock();
2690 memorystatus_klist_unlock();
2691
2692 target_pid = target_proc->p_pid;
2693
2694 task = (struct task *)(target_proc->task);
2695
2696 if (level_snapshot != kVMPressureNormal) {
2697
2698 if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) {
2699
2700 if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, kVMPressureCritical, kVMPressureWarning) == TRUE) {
2701 found_candidate = TRUE;
2702 }
2703 } else {
2704 if (level_snapshot == kVMPressureCritical) {
2705
2706 if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, kVMPressureWarning, kVMPressureCritical) == TRUE) {
2707 found_candidate = TRUE;
2708 }
2709 }
2710 }
2711 } else {
2712 if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
2713
2714 task_clear_has_been_notified(task, kVMPressureWarning);
2715 task_clear_has_been_notified(task, kVMPressureCritical);
2716
2717 found_candidate = TRUE;
2718 }
2719 }
2720
2721 if (found_candidate == FALSE) {
2722 continue;
2723 }
2724
2725 memorystatus_klist_lock();
2726 KNOTE_DETACH(&memorystatus_klist, kn_max);
2727 KNOTE_ATTACH(&dispatch_klist, kn_max);
2728 memorystatus_klist_unlock();
2729
2730 KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure);
2731
2732 memorystatus_klist_lock();
2733 KNOTE_DETACH(&dispatch_klist, kn_max);
2734 KNOTE_ATTACH(&memorystatus_klist, kn_max);
2735 memorystatus_klist_unlock();
2736
2737 microuptime(&target_proc->vm_pressure_last_notify_tstamp);
2738 proc_rele(target_proc);
2739
2740 if (target_best_process == TRUE) {
2741 break;
2742 }
2743
2744 try_dispatch_vm_clients:
2745 if (level_snapshot != kVMPressureNormal) {
2746 /*
2747 * Wake up idle-exit thread.
2748 * Targets one process per invocation.
2749 *
2750 * TODO: memorystatus_idle_exit_from_VM should return FALSE once it's
2751 * done with all idle-exitable processes. Currently, we will exit this
2752 * loop when we are done with notification clients (level and non-level based)
2753 * but we may still have some idle-exitable processes around.
2754 *
2755 */
2756 memorystatus_idle_exit_from_VM();
2757
2758 if ((vm_pressure_notify_dispatch_vm_clients() == KERN_FAILURE) && (kn_max == NULL)) {
2759 /*
2760 * kn_max == NULL i.e. we didn't find any eligible clients for the level-based notifications
2761 * AND
2762 * we have failed to find any eligible clients for the non-level based notifications too.
2763 * So, we are done.
2764 */
2765
2766 return KERN_FAILURE;
2767 }
2768 }
2769
2770 if (memorystatus_manual_testing_on == FALSE) {
2771 delay(INTER_NOTIFICATION_DELAY);
2772 }
2773 }
2774
2775 return KERN_SUCCESS;
2776 }
2777
2778 vm_pressure_level_t
2779 convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);
2780
2781 vm_pressure_level_t
2782 convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level)
2783 {
2784 vm_pressure_level_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
2785
2786 switch (internal_pressure_level) {
2787
2788 case kVMPressureNormal:
2789 {
2790 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
2791 break;
2792 }
2793
2794 case kVMPressureWarning:
2795 case kVMPressureUrgent:
2796 {
2797 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN;
2798 break;
2799 }
2800
2801 case kVMPressureCritical:
2802 {
2803 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
2804 break;
2805 }
2806
2807 default:
2808 break;
2809 }
2810
2811 return dispatch_level;
2812 }
2813
2814 static int
2815 sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
2816 {
2817 #pragma unused(arg1, arg2, oidp)
2818
2819 vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level);
2820
2821 return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level));
2822 }
2823
2824 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED,
2825 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");
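/*
 * Editor's sketch: reading this sysctl from user space. The value is a
 * NOTE_MEMORYSTATUS_PRESSURE_* constant (declared in the private section of
 * <sys/event.h>), as produced by the conversion routine above.
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *
 *	int
 *	main(void)
 *	{
 *		int level = 0;
 *		size_t len = sizeof(level);
 *
 *		if (sysctlbyname("kern.memorystatus_vm_pressure_level",
 *		    &level, &len, NULL, 0) != 0) {
 *			perror("sysctlbyname");
 *			return 1;
 *		}
 *		printf("dispatch level: 0x%x\n", level);
 *		return 0;
 *	}
 */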
2826
2827
2828 extern int memorystatus_purge_on_warning;
2829 extern int memorystatus_purge_on_critical;
2830
2831 static int
2832 sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS
2833 {
2834 #pragma unused(arg1, arg2)
2835
2836 int level = 0;
2837 int error = 0;
2838 int pressure_level = 0;
2839 int trigger_request = 0;
2840 int force_purge;
2841
2842 error = sysctl_handle_int(oidp, &level, 0, req);
2843 if (error || !req->newptr) {
2844 return (error);
2845 }
2846
2847 memorystatus_manual_testing_on = TRUE;
2848
2849 trigger_request = (level >> 16) & 0xFFFF;
2850 pressure_level = (level & 0xFFFF);
2851
2852 if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE ||
2853 trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) {
2854 return EINVAL;
2855 }
2856 switch (pressure_level) {
2857 case NOTE_MEMORYSTATUS_PRESSURE_NORMAL:
2858 case NOTE_MEMORYSTATUS_PRESSURE_WARN:
2859 case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL:
2860 break;
2861 default:
2862 return EINVAL;
2863 }
2864
2865 /*
2866 * The pressure level is being set from user-space.
2867 * And user-space uses the constants in sys/event.h
2868 * So we translate those events to our internal levels here.
2869 */
2870 if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
2871
2872 memorystatus_manual_testing_level = kVMPressureNormal;
2873 force_purge = 0;
2874
2875 } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) {
2876
2877 memorystatus_manual_testing_level = kVMPressureWarning;
2878 force_purge = memorystatus_purge_on_warning;
2879
2880 } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
2881
2882 memorystatus_manual_testing_level = kVMPressureCritical;
2883 force_purge = memorystatus_purge_on_critical;
2884 }
2885
2886 memorystatus_vm_pressure_level = memorystatus_manual_testing_level;
2887
2888 /* purge according to the new pressure level */
2889 switch (trigger_request) {
2890 case TEST_PURGEABLE_TRIGGER_ONE:
2891 case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE:
2892 if (force_purge == 0) {
2893 /* no purging requested */
2894 break;
2895 }
2896 vm_purgeable_object_purge_one_unlocked(force_purge);
2897 break;
2898 case TEST_PURGEABLE_TRIGGER_ALL:
2899 case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL:
2900 if (force_purge == 0) {
2901 /* no purging requested */
2902 break;
2903 }
2904 while (vm_purgeable_object_purge_one_unlocked(force_purge));
2905 break;
2906 }
2907
2908 if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) ||
2909 (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) {
2910
2911 memorystatus_update_vm_pressure(TRUE);
2912 }
2913
2914 if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) ||
2915 (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) {
2916
2917 while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) {
2918 continue;
2919 }
2920 }
2921
2922 if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
2923 memorystatus_manual_testing_on = FALSE;
2924
2925 vm_pressure_klist_lock();
2926 vm_reset_active_list();
2927 vm_pressure_klist_unlock();
2928 } else {
2929
2930 vm_pressure_klist_lock();
2931 vm_pressure_notification_without_levels();
2932 vm_pressure_klist_unlock();
2933 }
2934
2935 return 0;
2936 }
2937
2938 SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT|CTLFLAG_WR|CTLFLAG_LOCKED|CTLFLAG_MASKED,
2939 0, 0, &sysctl_memorypressure_manual_trigger, "I", "");
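/*
 * Editor's sketch: driving the manual trigger from user space (root only).
 * The high 16 bits of the written value select the TEST_* request and the
 * low 16 bits carry a NOTE_MEMORYSTATUS_PRESSURE_* level; the 0x2 below is
 * an assumption about NOTE_MEMORYSTATUS_PRESSURE_WARN's published value.
 *
 *	#include <stdio.h>
 *	#include <sys/sysctl.h>
 *
 *	int
 *	main(void)
 *	{
 *		// TEST_LOW_MEMORY_TRIGGER_ONE (1) in the high word, WARN low
 *		int value = (1 << 16) | 0x2;
 *
 *		if (sysctlbyname("kern.memorypressure_manual_trigger",
 *		    NULL, NULL, &value, sizeof(value)) != 0) {
 *			perror("sysctlbyname");
 *			return 1;
 *		}
 *		return 0;
 *	}
 */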
2940
2941
2942 extern int memorystatus_purge_on_warning;
2943 extern int memorystatus_purge_on_urgent;
2944 extern int memorystatus_purge_on_critical;
2945
2946 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_warning, 0, "");
2947 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_urgent, 0, "");
2948 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_LOCKED, &memorystatus_purge_on_critical, 0, "");
2949
2950
2951 #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
2952
2953 /* Return both allocated and actual size, since there's a race between allocation and list compilation */
2954 static int
2955 memorystatus_get_priority_list(memorystatus_priority_entry_t **list_ptr, size_t *buffer_size, size_t *list_size, boolean_t size_only)
2956 {
2957 uint32_t list_count, i = 0;
2958 memorystatus_priority_entry_t *list_entry;
2959 proc_t p;
2960
2961 list_count = memorystatus_list_count;
2962 *list_size = sizeof(memorystatus_priority_entry_t) * list_count;
2963
2964 /* Just a size check? */
2965 if (size_only) {
2966 return 0;
2967 }
2968
2969 /* Otherwise, validate the size of the buffer */
2970 if (*buffer_size < *list_size) {
2971 return EINVAL;
2972 }
2973
2974 *list_ptr = (memorystatus_priority_entry_t*)kalloc(*list_size);
2975 if (!*list_ptr) {
2976 return ENOMEM;
2977 }
2978
2979 memset(*list_ptr, 0, *list_size);
2980
2981 *buffer_size = *list_size;
2982 *list_size = 0;
2983
2984 list_entry = *list_ptr;
2985
2986 proc_list_lock();
2987
2988 p = memorystatus_get_first_proc_locked(&i, TRUE);
2989 while (p && (*list_size < *buffer_size)) {
2990 list_entry->pid = p->p_pid;
2991 list_entry->priority = p->p_memstat_effectivepriority;
2992 list_entry->user_data = p->p_memstat_userdata;
2993 #if LEGACY_HIWATER
2994 if (((p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) && (p->p_memstat_effectivepriority >= JETSAM_PRIORITY_FOREGROUND)) ||
2995 (p->p_memstat_memlimit <= 0)) {
2996 task_get_phys_footprint_limit(p->task, &list_entry->limit);
2997 } else {
2998 list_entry->limit = p->p_memstat_memlimit;
2999 }
3000 #else
3001 task_get_phys_footprint_limit(p->task, &list_entry->limit);
3002 #endif
3003 list_entry->state = memorystatus_build_state(p);
3004 list_entry++;
3005
3006 *list_size += sizeof(memorystatus_priority_entry_t);
3007
3008 p = memorystatus_get_next_proc_locked(&i, p, TRUE);
3009 }
3010
3011 proc_list_unlock();
3012
3013 MEMORYSTATUS_DEBUG(1, "memorystatus_get_priority_list: returning %lu for size\n", (unsigned long)*list_size);
3014
3015 return 0;
3016 }
3017
3018 static int
3019 memorystatus_cmd_get_priority_list(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
3020 int error = EINVAL;
3021 boolean_t size_only;
3022 memorystatus_priority_entry_t *list = NULL;
3023 size_t list_size;
3024
3025 size_only = ((buffer == USER_ADDR_NULL) ? TRUE: FALSE);
3026
3027 error = memorystatus_get_priority_list(&list, &buffer_size, &list_size, size_only);
3028 if (error) {
3029 goto out;
3030 }
3031
3032 if (!size_only) {
3033 error = copyout(list, buffer, list_size);
3034 }
3035
3036 if (error == 0) {
3037 *retval = list_size;
3038 }
3039 out:
3040
3041 if (list) {
3042 kfree(list, buffer_size);
3043 }
3044
3045 return error;
3046 }
3047
3048 #if CONFIG_JETSAM
3049
3050 static void
3051 memorystatus_clear_errors(void)
3052 {
3053 proc_t p;
3054 unsigned int i = 0;
3055
3056 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_START, 0, 0, 0, 0, 0);
3057
3058 proc_list_lock();
3059
3060 p = memorystatus_get_first_proc_locked(&i, TRUE);
3061 while (p) {
3062 if (p->p_memstat_state & P_MEMSTAT_ERROR) {
3063 p->p_memstat_state &= ~P_MEMSTAT_ERROR;
3064 }
3065 p = memorystatus_get_next_proc_locked(&i, p, TRUE);
3066 }
3067
3068 proc_list_unlock();
3069
3070 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_CLEAR_ERRORS) | DBG_FUNC_END, 0, 0, 0, 0, 0);
3071 }
3072
3073 static void
3074 memorystatus_update_levels_locked(boolean_t critical_only) {
3075 memorystatus_available_pages_critical = memorystatus_available_pages_critical_base;
3076 #if !LATENCY_JETSAM
3077 {
3078 // If there's an entry in the first bucket, we have idle processes
3079 memstat_bucket_t *first_bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE];
3080 if (first_bucket->count) {
3081 memorystatus_available_pages_critical += memorystatus_available_pages_critical_idle_offset;
3082 }
3083 }
3084 #endif
3085 #if DEBUG || DEVELOPMENT
3086 if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
3087 memorystatus_available_pages_critical += memorystatus_jetsam_policy_offset_pages_diagnostic;
3088 }
3089 #endif
3090
3091 if (critical_only) {
3092 return;
3093 }
3094
3095 #if VM_PRESSURE_EVENTS
3096 memorystatus_available_pages_pressure = (pressure_threshold_percentage / delta_percentage) * memorystatus_delta;
3097 #if DEBUG || DEVELOPMENT
3098 if (memorystatus_jetsam_policy & kPolicyDiagnoseActive) {
3099 memorystatus_available_pages_pressure += memorystatus_jetsam_policy_offset_pages_diagnostic;
3100 }
3101 #endif
3102 #endif
3103 }
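/*
 * Editor's note: with the default tunables (pressure_threshold_percentage =
 * 15, delta_percentage = 5), the pressure level works out to
 * (15 / 5) * memorystatus_delta = 3 * memorystatus_delta, so pressure
 * reporting begins when available pages fall below roughly 15% of physical
 * memory.
 */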
3104
3105 static int
3106 memorystatus_get_snapshot(memorystatus_jetsam_snapshot_t **snapshot, size_t *snapshot_size, boolean_t size_only) {
3107 size_t input_size = *snapshot_size;
3108
3109 if (memorystatus_jetsam_snapshot_count > 0) {
3110 *snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + (sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count));
3111 } else {
3112 *snapshot_size = 0;
3113 }
3114
3115 if (size_only) {
3116 return 0;
3117 }
3118
3119 if (input_size < *snapshot_size) {
3120 return EINVAL;
3121 }
3122
3123 *snapshot = memorystatus_jetsam_snapshot;
3124
3125 MEMORYSTATUS_DEBUG(1, "memorystatus_snapshot: returning %ld for size\n", (long)*snapshot_size);
3126
3127 return 0;
3128 }
3129
3130 static int
3131 memorystatus_cmd_get_jetsam_snapshot(user_addr_t buffer, size_t buffer_size, int32_t *retval) {
3132 int error = EINVAL;
3133 boolean_t size_only;
3134 memorystatus_jetsam_snapshot_t *snapshot;
3135
3136 size_only = ((buffer == USER_ADDR_NULL) ? TRUE : FALSE);
3137
3138 error = memorystatus_get_snapshot(&snapshot, &buffer_size, size_only);
3139 if (error) {
3140 goto out;
3141 }
3142
3143 /* Copy out and reset */
3144 if (!size_only) {
3145 if ((error = copyout(snapshot, buffer, buffer_size)) == 0) {
3146 snapshot->entry_count = memorystatus_jetsam_snapshot_count = 0;
3147 }
3148 }
3149
3150 if (error == 0) {
3151 *retval = buffer_size;
3152 }
3153 out:
3154 return error;
3155 }
3156
3157 static int
3158 memorystatus_cmd_set_priority_properties(pid_t pid, user_addr_t buffer, size_t buffer_size, __unused int32_t *retval) {
3159 const uint32_t MAX_ENTRY_COUNT = 2; /* Cap the entry count */
3160
3161 int error;
3162 uint32_t i;
3163 uint32_t entry_count;
3164 memorystatus_priority_properties_t *entries;
3165
3166 /* Validate inputs */
3167 if ((pid == 0) || (buffer == USER_ADDR_NULL) || (buffer_size == 0)) {
3168 return EINVAL;
3169 }
3170
3171 /* Make sure the buffer is a multiple of the entry size, and that an excessive size isn't specified */
3172 entry_count = (buffer_size / sizeof(memorystatus_priority_properties_t));
3173 if (((buffer_size % sizeof(memorystatus_priority_properties_t)) != 0) || (entry_count > MAX_ENTRY_COUNT)) {
3174 return EINVAL;
3175 }
3176
3177 entries = (memorystatus_priority_properties_t *)kalloc(buffer_size);
3178 if (!entries) { return ENOMEM; }
3179 error = copyin(buffer, entries, buffer_size);
3180
3181 for (i = 0; i < entry_count; i++) {
3182 proc_t p;
3183
3184 if (error) {
3185 break;
3186 }
3187
3188 p = proc_find(pid);
3189 if (!p) {
3190 error = ESRCH;
3191 break;
3192 }
3193
3194 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
3195 error = EPERM;
3196 proc_rele(p);
3197 break;
3198 }
3199
3200 error = memorystatus_update(p, entries[i].priority, entries[i].user_data, FALSE, FALSE, 0, 0);
3201 proc_rele(p);
3202 }
3203
3204 kfree(entries, buffer_size);
3205
3206 return error;
3207 }
3208
3209 static int
3210 memorystatus_cmd_get_pressure_status(int32_t *retval) {
3211 int error;
3212
3213 /* Need privilege for check */
3214 error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
3215 if (error) {
3216 return (error);
3217 }
3218
3219 /* Inherently racy, so it's not worth taking a lock here */
3220 *retval = (kVMPressureNormal != memorystatus_vm_pressure_level) ? 1 : 0;
3221
3222 return error;
3223 }
3224
3225 static int
3226 memorystatus_cmd_set_jetsam_high_water_mark(pid_t pid, int32_t high_water_mark, __unused int32_t *retval) {
3227 int error = 0;
3228
3229 proc_t p = proc_find(pid);
3230 if (!p) {
3231 return ESRCH;
3232 }
3233
3234 if (high_water_mark <= 0) {
3235 high_water_mark = -1; /* Disable */
3236 }
3237
3238 proc_list_lock();
3239
3240 if (p->p_memstat_state & P_MEMSTAT_INTERNAL) {
3241 error = EPERM;
3242 goto exit;
3243 }
3244
3245 p->p_memstat_memlimit = high_water_mark;
3246 if (memorystatus_highwater_enabled) {
3247 if (p->p_memstat_state & P_MEMSTAT_MEMLIMIT_BACKGROUND) {
3248 memorystatus_update_priority_locked(p, p->p_memstat_effectivepriority);
3249 } else {
3250 error = (task_set_phys_footprint_limit_internal(p->task, high_water_mark, NULL, TRUE) == 0) ? 0 : EINVAL;
3251 }
3252 }
3253
3254 exit:
3255 proc_list_unlock();
3256 proc_rele(p);
3257
3258 return error;
3259 }
3260
3261 #endif /* CONFIG_JETSAM */
3262
3263 int
3264 memorystatus_control(struct proc *p __unused, struct memorystatus_control_args *args, int *ret) {
3265 int error = EINVAL;
3266
3267 #if !CONFIG_JETSAM
3268 #pragma unused(ret)
3269 #endif
3270
3271 /* Root only for now */
3272 if (!kauth_cred_issuser(kauth_cred_get())) {
3273 error = EPERM;
3274 goto out;
3275 }
3276
3277 /* Sanity check */
3278 if (args->buffersize > MEMORYSTATUS_BUFFERSIZE_MAX) {
3279 error = EINVAL;
3280 goto out;
3281 }
3282
3283 switch (args->command) {
3284 case MEMORYSTATUS_CMD_GET_PRIORITY_LIST:
3285 error = memorystatus_cmd_get_priority_list(args->buffer, args->buffersize, ret);
3286 break;
3287 #if CONFIG_JETSAM
3288 case MEMORYSTATUS_CMD_SET_PRIORITY_PROPERTIES:
3289 error = memorystatus_cmd_set_priority_properties(args->pid, args->buffer, args->buffersize, ret);
3290 break;
3291 case MEMORYSTATUS_CMD_GET_JETSAM_SNAPSHOT:
3292 error = memorystatus_cmd_get_jetsam_snapshot(args->buffer, args->buffersize, ret);
3293 break;
3294 case MEMORYSTATUS_CMD_GET_PRESSURE_STATUS:
3295 error = memorystatus_cmd_get_pressure_status(ret);
3296 break;
3297 case MEMORYSTATUS_CMD_SET_JETSAM_HIGH_WATER_MARK:
3298 /* TODO: deprecate. Keeping it in as there's no pid based way to set the ledger limit right now. */
3299 error = memorystatus_cmd_set_jetsam_high_water_mark(args->pid, (int32_t)args->flags, ret);
3300 break;
3301 /* Test commands */
3302 #if DEVELOPMENT || DEBUG
3303 case MEMORYSTATUS_CMD_TEST_JETSAM:
3304 error = memorystatus_kill_process_sync(args->pid, kMemorystatusKilled) ? 0 : EINVAL;
3305 break;
3306 case MEMORYSTATUS_CMD_SET_JETSAM_PANIC_BITS:
3307 error = memorystatus_cmd_set_panic_bits(args->buffer, args->buffersize);
3308 break;
3309 #endif /* DEVELOPMENT || DEBUG */
3310 #endif /* CONFIG_JETSAM */
3311 default:
3312 break;
3313 }
3314
3315 out:
3316 return error;
3317 }
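/*
 * Editor's sketch: the size-probe-then-fetch pattern this syscall expects,
 * using the private memorystatus_control() wrapper (prototype assumed per
 * <sys/kern_memorystatus.h>; requires root, as enforced above).
 *
 *	#include <stdio.h>
 *	#include <stdint.h>
 *	#include <stdlib.h>
 *	#include <sys/kern_memorystatus.h>
 *
 *	extern int memorystatus_control(uint32_t command, int32_t pid,
 *	    uint32_t flags, void *buffer, size_t buffersize);
 *
 *	int
 *	main(void)
 *	{
 *		// A NULL buffer requests the required size only.
 *		int size = memorystatus_control(
 *		    MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, NULL, 0);
 *		if (size <= 0) {
 *			return 1;
 *		}
 *		memorystatus_priority_entry_t *list = malloc(size);
 *		if (list && memorystatus_control(
 *		    MEMORYSTATUS_CMD_GET_PRIORITY_LIST, 0, 0, list, size) >= 0) {
 *			printf("%zu entries\n",
 *			    size / sizeof(memorystatus_priority_entry_t));
 *		}
 *		free(list);
 *		return 0;
 *	}
 */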
3318
3319
3320 static int
3321 filt_memorystatusattach(struct knote *kn)
3322 {
3323 kn->kn_flags |= EV_CLEAR;
3324 return memorystatus_knote_register(kn);
3325 }
3326
3327 static void
3328 filt_memorystatusdetach(struct knote *kn)
3329 {
3330 memorystatus_knote_unregister(kn);
3331 }
3332
3333 static int
3334 filt_memorystatus(struct knote *kn, long hint)
3335 {
3336 if (hint) {
3337 switch (hint) {
3338 case kMemorystatusNoPressure:
3339 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
3340 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
3341 }
3342 break;
3343 case kMemorystatusPressure:
3344 if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) {
3345 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
3346 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_WARN;
3347 }
3348 } else if (memorystatus_vm_pressure_level == kVMPressureCritical) {
3349
3350 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
3351 kn->kn_fflags |= NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
3352 }
3353 }
3354 break;
3355 default:
3356 break;
3357 }
3358 }
3359
3360 return (kn->kn_fflags != 0);
3361 }
3362
3363 static void
3364 memorystatus_klist_lock(void) {
3365 lck_mtx_lock(&memorystatus_klist_mutex);
3366 }
3367
3368 static void
3369 memorystatus_klist_unlock(void) {
3370 lck_mtx_unlock(&memorystatus_klist_mutex);
3371 }
3372
3373 void
3374 memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) {
3375 lck_mtx_init(&memorystatus_klist_mutex, grp, attr);
3376 klist_init(&memorystatus_klist);
3377 }
3378
3379 int
3380 memorystatus_knote_register(struct knote *kn) {
3381 int error = 0;
3382
3383 memorystatus_klist_lock();
3384
3385 if (kn->kn_sfflags & (NOTE_MEMORYSTATUS_PRESSURE_NORMAL | NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL)) {
3386
3387 #if CONFIG_JETSAM && VM_PRESSURE_EVENTS
3388 /* Need a privilege to register */
3389 error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
3390 #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
3391
3392 if (!error) {
3393 KNOTE_ATTACH(&memorystatus_klist, kn);
3394 }
3395 } else {
3396 error = ENOTSUP;
3397 }
3398
3399 memorystatus_klist_unlock();
3400
3401 return error;
3402 }
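/*
 * Editor's sketch: how a user-space client attaches to this filter. It
 * assumes EVFILT_MEMORYSTATUS and the NOTE_MEMORYSTATUS_PRESSURE_* flags
 * from the private section of <sys/event.h>; per the privilege check above,
 * registration may require PRIV_VM_PRESSURE.
 *
 *	#include <stdio.h>
 *	#include <sys/event.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct kevent kev, ev;
 *		int kq = kqueue();
 *
 *		if (kq < 0) {
 *			return 1;
 *		}
 *		EV_SET(&kev, 0, EVFILT_MEMORYSTATUS, EV_ADD,
 *		    NOTE_MEMORYSTATUS_PRESSURE_NORMAL |
 *		    NOTE_MEMORYSTATUS_PRESSURE_WARN, 0, NULL);
 *		if (kevent(kq, &kev, 1, NULL, 0, NULL) < 0) {
 *			return 1;
 *		}
 *		for (;;) {
 *			if (kevent(kq, NULL, 0, &ev, 1, NULL) < 0) {
 *				return 1;
 *			}
 *			printf("pressure fflags: 0x%x\n", (unsigned)ev.fflags);
 *		}
 *	}
 */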
3403
3404 void
3405 memorystatus_knote_unregister(struct knote *kn __unused) {
3406 memorystatus_klist_lock();
3407 KNOTE_DETACH(&memorystatus_klist, kn);
3408 memorystatus_klist_unlock();
3409 }
3410
3411 #if CONFIG_JETSAM && VM_PRESSURE_EVENTS
3412 static boolean_t
3413 memorystatus_issue_pressure_kevent(boolean_t pressured) {
3414 memorystatus_klist_lock();
3415 KNOTE(&memorystatus_klist, pressured ? kMemorystatusPressure : kMemorystatusNoPressure);
3416 memorystatus_klist_unlock();
3417 return TRUE;
3418 }
3419
3420 #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */