1 /*
2 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 *
28 */
29
30 #include <sys/kern_event.h>
31 #include <kern/sched_prim.h>
32 #include <kern/kalloc.h>
33 #include <kern/assert.h>
34 #include <kern/debug.h>
35 #include <kern/locks.h>
36 #include <kern/task.h>
37 #include <kern/thread.h>
38 #include <kern/host.h>
39 #include <kern/policy_internal.h>
40 #include <kern/thread_group.h>
41
42 #include <IOKit/IOBSD.h>
43
44 #include <libkern/libkern.h>
45 #include <mach/coalition.h>
46 #include <mach/mach_time.h>
47 #include <mach/task.h>
48 #include <mach/host_priv.h>
49 #include <mach/mach_host.h>
50 #include <os/log.h>
51 #include <pexpert/pexpert.h>
52 #include <sys/coalition.h>
53 #include <sys/kern_event.h>
54 #include <sys/proc.h>
55 #include <sys/proc_info.h>
56 #include <sys/reason.h>
57 #include <sys/signal.h>
58 #include <sys/signalvar.h>
59 #include <sys/sysctl.h>
60 #include <sys/sysproto.h>
61 #include <sys/time.h>
62 #include <sys/wait.h>
63 #include <sys/tree.h>
64 #include <sys/priv.h>
65 #include <vm/vm_pageout.h>
66 #include <vm/vm_protos.h>
67 #include <mach/machine/sdt.h>
68 #include <libkern/section_keywords.h>
69 #include <stdatomic.h>
70
71 #if CONFIG_FREEZE
72 #include <vm/vm_map.h>
73 #endif /* CONFIG_FREEZE */
74
75 #include <sys/kern_memorystatus.h>
76 #include <sys/kern_memorystatus_notify.h>
77
78 /*
79 * Memorystatus klist structures
80 */
81 struct klist memorystatus_klist;
82 static lck_mtx_t memorystatus_klist_mutex;
83 static void memorystatus_klist_lock(void);
84 static void memorystatus_klist_unlock(void);
85
86 /*
87 * Memorystatus kevent filter routines
88 */
89 static int filt_memorystatusattach(struct knote *kn, struct kevent_qos_s *kev);
90 static void filt_memorystatusdetach(struct knote *kn);
91 static int filt_memorystatus(struct knote *kn, long hint);
92 static int filt_memorystatustouch(struct knote *kn, struct kevent_qos_s *kev);
93 static int filt_memorystatusprocess(struct knote *kn, struct kevent_qos_s *kev);
94
95 SECURITY_READ_ONLY_EARLY(struct filterops) memorystatus_filtops = {
96 .f_attach = filt_memorystatusattach,
97 .f_detach = filt_memorystatusdetach,
98 .f_event = filt_memorystatus,
99 .f_touch = filt_memorystatustouch,
100 .f_process = filt_memorystatusprocess,
101 };
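/*
 * Editor's illustration (not compiled as part of this file): a userspace
 * client typically reaches these filter ops through kqueue(2)/kevent(2) by
 * registering an EVFILT_MEMORYSTATUS knote; the first kevent() call below
 * lands in filt_memorystatusattach() and the second blocks until
 * filt_memorystatus()/filt_memorystatusprocess() deliver a level in fflags.
 * The ident of 0 and the particular flag set are assumptions for the sketch.
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *	EV_SET(&ev, 0, EVFILT_MEMORYSTATUS, EV_ADD | EV_ENABLE,
 *	    NOTE_MEMORYSTATUS_PRESSURE_NORMAL |
 *	    NOTE_MEMORYSTATUS_PRESSURE_WARN |
 *	    NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);
 *	kevent(kq, NULL, 0, &ev, 1, NULL);
 */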
102
103 /*
104 * Memorystatus notification events
105 */
106 enum {
107 kMemorystatusNoPressure = 0x1,
108 kMemorystatusPressure = 0x2,
109 kMemorystatusLowSwap = 0x4,
110 kMemorystatusProcLimitWarn = 0x8,
111 kMemorystatusProcLimitCritical = 0x10
112 };
113
114 #define INTER_NOTIFICATION_DELAY (250000) /* .25 second */
115 #define VM_PRESSURE_DECREASED_SMOOTHING_PERIOD 5000 /* milliseconds */
116 #define WARNING_NOTIFICATION_RESTING_PERIOD 25 /* seconds */
117 #define CRITICAL_NOTIFICATION_RESTING_PERIOD 25 /* seconds */
118
119 /*
120 * Memorystatus notification helper routines
121 */
122 static vm_pressure_level_t convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);
123 static boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t);
124 static void memorystatus_klist_reset_all_for_level(vm_pressure_level_t pressure_level_to_clear);
125 static struct knote *vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process);
126 static void vm_dispatch_memory_pressure(void);
127 kern_return_t memorystatus_update_vm_pressure(boolean_t target_foreground_process);
128
129 #if VM_PRESSURE_EVENTS
130
131 /*
132 * This value is the threshold that a process must meet to be considered for scavenging.
133 */
134 #if CONFIG_EMBEDDED
135 #define VM_PRESSURE_MINIMUM_RSIZE 6 /* MB */
136 #else /* CONFIG_EMBEDDED */
137 #define VM_PRESSURE_MINIMUM_RSIZE 10 /* MB */
138 #endif /* CONFIG_EMBEDDED */
139
140 static uint32_t vm_pressure_task_footprint_min = VM_PRESSURE_MINIMUM_RSIZE;
141
142 #if DEVELOPMENT || DEBUG
143 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_vm_pressure_task_footprint_min, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pressure_task_footprint_min, 0, "");
144 #endif /* DEVELOPMENT || DEBUG */
145
146 vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal;
147
148 /*
149 * We use this flag to signal if we have any HWM offenders
150 * on the system. This way we can reduce the number of wakeups
151 * of the memorystatus_thread when the system is between the
152 * "pressure" and "critical" threshold.
153 *
154 * The (re-)setting of this variable is done without any locks
155 * or synchronization simply because it is not possible (currently)
156 * to keep track of HWM offenders that drop down below their memory
157 * limit and/or exit. So, we choose to burn a couple of wasted wakeups
158 * by allowing the unguarded modification of this variable.
159 */
160 boolean_t memorystatus_hwm_candidates = 0;
161
162 #endif /* VM_PRESSURE_EVENTS */
163
164 #if CONFIG_JETSAM
165
166 extern unsigned int memorystatus_available_pages;
167 extern unsigned int memorystatus_available_pages_pressure;
168 extern unsigned int memorystatus_available_pages_critical;
169 extern unsigned int memorystatus_available_pages_critical_base;
170 extern unsigned int memorystatus_available_pages_critical_idle_offset;
171
172 #else /* CONFIG_JETSAM */
173
174 extern uint64_t memorystatus_available_pages;
175 extern uint64_t memorystatus_available_pages_pressure;
176 extern uint64_t memorystatus_available_pages_critical;
177
178 #endif /* CONFIG_JETSAM */
179
180 extern lck_mtx_t memorystatus_jetsam_fg_band_lock;
181 uint32_t memorystatus_jetsam_fg_band_waiters = 0;
182 static uint64_t memorystatus_jetsam_fg_band_timestamp_ns = 0; /* nanosec */
183 static uint64_t memorystatus_jetsam_fg_band_delay_ns = 5ull * 1000 * 1000 * 1000; /* nanosec */
184
185 extern boolean_t(*volatile consider_buffer_cache_collect)(int);
186
187 #if DEVELOPMENT || DEBUG
188 SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_jetsam_fg_band_delay_ns, CTLFLAG_RW | CTLFLAG_LOCKED,
189 &memorystatus_jetsam_fg_band_delay_ns, "");
190 #endif
191
192 static int
193 filt_memorystatusattach(struct knote *kn, __unused struct kevent_qos_s *kev)
194 {
195 int error;
196
197 kn->kn_flags |= EV_CLEAR; /* automatically set */
198 kn->kn_sdata = 0; /* incoming data is ignored */
199
200 error = memorystatus_knote_register(kn);
201 if (error) {
202 knote_set_error(kn, error);
203 }
204 return 0;
205 }
206
207 static void
208 filt_memorystatusdetach(struct knote *kn)
209 {
210 memorystatus_knote_unregister(kn);
211 }
212
213 static int
214 filt_memorystatus(struct knote *kn, long hint)
215 {
216 if (hint) {
217 switch (hint) {
218 case kMemorystatusNoPressure:
219 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
220 kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
221 }
222 break;
223 case kMemorystatusPressure:
224 if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) {
225 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
226 kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
227 }
228 } else if (memorystatus_vm_pressure_level == kVMPressureCritical) {
229 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
230 kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
231 }
232 }
233 break;
234 case kMemorystatusLowSwap:
235 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) {
236 kn->kn_fflags = NOTE_MEMORYSTATUS_LOW_SWAP;
237 }
238 break;
239
240 case kMemorystatusProcLimitWarn:
241 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) {
242 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN;
243 }
244 break;
245
246 case kMemorystatusProcLimitCritical:
247 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) {
248 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL;
249 }
250 break;
251
252 default:
253 break;
254 }
255 }
256
257 #if 0
258 if (kn->kn_fflags != 0) {
259 proc_t knote_proc = knote_get_kq(kn)->kq_p;
260 pid_t knote_pid = knote_proc->p_pid;
261
262 printf("filt_memorystatus: sending kn 0x%lx (event 0x%x) for pid (%d)\n",
263 (unsigned long)kn, kn->kn_fflags, knote_pid);
264 }
265 #endif
266
267 return kn->kn_fflags != 0;
268 }
269
270 static int
271 filt_memorystatustouch(struct knote *kn, struct kevent_qos_s *kev)
272 {
273 int res;
274 int prev_kn_sfflags = 0;
275
276 memorystatus_klist_lock();
277
278 /*
279 * copy in new kevent settings
280 * (saving the "desired" data and fflags).
281 */
282
283 prev_kn_sfflags = kn->kn_sfflags;
284 kn->kn_sfflags = (kev->fflags & EVFILT_MEMORYSTATUS_ALL_MASK);
285
286 #if !CONFIG_EMBEDDED
287 /*
288 * Only on desktop do we restrict notifications to
289 * one per active/inactive state (soft limits only).
290 */
291 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) {
292 /*
293 * Is there previous state to preserve?
294 */
295 if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) {
296 /*
297 * This knote was previously interested in proc_limit_warn,
298 * so yes, preserve previous state.
299 */
300 if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE) {
301 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE;
302 }
303 if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE) {
304 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE;
305 }
306 } else {
307 /*
308 * This knote was not previously interested in proc_limit_warn,
309 * but it is now. Set both states.
310 */
311 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE;
312 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE;
313 }
314 }
315
316 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) {
317 /*
318 * Is there previous state to preserve?
319 */
320 if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) {
321 /*
322 * This knote was previously interested in proc_limit_critical,
323 * so yes, preserve previous state.
324 */
325 if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE) {
326 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE;
327 }
328 if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE) {
329 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE;
330 }
331 } else {
332 /*
333 * This knote was not previously interested in proc_limit_critical,
334 * but it is now. Set both states.
335 */
336 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE;
337 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE;
338 }
339 }
340 #endif /* !CONFIG_EMBEDDED */
341
342 /*
343 * reset the output flags based on a
344 * combination of the old events and
345 * the new desired event list.
346 */
347 //kn->kn_fflags &= kn->kn_sfflags;
348
349 res = (kn->kn_fflags != 0);
350
351 memorystatus_klist_unlock();
352
353 return res;
354 }
355
356 static int
357 filt_memorystatusprocess(struct knote *kn, struct kevent_qos_s *kev)
358 {
359 int res = 0;
360
361 memorystatus_klist_lock();
362 if (kn->kn_fflags) {
363 knote_fill_kevent(kn, kev, 0);
364 res = 1;
365 }
366 memorystatus_klist_unlock();
367
368 return res;
369 }
370
371 static void
372 memorystatus_klist_lock(void)
373 {
374 lck_mtx_lock(&memorystatus_klist_mutex);
375 }
376
377 static void
378 memorystatus_klist_unlock(void)
379 {
380 lck_mtx_unlock(&memorystatus_klist_mutex);
381 }
382
383 void
384 memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr)
385 {
386 lck_mtx_init(&memorystatus_klist_mutex, grp, attr);
387 klist_init(&memorystatus_klist);
388 }
389
390 int
391 memorystatus_knote_register(struct knote *kn)
392 {
393 int error = 0;
394
395 memorystatus_klist_lock();
396
397 /*
398 * Support only userspace visible flags.
399 */
400 if ((kn->kn_sfflags & EVFILT_MEMORYSTATUS_ALL_MASK) == (unsigned int) kn->kn_sfflags) {
401 #if !CONFIG_EMBEDDED
402 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) {
403 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE;
404 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE;
405 }
406
407 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) {
408 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE;
409 kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE;
410 }
411 #endif /* !CONFIG_EMBEDDED */
412
413 KNOTE_ATTACH(&memorystatus_klist, kn);
414 } else {
415 error = ENOTSUP;
416 }
417
418 memorystatus_klist_unlock();
419
420 return error;
421 }
422
423 void
424 memorystatus_knote_unregister(struct knote *kn)
425 {
426 memorystatus_klist_lock();
427 KNOTE_DETACH(&memorystatus_klist, kn);
428 memorystatus_klist_unlock();
429 }
430
431 #if VM_PRESSURE_EVENTS
432
433 #if CONFIG_MEMORYSTATUS
434
435 int
436 memorystatus_send_note(int event_code, void *data, size_t data_length)
437 {
438 int ret;
439 struct kev_msg ev_msg;
440
441 ev_msg.vendor_code = KEV_VENDOR_APPLE;
442 ev_msg.kev_class = KEV_SYSTEM_CLASS;
443 ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS;
444
445 ev_msg.event_code = event_code;
446
447 ev_msg.dv[0].data_length = data_length;
448 ev_msg.dv[0].data_ptr = data;
449 ev_msg.dv[1].data_length = 0;
450
451 ret = kev_post_msg(&ev_msg);
452 if (ret) {
453 printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
454 }
455
456 return ret;
457 }
458
459 boolean_t
460 memorystatus_warn_process(pid_t pid, __unused boolean_t is_active, __unused boolean_t is_fatal, boolean_t limit_exceeded)
461 {
462 boolean_t ret = FALSE;
463 boolean_t found_knote = FALSE;
464 struct knote *kn = NULL;
465 int send_knote_count = 0;
466
467 /*
468 * See comment in sysctl_memorystatus_vm_pressure_send.
469 */
470
471 memorystatus_klist_lock();
472
473 SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
474 proc_t knote_proc = knote_get_kq(kn)->kq_p;
475 pid_t knote_pid = knote_proc->p_pid;
476
477 if (knote_pid == pid) {
478 /*
479 * By setting the "fflags" here, we are forcing
480 * a process to deal with the case where it's
481 * bumping up into its memory limits. If we don't
482 * do this here, we will end up depending on the
483 * system pressure snapshot evaluation in
484 * filt_memorystatus().
485 */
486
487 #if CONFIG_EMBEDDED
488 if (!limit_exceeded) {
489 /*
490 * Intentionally set either the unambiguous limit warning,
491 * the system-wide critical or the system-wide warning
492 * notification bit.
493 */
494
495 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) {
496 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN;
497 found_knote = TRUE;
498 send_knote_count++;
499 } else if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
500 kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
501 found_knote = TRUE;
502 send_knote_count++;
503 } else if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) {
504 kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
505 found_knote = TRUE;
506 send_knote_count++;
507 }
508 } else {
509 /*
510 * Send this notification when a process has exceeded a soft limit.
511 */
512 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) {
513 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL;
514 found_knote = TRUE;
515 send_knote_count++;
516 }
517 }
518 #else /* CONFIG_EMBEDDED */
519 if (!limit_exceeded) {
520 /*
521 * Processes on desktop are not expecting to handle a system-wide
522 * critical or system-wide warning notification from this path.
523 * Intentionally set only the unambiguous limit warning here.
524 *
525 * If the limit is soft, however, limit this to one notification per
526 * active/inactive limit (per each registered listener).
527 */
528
529 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) {
530 found_knote = TRUE;
531 if (!is_fatal) {
532 /*
533 * Restrict proc_limit_warn notifications when
534 * non-fatal (soft) limit is at play.
535 */
536 if (is_active) {
537 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE) {
538 /*
539 * Mark this knote for delivery.
540 */
541 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN;
542 /*
543 * And suppress it from future notifications.
544 */
545 kn->kn_sfflags &= ~NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE;
546 send_knote_count++;
547 }
548 } else {
549 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE) {
550 /*
551 * Mark this knote for delivery.
552 */
553 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN;
554 /*
555 * And suppress it from future notifications.
556 */
557 kn->kn_sfflags &= ~NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE;
558 send_knote_count++;
559 }
560 }
561 } else {
562 /*
563 * No restriction on proc_limit_warn notifications when
564 * fatal (hard) limit is at play.
565 */
566 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN;
567 send_knote_count++;
568 }
569 }
570 } else {
571 /*
572 * Send this notification when a process has exceeded a soft limit,
573 */
574
575 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) {
576 found_knote = TRUE;
577 if (!is_fatal) {
578 /*
579 * Restrict critical notifications for soft limits.
580 */
581
582 if (is_active) {
583 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE) {
584 /*
585 * Suppress future proc_limit_critical notifications
586 * for the active soft limit.
587 */
588 kn->kn_sfflags &= ~NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE;
589 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL;
590 send_knote_count++;
591 }
592 } else {
593 if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE) {
594 /*
595 * Suppress future proc_limit_critical notifications
596 * for the inactive soft limit.
597 */
598 kn->kn_sfflags &= ~NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE;
599 kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL;
600 send_knote_count++;
601 }
602 }
603 } else {
604 /*
605 * We should never be trying to send a critical notification for
606 * a hard limit... the process would be killed before it could be
607 * received.
608 */
609 panic("Caught sending pid %d a critical warning for a fatal limit.\n", pid);
610 }
611 }
612 }
613 #endif /* CONFIG_EMBEDDED */
614 }
615 }
616
617 if (found_knote) {
618 if (send_knote_count > 0) {
619 KNOTE(&memorystatus_klist, 0);
620 }
621 ret = TRUE;
622 }
623
624 memorystatus_klist_unlock();
625
626 return ret;
627 }
628
629 /*
630 * Can only be set by the current task on itself.
631 */
632 int
633 memorystatus_low_mem_privileged_listener(uint32_t op_flags)
634 {
635 boolean_t set_privilege = FALSE;
636 /*
637 * Need an entitlement check here?
638 */
639 if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE) {
640 set_privilege = TRUE;
641 } else if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE) {
642 set_privilege = FALSE;
643 } else {
644 return EINVAL;
645 }
646
647 return task_low_mem_privileged_listener(current_task(), set_privilege, NULL);
648 }
649
650 int
651 memorystatus_send_pressure_note(pid_t pid)
652 {
653 MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid);
654 return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid));
655 }
656
657 boolean_t
658 memorystatus_is_foreground_locked(proc_t p)
659 {
660 return (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) ||
661 (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT);
662 }
663
664 /*
665 * This is meant for stackshot and kperf -- it does not take the proc_list_lock
666 * to access the p_memstat_dirty field.
667 */
668 void
669 memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit)
670 {
671 if (!v) {
672 *is_dirty = FALSE;
673 *is_dirty_tracked = FALSE;
674 *allow_idle_exit = FALSE;
675 } else {
676 proc_t p = (proc_t)v;
677 *is_dirty = (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) != 0;
678 *is_dirty_tracked = (p->p_memstat_dirty & P_DIRTY_TRACK) != 0;
679 *allow_idle_exit = (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) != 0;
680 }
681 }
682
683 boolean_t
684 memorystatus_bg_pressure_eligible(proc_t p)
685 {
686 boolean_t eligible = FALSE;
687
688 proc_list_lock();
689
690 MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state);
691
692 /* Foreground processes have already been dealt with at this point, so just test for eligibility */
693 if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) {
694 eligible = TRUE;
695 }
696
697 if (p->p_memstat_effectivepriority < JETSAM_PRIORITY_BACKGROUND_OPPORTUNISTIC) {
698 /*
699 * IDLE and IDLE_DEFERRED bands contain processes
700 * that have dropped memory to be under their inactive
701 * memory limits. And so they can't really give back
702 * anything.
703 */
704 eligible = FALSE;
705 }
706
707 proc_list_unlock();
708
709 return eligible;
710 }
711
712 void
713 memorystatus_send_low_swap_note(void)
714 {
715 struct knote *kn = NULL;
716
717 memorystatus_klist_lock();
718 SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
719 /* We call is_knote_registered_modify_task_pressure_bits to check if the sfflags for the
720 * current note contain NOTE_MEMORYSTATUS_LOW_SWAP. Once we find one note in the memorystatus_klist
721 * that has the NOTE_MEMORYSTATUS_LOW_SWAP flags in its sfflags set, we call KNOTE with
722 * kMemorystatusLowSwap as the hint to process and update all knotes on the memorystatus_klist accordingly. */
723 if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) {
724 KNOTE(&memorystatus_klist, kMemorystatusLowSwap);
725 break;
726 }
727 }
728
729 memorystatus_klist_unlock();
730 }
731
732 #endif /* CONFIG_MEMORYSTATUS */
733
734 /*
735 * kn_max - knote
736 *
737 * knote_pressure_level - to check if the knote is registered for this notification level.
738 *
739 * task - task whose bits we'll be modifying
740 *
741 * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again.
742 *
743 * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately.
744 *
745 */
746
747 static boolean_t
748 is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set)
749 {
750 if (kn_max->kn_sfflags & knote_pressure_level) {
751 if (pressure_level_to_clear && task_has_been_notified(task, pressure_level_to_clear) == TRUE) {
752 task_clear_has_been_notified(task, pressure_level_to_clear);
753 }
754
755 task_mark_has_been_notified(task, pressure_level_to_set);
756 return TRUE;
757 }
758
759 return FALSE;
760 }
761
762 static void
763 memorystatus_klist_reset_all_for_level(vm_pressure_level_t pressure_level_to_clear)
764 {
765 struct knote *kn = NULL;
766
767 memorystatus_klist_lock();
768 SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
769 proc_t p = PROC_NULL;
770 struct task* t = TASK_NULL;
771
772 p = knote_get_kq(kn)->kq_p;
773 proc_list_lock();
774 if (p != proc_ref_locked(p)) {
775 p = PROC_NULL;
776 proc_list_unlock();
777 continue;
778 }
779 proc_list_unlock();
780
781 t = (struct task *)(p->task);
782
783 task_clear_has_been_notified(t, pressure_level_to_clear);
784
785 proc_rele(p);
786 }
787
788 memorystatus_klist_unlock();
789 }
790
791 /*
792 * Used by the vm_pressure_thread which is
793 * signalled from within vm_pageout_scan().
794 */
795
796 void
797 consider_vm_pressure_events(void)
798 {
799 vm_dispatch_memory_pressure();
800 }
801
802 static void
803 vm_dispatch_memory_pressure(void)
804 {
805 memorystatus_update_vm_pressure(FALSE);
806 }
807
808 static struct knote *
809 vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process)
810 {
811 struct knote *kn = NULL, *kn_max = NULL;
812 uint64_t resident_max = 0;/* MB */
813 struct timeval curr_tstamp = {0, 0};
814 int elapsed_msecs = 0;
815 int selected_task_importance = 0;
816 static int pressure_snapshot = -1;
817 boolean_t pressure_increase = FALSE;
818
819 if (pressure_snapshot == -1) {
820 /*
821 * Initial snapshot.
822 */
823 pressure_snapshot = level;
824 pressure_increase = TRUE;
825 } else {
826 if (level && (level >= pressure_snapshot)) {
827 pressure_increase = TRUE;
828 } else {
829 pressure_increase = FALSE;
830 }
831
832 pressure_snapshot = level;
833 }
834
835 if (pressure_increase == TRUE) {
836 /*
837 * We'll start by considering the largest
838 * unimportant task in our list.
839 */
840 selected_task_importance = INT_MAX;
841 } else {
842 /*
843 * We'll start by considering the largest
844 * important task in our list.
845 */
846 selected_task_importance = 0;
847 }
848
849 microuptime(&curr_tstamp);
850
851 SLIST_FOREACH(kn, candidate_list, kn_selnext) {
852 uint64_t resident_size = 0;/* MB */
853 proc_t p = PROC_NULL;
854 struct task* t = TASK_NULL;
855 int curr_task_importance = 0;
856 boolean_t consider_knote = FALSE;
857 boolean_t privileged_listener = FALSE;
858
859 p = knote_get_kq(kn)->kq_p;
860 proc_list_lock();
861 if (p != proc_ref_locked(p)) {
862 p = PROC_NULL;
863 proc_list_unlock();
864 continue;
865 }
866 proc_list_unlock();
867
868 #if CONFIG_MEMORYSTATUS
869 if (target_foreground_process == TRUE && !memorystatus_is_foreground_locked(p)) {
870 /*
871 * Skip process not marked foreground.
872 */
873 proc_rele(p);
874 continue;
875 }
876 #endif /* CONFIG_MEMORYSTATUS */
877
878 t = (struct task *)(p->task);
879
880 timevalsub(&curr_tstamp, &p->vm_pressure_last_notify_tstamp);
881 elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;
882
883 vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level);
884
885 if ((kn->kn_sfflags & dispatch_level) == 0) {
886 proc_rele(p);
887 continue;
888 }
889
890 #if CONFIG_MEMORYSTATUS
891 if (target_foreground_process == FALSE && !memorystatus_bg_pressure_eligible(p)) {
892 VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
893 proc_rele(p);
894 continue;
895 }
896 #endif /* CONFIG_MEMORYSTATUS */
897
898 #if CONFIG_EMBEDDED
899 curr_task_importance = p->p_memstat_effectivepriority;
900 #else /* CONFIG_EMBEDDED */
901 curr_task_importance = task_importance_estimate(t);
902 #endif /* CONFIG_EMBEDDED */
903
904 /*
905 * Privileged listeners are only considered in the multi-level pressure scheme
906 * AND only if the pressure is increasing.
907 */
908 if (level > 0) {
909 if (task_has_been_notified(t, level) == FALSE) {
910 /*
911 * Is this a privileged listener?
912 */
913 if (task_low_mem_privileged_listener(t, FALSE, &privileged_listener) == 0) {
914 if (privileged_listener) {
915 kn_max = kn;
916 proc_rele(p);
917 goto done_scanning;
918 }
919 }
920 } else {
921 proc_rele(p);
922 continue;
923 }
924 } else if (level == 0) {
925 /*
926 * Task wasn't notified when the pressure was increasing and so
927 * no need to notify it that the pressure is decreasing.
928 */
929 if ((task_has_been_notified(t, kVMPressureWarning) == FALSE) && (task_has_been_notified(t, kVMPressureCritical) == FALSE)) {
930 proc_rele(p);
931 continue;
932 }
933 }
934
935 /*
936 * We don't want a small process to block large processes from
937 * being notified again. <rdar://problem/7955532>
938 */
939 resident_size = (get_task_phys_footprint(t)) / (1024 * 1024ULL); /* MB */
940
941 if (resident_size >= vm_pressure_task_footprint_min) {
942 if (level > 0) {
943 /*
944 * Warning or Critical Pressure.
945 */
946 if (pressure_increase) {
947 if ((curr_task_importance < selected_task_importance) ||
948 ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
949 /*
950 * We have found a candidate process which is:
951 * a) at a lower importance than the current selected process
952 * OR
953 * b) has importance equal to that of the current selected process but is larger
954 */
955
956 consider_knote = TRUE;
957 }
958 } else {
959 if ((curr_task_importance > selected_task_importance) ||
960 ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
961 /*
962 * We have found a candidate process which is:
963 * a) at a higher importance than the current selected process
964 * OR
965 * b) has importance equal to that of the current selected process but is larger
966 */
967
968 consider_knote = TRUE;
969 }
970 }
971 } else if (level == 0) {
972 /*
973 * Pressure back to normal.
974 */
975 if ((curr_task_importance > selected_task_importance) ||
976 ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) {
977 consider_knote = TRUE;
978 }
979 }
980
981 if (consider_knote) {
982 resident_max = resident_size;
983 kn_max = kn;
984 selected_task_importance = curr_task_importance;
985 consider_knote = FALSE; /* reset for the next candidate */
986 }
987 } else {
988 /* There was no candidate with enough resident memory to scavenge */
989 VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %llu resident...\n", p->p_pid, resident_size);
990 }
991 proc_rele(p);
992 }
993
994 done_scanning:
995 if (kn_max) {
996 VM_DEBUG_CONSTANT_EVENT(vm_pressure_event, VM_PRESSURE_EVENT, DBG_FUNC_NONE, knote_get_kq(kn_max)->kq_p->p_pid, resident_max, 0, 0);
997 VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %llu resident\n", knote_get_kq(kn_max)->kq_p->p_pid, resident_max);
998 }
999
1000 return kn_max;
1001 }
1002
1003 static uint64_t next_warning_notification_sent_at_ts = 0;
1004 static uint64_t next_critical_notification_sent_at_ts = 0;
1005
1006 boolean_t memorystatus_manual_testing_on = FALSE;
1007 vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal;
1008
1009 kern_return_t
1010 memorystatus_update_vm_pressure(boolean_t target_foreground_process)
1011 {
1012 struct knote *kn_max = NULL;
1013 struct knote *kn_cur = NULL, *kn_temp = NULL;/* for safe list traversal */
1014 pid_t target_pid = -1;
1015 struct klist dispatch_klist = { NULL };
1016 proc_t target_proc = PROC_NULL;
1017 struct task *task = NULL;
1018 boolean_t found_candidate = FALSE;
1019
1020 static vm_pressure_level_t level_snapshot = kVMPressureNormal;
1021 static vm_pressure_level_t prev_level_snapshot = kVMPressureNormal;
1022 boolean_t smoothing_window_started = FALSE;
1023 struct timeval smoothing_window_start_tstamp = {0, 0};
1024 struct timeval curr_tstamp = {0, 0};
1025 int elapsed_msecs = 0;
1026 uint64_t curr_ts = mach_absolute_time();
1027
1028 #if !CONFIG_JETSAM
1029 #define MAX_IDLE_KILLS 100 /* limit the number of idle kills allowed */
1030
1031 int idle_kill_counter = 0;
1032
1033 /*
1034 * On desktop we take this opportunity to relieve memory pressure
1035 * by immediately killing idle exitable processes. We use a delay
1036 * to avoid overkill. And we impose a max counter as a fail safe
1037 * in case daemons re-launch too fast.
1038 */
1039 while ((memorystatus_vm_pressure_level != kVMPressureNormal) && (idle_kill_counter < MAX_IDLE_KILLS)) {
1040 if (memorystatus_idle_exit_from_VM() == FALSE) {
1041 /* No idle exitable processes left to kill */
1042 break;
1043 }
1044 idle_kill_counter++;
1045
1046 if (memorystatus_manual_testing_on == TRUE) {
1047 /*
1048 * Skip the delay when testing
1049 * the pressure notification scheme.
1050 */
1051 } else {
1052 delay(1000000); /* 1 second */
1053 }
1054 }
1055 #endif /* !CONFIG_JETSAM */
1056
1057 if (level_snapshot != kVMPressureNormal) {
1058 /*
1059 * Check to see if we are still in the 'resting' period
1060 * after having notified all clients interested in
1061 * a particular pressure level.
1062 */
1063
1064 level_snapshot = memorystatus_vm_pressure_level;
1065
1066 if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) {
1067 if (next_warning_notification_sent_at_ts) {
1068 if (curr_ts < next_warning_notification_sent_at_ts) {
1069 delay(INTER_NOTIFICATION_DELAY * 4 /* 1 sec */);
1070 return KERN_SUCCESS;
1071 }
1072
1073 next_warning_notification_sent_at_ts = 0;
1074 memorystatus_klist_reset_all_for_level(kVMPressureWarning);
1075 }
1076 } else if (level_snapshot == kVMPressureCritical) {
1077 if (next_critical_notification_sent_at_ts) {
1078 if (curr_ts < next_critical_notification_sent_at_ts) {
1079 delay(INTER_NOTIFICATION_DELAY * 4 /* 1 sec */);
1080 return KERN_SUCCESS;
1081 }
1082 next_critical_notification_sent_at_ts = 0;
1083 memorystatus_klist_reset_all_for_level(kVMPressureCritical);
1084 }
1085 }
1086 }
1087
1088 while (1) {
1089 /*
1090 * There is a race window here. But it's not clear
1091 * how much we benefit from having extra synchronization.
1092 */
1093 level_snapshot = memorystatus_vm_pressure_level;
1094
1095 if (prev_level_snapshot > level_snapshot) {
1096 /*
1097 * Pressure decreased? Let's take a little breather
1098 * and see if this condition stays.
1099 */
1100 if (smoothing_window_started == FALSE) {
1101 smoothing_window_started = TRUE;
1102 microuptime(&smoothing_window_start_tstamp);
1103 }
1104
1105 microuptime(&curr_tstamp);
1106 timevalsub(&curr_tstamp, &smoothing_window_start_tstamp);
1107 elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000;
1108
1109 if (elapsed_msecs < VM_PRESSURE_DECREASED_SMOOTHING_PERIOD) {
1110 delay(INTER_NOTIFICATION_DELAY);
1111 continue;
1112 }
1113 }
1114
1115 prev_level_snapshot = level_snapshot;
1116 smoothing_window_started = FALSE;
1117
1118 memorystatus_klist_lock();
1119 kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot, target_foreground_process);
1120
1121 if (kn_max == NULL) {
1122 memorystatus_klist_unlock();
1123
1124 /*
1125 * No more level-based clients to notify.
1126 *
1127 * Start the 'resting' window within which clients will not be re-notified.
1128 */
1129
1130 if (level_snapshot != kVMPressureNormal) {
1131 if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) {
1132 nanoseconds_to_absolutetime(WARNING_NOTIFICATION_RESTING_PERIOD * NSEC_PER_SEC, &curr_ts);
1133
1134 /* Next warning notification (if nothing changes) won't be sent before...*/
1135 next_warning_notification_sent_at_ts = mach_absolute_time() + curr_ts;
1136 }
1137
1138 if (level_snapshot == kVMPressureCritical) {
1139 nanoseconds_to_absolutetime(CRITICAL_NOTIFICATION_RESTING_PERIOD * NSEC_PER_SEC, &curr_ts);
1140
1141 /* Next critical notification (if nothing changes) won't be sent before...*/
1142 next_critical_notification_sent_at_ts = mach_absolute_time() + curr_ts;
1143 }
1144 }
1145 return KERN_FAILURE;
1146 }
1147
1148 target_proc = knote_get_kq(kn_max)->kq_p;
1149
1150 proc_list_lock();
1151 if (target_proc != proc_ref_locked(target_proc)) {
1152 target_proc = PROC_NULL;
1153 proc_list_unlock();
1154 memorystatus_klist_unlock();
1155 continue;
1156 }
1157 proc_list_unlock();
1158
1159 target_pid = target_proc->p_pid;
1160
1161 task = (struct task *)(target_proc->task);
1162
1163 if (level_snapshot != kVMPressureNormal) {
1164 if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) {
1165 if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, 0, kVMPressureWarning) == TRUE) {
1166 found_candidate = TRUE;
1167 }
1168 } else {
1169 if (level_snapshot == kVMPressureCritical) {
1170 if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, 0, kVMPressureCritical) == TRUE) {
1171 found_candidate = TRUE;
1172 }
1173 }
1174 }
1175 } else {
1176 if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
1177 task_clear_has_been_notified(task, kVMPressureWarning);
1178 task_clear_has_been_notified(task, kVMPressureCritical);
1179
1180 found_candidate = TRUE;
1181 }
1182 }
1183
1184 if (found_candidate == FALSE) {
1185 proc_rele(target_proc);
1186 memorystatus_klist_unlock();
1187 continue;
1188 }
1189
1190 SLIST_FOREACH_SAFE(kn_cur, &memorystatus_klist, kn_selnext, kn_temp) {
1191 int knote_pressure_level = convert_internal_pressure_level_to_dispatch_level(level_snapshot);
1192
1193 if (is_knote_registered_modify_task_pressure_bits(kn_cur, knote_pressure_level, task, 0, level_snapshot) == TRUE) {
1194 proc_t knote_proc = knote_get_kq(kn_cur)->kq_p;
1195 pid_t knote_pid = knote_proc->p_pid;
1196 if (knote_pid == target_pid) {
1197 KNOTE_DETACH(&memorystatus_klist, kn_cur);
1198 KNOTE_ATTACH(&dispatch_klist, kn_cur);
1199 }
1200 }
1201 }
1202
1203 KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure);
1204
1205 SLIST_FOREACH_SAFE(kn_cur, &dispatch_klist, kn_selnext, kn_temp) {
1206 KNOTE_DETACH(&dispatch_klist, kn_cur);
1207 KNOTE_ATTACH(&memorystatus_klist, kn_cur);
1208 }
1209
1210 memorystatus_klist_unlock();
1211
1212 microuptime(&target_proc->vm_pressure_last_notify_tstamp);
1213 proc_rele(target_proc);
1214
1215 if (memorystatus_manual_testing_on == TRUE && target_foreground_process == TRUE) {
1216 break;
1217 }
1218
1219 if (memorystatus_manual_testing_on == TRUE) {
1220 /*
1221 * Testing out the pressure notification scheme.
1222 * No need for delays etc.
1223 */
1224 } else {
1225 uint32_t sleep_interval = INTER_NOTIFICATION_DELAY;
1226 #if CONFIG_JETSAM
1227 unsigned int page_delta = 0;
1228 unsigned int skip_delay_page_threshold = 0;
1229
1230 assert(memorystatus_available_pages_pressure >= memorystatus_available_pages_critical_base);
1231
1232 page_delta = (memorystatus_available_pages_pressure - memorystatus_available_pages_critical_base) / 2;
1233 skip_delay_page_threshold = memorystatus_available_pages_pressure - page_delta;
1234
1235 if (memorystatus_available_pages <= skip_delay_page_threshold) {
1236 /*
1237 * We are nearing the critical mark fast and can't afford to wait between
1238 * notifications.
1239 */
1240 sleep_interval = 0;
1241 }
1242 #endif /* CONFIG_JETSAM */
1243
1244 if (sleep_interval) {
1245 delay(sleep_interval);
1246 }
1247 }
1248 }
1249
1250 return KERN_SUCCESS;
1251 }
1252
1253 static uint32_t
1254 convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level)
1255 {
1256 uint32_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
1257
1258 switch (internal_pressure_level) {
1259 case kVMPressureNormal:
1260 {
1261 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL;
1262 break;
1263 }
1264
1265 case kVMPressureWarning:
1266 case kVMPressureUrgent:
1267 {
1268 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN;
1269 break;
1270 }
1271
1272 case kVMPressureCritical:
1273 {
1274 dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL;
1275 break;
1276 }
1277
1278 default:
1279 break;
1280 }
1281
1282 return dispatch_level;
1283 }
1284
1285 /*
1286 * Notify any kexts that are waiting for notification that jetsam
1287 * is approaching the foreground bands. They should use this notification
1288 * to free cached memory.
1289 */
1290 void
1291 memorystatus_issue_fg_band_notify(void)
1292 {
1293 uint64_t now;
1294
1295 lck_mtx_lock(&memorystatus_jetsam_fg_band_lock);
1296 absolutetime_to_nanoseconds(mach_absolute_time(), &now);
1297 if (now - memorystatus_jetsam_fg_band_timestamp_ns < memorystatus_jetsam_fg_band_delay_ns) {
1298 lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock);
1299 return;
1300 }
1301
1302 if (memorystatus_jetsam_fg_band_waiters > 0) {
1303 thread_wakeup(&memorystatus_jetsam_fg_band_waiters);
1304 memorystatus_jetsam_fg_band_waiters = 0;
1305 memorystatus_jetsam_fg_band_timestamp_ns = now;
1306 }
1307 lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock);
1308
1309 /* Notify the buffer cache, file systems, etc. to jettison everything they can. */
1310 if (consider_buffer_cache_collect != NULL) {
1311 (void)(*consider_buffer_cache_collect)(1);
1312 }
1313 }
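/*
 * Editor's sketch of the waiter side (an assumption; the real consumers live
 * outside this file): a kext or kernel subsystem that wants this notification
 * would block on the same event address, bumping the waiter count under
 * memorystatus_jetsam_fg_band_lock so the thread_wakeup() above has someone
 * to release.
 *
 *	lck_mtx_lock(&memorystatus_jetsam_fg_band_lock);
 *	if (assert_wait((event_t)&memorystatus_jetsam_fg_band_waiters,
 *	    THREAD_INTERRUPTIBLE) == THREAD_WAITING) {
 *		memorystatus_jetsam_fg_band_waiters++;
 *		lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock);
 *		thread_block(THREAD_CONTINUE_NULL);
 *	} else {
 *		lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock);
 *	}
 */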
1314
1315
1316 /*
1317 * Memorystatus notification debugging support
1318 */
1319
1320 static int
1321 sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS
1322 {
1323 #pragma unused(arg1, arg2, oidp)
1324 #if CONFIG_EMBEDDED
1325 int error = 0;
1326
1327 error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0);
1328 if (error) {
1329 return error;
1330 }
1331
1332 #endif /* CONFIG_EMBEDDED */
1333 uint32_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level);
1334
1335 return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level));
1336 }
1337
1338 #if DEBUG || DEVELOPMENT
1339
1340 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
1341 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");
1342
1343 #else /* DEBUG || DEVELOPMENT */
1344
1345 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED,
1346 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", "");
1347
1348 #endif /* DEBUG || DEVELOPMENT */
1349
1350 /*
1351 * Trigger levels to test the mechanism.
1352 * Can be used via a sysctl.
1353 */
1354 #define TEST_LOW_MEMORY_TRIGGER_ONE 1
1355 #define TEST_LOW_MEMORY_TRIGGER_ALL 2
1356 #define TEST_PURGEABLE_TRIGGER_ONE 3
1357 #define TEST_PURGEABLE_TRIGGER_ALL 4
1358 #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE 5
1359 #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL 6
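/*
 * Editor's sketch (assumptions: run as root, with the NOTE_MEMORYSTATUS_PRESSURE_*
 * constants taken from <sys/event.h>): the value written to the sysctl packs the
 * trigger request in its upper 16 bits and a pressure level in its lower 16 bits,
 * matching the decoding in sysctl_memorypressure_manual_trigger() below. For
 * example, to simulate warning-level pressure against one optimal candidate:
 *
 *	int value = (TEST_LOW_MEMORY_TRIGGER_ONE << 16) | NOTE_MEMORYSTATUS_PRESSURE_WARN;
 *	sysctlbyname("kern.memorypressure_manual_trigger", NULL, NULL, &value, sizeof(value));
 */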
1360
1361 static int
1362 sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS
1363 {
1364 #pragma unused(arg1, arg2)
1365
1366 int level = 0;
1367 int error = 0;
1368 int pressure_level = 0;
1369 int trigger_request = 0;
1370 int force_purge;
1371
1372 error = sysctl_handle_int(oidp, &level, 0, req);
1373 if (error || !req->newptr) {
1374 return error;
1375 }
1376
1377 memorystatus_manual_testing_on = TRUE;
1378
1379 trigger_request = (level >> 16) & 0xFFFF;
1380 pressure_level = (level & 0xFFFF);
1381
1382 if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE ||
1383 trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) {
1384 return EINVAL;
1385 }
1386 switch (pressure_level) {
1387 case NOTE_MEMORYSTATUS_PRESSURE_NORMAL:
1388 case NOTE_MEMORYSTATUS_PRESSURE_WARN:
1389 case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL:
1390 break;
1391 default:
1392 return EINVAL;
1393 }
1394
1395 /*
1396 * The pressure level is being set from user-space.
1397 * And user-space uses the constants in sys/event.h.
1398 * So we translate those events to our internal levels here.
1399 */
1400 if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
1401 memorystatus_manual_testing_level = kVMPressureNormal;
1402 force_purge = 0;
1403 } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) {
1404 memorystatus_manual_testing_level = kVMPressureWarning;
1405 force_purge = vm_pageout_state.memorystatus_purge_on_warning;
1406 } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) {
1407 memorystatus_manual_testing_level = kVMPressureCritical;
1408 force_purge = vm_pageout_state.memorystatus_purge_on_critical;
1409 }
1410
1411 memorystatus_vm_pressure_level = memorystatus_manual_testing_level;
1412
1413 /* purge according to the new pressure level */
1414 switch (trigger_request) {
1415 case TEST_PURGEABLE_TRIGGER_ONE:
1416 case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE:
1417 if (force_purge == 0) {
1418 /* no purging requested */
1419 break;
1420 }
1421 vm_purgeable_object_purge_one_unlocked(force_purge);
1422 break;
1423 case TEST_PURGEABLE_TRIGGER_ALL:
1424 case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL:
1425 if (force_purge == 0) {
1426 /* no purging requested */
1427 break;
1428 }
1429 while (vm_purgeable_object_purge_one_unlocked(force_purge)) {
1430 ;
1431 }
1432 break;
1433 }
1434
1435 if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) ||
1436 (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) {
1437 memorystatus_update_vm_pressure(TRUE);
1438 }
1439
1440 if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) ||
1441 (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) {
1442 while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) {
1443 continue;
1444 }
1445 }
1446
1447 if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) {
1448 memorystatus_manual_testing_on = FALSE;
1449 }
1450
1451 return 0;
1452 }
1453
1454 SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
1455 0, 0, &sysctl_memorypressure_manual_trigger, "I", "");
1456
1457
1458 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_state.memorystatus_purge_on_warning, 0, "");
1459 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_state.memorystatus_purge_on_urgent, 0, "");
1460 SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_state.memorystatus_purge_on_critical, 0, "");
1461
1462 #if DEBUG || DEVELOPMENT
1463 SYSCTL_UINT(_kern, OID_AUTO, memorystatus_vm_pressure_events_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pressure_events_enabled, 0, "");
1464
1465 #if 0
1466 #if CONFIG_JETSAM && VM_PRESSURE_EVENTS
1467 static boolean_t
1468 memorystatus_issue_pressure_kevent(boolean_t pressured)
1469 {
1470 memorystatus_klist_lock();
1471 KNOTE(&memorystatus_klist, pressured ? kMemorystatusPressure : kMemorystatusNoPressure);
1472 memorystatus_klist_unlock();
1473 return TRUE;
1474 }
1475 #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */
1476 #endif /* 0 */
1477
1478 /*
1479 * This routine is used for targeted notifications regardless of system memory pressure
1480 * and regardless of whether or not the process has already been notified.
1481 * It bypasses and has no effect on the only-one-notification per soft-limit policy.
1482 *
1483 * "memnote" is the current user.
1484 */
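/*
 * Editor's sketch (assumptions: DEBUG or DEVELOPMENT kernel, caller is root or
 * holds MEMORYSTATUS_ENTITLEMENT, and the target pid already has an
 * EVFILT_MEMORYSTATUS knote registered): the 64-bit value packs the target pid
 * in its low 32 bits and the desired NOTE_MEMORYSTATUS_* flags in its high
 * 32 bits, matching the decoding below. For example, to force a proc-limit
 * warning at a hypothetical target_pid:
 *
 *	uint64_t value = ((uint64_t)NOTE_MEMORYSTATUS_PROC_LIMIT_WARN << 32) | (uint32_t)target_pid;
 *	sysctlbyname("kern.memorystatus_vm_pressure_send", NULL, NULL, &value, sizeof(value));
 */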
1485
1486 static int
1487 sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS
1488 {
1489 #pragma unused(arg1, arg2)
1490 /* Need to be root or have memorystatus entitlement */
1491 if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) {
1492 return EPERM;
1493 }
1494
1495 int error = 0, pid = 0;
1496 struct knote *kn = NULL;
1497 boolean_t found_knote = FALSE;
1498 int fflags = 0; /* filter flags for EVFILT_MEMORYSTATUS */
1499 uint64_t value = 0;
1500
1501 error = sysctl_handle_quad(oidp, &value, 0, req);
1502 if (error || !req->newptr) {
1503 return error;
1504 }
1505
1506 /*
1507 * Find the pid in the low 32 bits of value passed in.
1508 */
1509 pid = (int)(value & 0xFFFFFFFF);
1510
1511 /*
1512 * Find notification in the high 32 bits of the value passed in.
1513 */
1514 fflags = (int)((value >> 32) & 0xFFFFFFFF);
1515
1516 /*
1517 * For backwards compatibility, when no notification is
1518 * passed in, default to the NOTE_MEMORYSTATUS_PRESSURE_WARN
1519 */
1520 if (fflags == 0) {
1521 fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN;
1522 // printf("memorystatus_vm_pressure_send: using default notification [0x%x]\n", fflags);
1523 }
1524
1525 /* wake up everybody waiting for kVMPressureJetsam */
1526 if (fflags == NOTE_MEMORYSTATUS_JETSAM_FG_BAND) {
1527 memorystatus_issue_fg_band_notify();
1528 return error;
1529 }
1530
1531 /*
1532 * See event.h ... fflags for EVFILT_MEMORYSTATUS
1533 */
1534 if (!((fflags == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) ||
1535 (fflags == NOTE_MEMORYSTATUS_PRESSURE_WARN) ||
1536 (fflags == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) ||
1537 (fflags == NOTE_MEMORYSTATUS_LOW_SWAP) ||
1538 (fflags == NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) ||
1539 (fflags == NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) ||
1540 (((fflags & NOTE_MEMORYSTATUS_MSL_STATUS) != 0 &&
1541 ((fflags & ~NOTE_MEMORYSTATUS_MSL_STATUS) == 0))))) {
1542 printf("memorystatus_vm_pressure_send: notification [0x%x] not supported \n", fflags);
1543 error = 1;
1544 return error;
1545 }
1546
1547 /*
1548 * Forcibly send pid a memorystatus notification.
1549 */
1550
1551 memorystatus_klist_lock();
1552
1553 SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) {
1554 proc_t knote_proc = knote_get_kq(kn)->kq_p;
1555 pid_t knote_pid = knote_proc->p_pid;
1556
1557 if (knote_pid == pid) {
1558 /*
1559 * Forcibly send this pid a memorystatus notification.
1560 */
1561 kn->kn_fflags = fflags;
1562 found_knote = TRUE;
1563 }
1564 }
1565
1566 if (found_knote) {
1567 KNOTE(&memorystatus_klist, 0);
1568 printf("memorystatus_vm_pressure_send: (value 0x%llx) notification [0x%x] sent to process [%d] \n", value, fflags, pid);
1569 error = 0;
1570 } else {
1571 printf("memorystatus_vm_pressure_send: (value 0x%llx) notification [0x%x] not sent to process [%d] (none registered?)\n", value, fflags, pid);
1572 error = 1;
1573 }
1574
1575 memorystatus_klist_unlock();
1576
1577 return error;
1578 }
1579
1580 SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED | CTLFLAG_ANYBODY,
1581 0, 0, &sysctl_memorystatus_vm_pressure_send, "Q", "");
1582
1583 #endif /* DEBUG || DEVELOPMENT */
1584
1585 #endif /* VM_PRESSURE_EVENTS */