1 | /* |
2 | * Copyright (c) 2006-2018 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | * | |
28 | */ | |
29 | ||
30 | #include <sys/kern_event.h> | |
31 | #include <kern/sched_prim.h> | |
32 | #include <kern/assert.h> |
33 | #include <kern/debug.h> | |
34 | #include <kern/locks.h> | |
35 | #include <kern/task.h> | |
36 | #include <kern/thread.h> | |
37 | #include <kern/host.h> | |
38 | #include <kern/policy_internal.h> | |
39 | #include <kern/thread_group.h> | |
40 | ||
41 | #include <IOKit/IOBSD.h> | |
42 | ||
43 | #include <libkern/libkern.h> | |
44 | #include <mach/coalition.h> | |
45 | #include <mach/mach_time.h> | |
46 | #include <mach/task.h> | |
47 | #include <mach/host_priv.h> | |
48 | #include <mach/mach_host.h> | |
49 | #include <os/log.h> | |
50 | #include <pexpert/pexpert.h> | |
51 | #include <sys/coalition.h> | |
52 | #include <sys/kern_event.h> | |
53 | #include <sys/proc.h> | |
54 | #include <sys/proc_info.h> | |
55 | #include <sys/reason.h> | |
56 | #include <sys/signal.h> | |
57 | #include <sys/signalvar.h> | |
58 | #include <sys/sysctl.h> | |
59 | #include <sys/sysproto.h> | |
60 | #include <sys/time.h> | |
61 | #include <sys/wait.h> | |
62 | #include <sys/tree.h> | |
63 | #include <sys/priv.h> | |
64 | #include <vm/vm_pageout.h> | |
65 | #include <vm/vm_protos.h> | |
66 | #include <mach/machine/sdt.h> | |
67 | #include <libkern/section_keywords.h> | |
68 | #include <stdatomic.h> | |
69 | ||
70 | #if CONFIG_FREEZE | |
71 | #include <vm/vm_map.h> | |
72 | #endif /* CONFIG_FREEZE */ | |
73 | ||
74 | #include <sys/kern_memorystatus.h> | |
75 | #include <sys/kern_memorystatus_notify.h> | |
76 | ||
77 | /* | |
78 | * Memorystatus klist structures | |
79 | */ | |
80 | struct klist memorystatus_klist; | |
81 | static lck_mtx_t memorystatus_klist_mutex; | |
82 | static void memorystatus_klist_lock(void); | |
83 | static void memorystatus_klist_unlock(void); | |
84 | ||
85 | /* | |
86 | * Memorystatus kevent filter routines | |
87 | */ | |
88 | static int filt_memorystatusattach(struct knote *kn, struct kevent_qos_s *kev); | |
89 | static void filt_memorystatusdetach(struct knote *kn); | |
90 | static int filt_memorystatus(struct knote *kn, long hint); | |
91 | static int filt_memorystatustouch(struct knote *kn, struct kevent_qos_s *kev); | |
92 | static int filt_memorystatusprocess(struct knote *kn, struct kevent_qos_s *kev); | |
93 | ||
94 | SECURITY_READ_ONLY_EARLY(struct filterops) memorystatus_filtops = { | |
95 | .f_attach = filt_memorystatusattach, | |
96 | .f_detach = filt_memorystatusdetach, | |
97 | .f_event = filt_memorystatus, | |
98 | .f_touch = filt_memorystatustouch, | |
99 | .f_process = filt_memorystatusprocess, | |
100 | }; | |
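/*
 * Userspace view of this filter (illustrative sketch only; the exact fflags a
 * client may use and any entitlement/availability requirements are assumptions,
 * not something this file guarantees). Clients most commonly observe memory
 * pressure through a libdispatch source of type DISPATCH_SOURCE_TYPE_MEMORYPRESSURE,
 * which is typically backed by this EVFILT_MEMORYSTATUS filter. A direct kevent()
 * registration would look roughly like:
 *
 *	int kq = kqueue();
 *	struct kevent ev;
 *	EV_SET(&ev, 0, EVFILT_MEMORYSTATUS, EV_ADD | EV_ENABLE,
 *	    NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, 0, NULL);
 *	kevent(kq, &ev, 1, NULL, 0, NULL);   (register the knote)
 *	kevent(kq, NULL, 0, &ev, 1, NULL);   (block; ev.fflags reports the delivered level)
 */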
101 | ||
102 | /* | |
103 | * Memorystatus notification events | |
104 | */ | |
105 | enum { | |
106 | kMemorystatusNoPressure = 0x1, | |
107 | kMemorystatusPressure = 0x2, | |
108 | kMemorystatusLowSwap = 0x4, | |
109 | kMemorystatusProcLimitWarn = 0x8, | |
110 | kMemorystatusProcLimitCritical = 0x10 | |
111 | }; | |
112 | ||
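/*
 * Notification pacing constants:
 *   INTER_NOTIFICATION_DELAY - microseconds the dispatch loop sleeps between
 *       notifying successive clients.
 *   VM_PRESSURE_DECREASED_SMOOTHING_PERIOD - how long a drop in pressure must
 *       persist before clients are told pressure has eased.
 *   WARNING/CRITICAL_NOTIFICATION_RESTING_PERIOD - minimum time before another
 *       round of warning/critical notifications is attempted once all
 *       interested clients have been told.
 */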
113 | #define INTER_NOTIFICATION_DELAY (250000) /* .25 second */ | |
114 | #define VM_PRESSURE_DECREASED_SMOOTHING_PERIOD 5000 /* milliseconds */ | |
115 | #define WARNING_NOTIFICATION_RESTING_PERIOD 25 /* seconds */ | |
116 | #define CRITICAL_NOTIFICATION_RESTING_PERIOD 25 /* seconds */ | |
117 | ||
118 | /* | |
119 | * Memorystatus notification helper routines | |
120 | */ | |
121 | static vm_pressure_level_t convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t); | |
122 | static boolean_t is_knote_registered_modify_task_pressure_bits(struct knote*, int, task_t, vm_pressure_level_t, vm_pressure_level_t); | |
123 | static void memorystatus_klist_reset_all_for_level(vm_pressure_level_t pressure_level_to_clear); | |
124 | static struct knote *vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process); | |
125 | static void vm_dispatch_memory_pressure(void); | |
126 | kern_return_t memorystatus_update_vm_pressure(boolean_t target_foreground_process); | |
127 | ||
128 | #if VM_PRESSURE_EVENTS | |
129 | ||
130 | /* | |
131 | * This value is the threshold that a process must meet to be considered for scavenging. | |
132 | */ | |
133 | #if XNU_TARGET_OS_OSX | |
134 | #define VM_PRESSURE_MINIMUM_RSIZE 10 /* MB */ | |
135 | #else /* XNU_TARGET_OS_OSX */ |
136 | #define VM_PRESSURE_MINIMUM_RSIZE 6 /* MB */ | |
137 | #endif /* XNU_TARGET_OS_OSX */ | |
138 | |
139 | static uint32_t vm_pressure_task_footprint_min = VM_PRESSURE_MINIMUM_RSIZE; | |
140 | ||
141 | #if DEVELOPMENT || DEBUG | |
142 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_vm_pressure_task_footprint_min, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pressure_task_footprint_min, 0, ""); | |
143 | #endif /* DEVELOPMENT || DEBUG */ | |
144 | ||
145 | vm_pressure_level_t memorystatus_vm_pressure_level = kVMPressureNormal; | |
146 | ||
147 | /* | |
148 | * We use this flag to signal if we have any HWM offenders | |
149 | * on the system. This way we can reduce the number of wakeups | |
150 | * of the memorystatus_thread when the system is between the | |
151 | * "pressure" and "critical" threshold. | |
152 | * | |
153 | * The (re-)setting of this variable is done without any locks | |
154 | * or synchronization simply because it is not possible (currently) | |
155 | * to keep track of HWM offenders that drop down below their memory | |
156 | * limit and/or exit. So, we choose to burn a couple of wasted wakeups | |
157 | * by allowing the unguarded modification of this variable. | |
158 | */ | |
159 | boolean_t memorystatus_hwm_candidates = 0; | |
160 | ||
161 | #endif /* VM_PRESSURE_EVENTS */ | |
162 | ||
163 | #if CONFIG_JETSAM | |
164 | ||
165 | extern unsigned int memorystatus_available_pages; | |
166 | extern unsigned int memorystatus_available_pages_pressure; | |
167 | extern unsigned int memorystatus_available_pages_critical; | |
168 | extern unsigned int memorystatus_available_pages_critical_base; | |
169 | extern unsigned int memorystatus_available_pages_critical_idle_offset; | |
170 | ||
171 | #else /* CONFIG_JETSAM */ | |
172 | ||
173 | extern uint64_t memorystatus_available_pages; | |
174 | extern uint64_t memorystatus_available_pages_pressure; | |
175 | extern uint64_t memorystatus_available_pages_critical; | |
176 | ||
177 | #endif /* CONFIG_JETSAM */ | |
178 | ||
179 | extern lck_mtx_t memorystatus_jetsam_fg_band_lock; | |
180 | uint32_t memorystatus_jetsam_fg_band_waiters = 0; | |
181 | static uint64_t memorystatus_jetsam_fg_band_timestamp_ns = 0; /* nanosec */ | |
182 | static uint64_t memorystatus_jetsam_fg_band_delay_ns = 5ull * 1000 * 1000 * 1000; /* nanosec */ | |
183 | ||
184 | extern boolean_t(*volatile consider_buffer_cache_collect)(int); | |
185 | ||
186 | #if DEVELOPMENT || DEBUG | |
187 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_jetsam_fg_band_delay_ns, CTLFLAG_RW | CTLFLAG_LOCKED, | |
188 | &memorystatus_jetsam_fg_band_delay_ns, ""); | |
189 | #endif | |
190 | ||
191 | static int | |
192 | filt_memorystatusattach(struct knote *kn, __unused struct kevent_qos_s *kev) | |
193 | { | |
194 | int error; | |
195 | ||
196 | kn->kn_flags |= EV_CLEAR; /* automatically set */ | |
197 | kn->kn_sdata = 0; /* incoming data is ignored */ | |
198 | ||
199 | error = memorystatus_knote_register(kn); | |
200 | if (error) { | |
201 | knote_set_error(kn, error); | |
202 | } | |
203 | return 0; | |
204 | } | |
205 | ||
206 | static void | |
207 | filt_memorystatusdetach(struct knote *kn) | |
208 | { | |
209 | memorystatus_knote_unregister(kn); | |
210 | } | |
211 | ||
212 | static int | |
213 | filt_memorystatus(struct knote *kn __unused, long hint) | |
214 | { | |
215 | if (hint) { | |
216 | switch (hint) { | |
217 | case kMemorystatusNoPressure: | |
218 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
219 | kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; | |
220 | } | |
221 | break; | |
222 | case kMemorystatusPressure: | |
223 | if (memorystatus_vm_pressure_level == kVMPressureWarning || memorystatus_vm_pressure_level == kVMPressureUrgent) { | |
224 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { | |
225 | kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
226 | } | |
227 | } else if (memorystatus_vm_pressure_level == kVMPressureCritical) { | |
228 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { | |
229 | kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; | |
230 | } | |
231 | } | |
232 | break; | |
233 | case kMemorystatusLowSwap: | |
234 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_LOW_SWAP) { | |
235 | kn->kn_fflags = NOTE_MEMORYSTATUS_LOW_SWAP; | |
236 | } | |
237 | break; | |
238 | ||
239 | case kMemorystatusProcLimitWarn: | |
240 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) { | |
241 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN; | |
242 | } | |
243 | break; | |
244 | ||
245 | case kMemorystatusProcLimitCritical: | |
246 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) { | |
247 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL; | |
248 | } | |
249 | break; | |
250 | ||
251 | default: | |
252 | break; | |
253 | } | |
254 | } | |
255 | ||
256 | #if 0 | |
257 | if (kn->kn_fflags != 0) { | |
258 | proc_t knote_proc = knote_get_kq(kn)->kq_p; | |
259 | pid_t knote_pid = knote_proc->p_pid; | |
260 | ||
261 | printf("filt_memorystatus: sending kn 0x%lx (event 0x%x) for pid (%d)\n", | |
262 | (unsigned long)kn, kn->kn_fflags, knote_pid); | |
263 | } | |
264 | #endif | |
265 | ||
266 | return kn->kn_fflags != 0; | |
267 | } | |
268 | ||
269 | static int | |
270 | filt_memorystatustouch(struct knote *kn, struct kevent_qos_s *kev) | |
271 | { | |
272 | int res; | |
273 | int prev_kn_sfflags = 0; | |
274 | ||
275 | memorystatus_klist_lock(); | |
276 | ||
277 | /* | |
278 | * copy in new kevent settings | |
279 | * (saving the "desired" data and fflags). | |
280 | */ | |
281 | ||
282 | prev_kn_sfflags = kn->kn_sfflags; | |
283 | kn->kn_sfflags = (kev->fflags & EVFILT_MEMORYSTATUS_ALL_MASK); | |
284 | ||
285 | #if XNU_TARGET_OS_OSX | |
286 | /* |
287 | * Only on desktop do we restrict notifications to | |
288 | * one per active/inactive state (soft limits only). | |
289 | */ | |
290 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) { | |
291 | /* | |
292 | * Is there previous state to preserve? | |
293 | */ | |
294 | if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) { | |
295 | /* | |
296 | * This knote was previously interested in proc_limit_warn, | |
297 | * so yes, preserve previous state. | |
298 | */ | |
299 | if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE) { | |
300 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE; | |
301 | } | |
302 | if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE) { | |
303 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE; | |
304 | } | |
305 | } else { | |
306 | /* | |
307 | * This knote was not previously interested in proc_limit_warn, | |
308 | * but it is now. Set both states. | |
309 | */ | |
310 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE; | |
311 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE; | |
312 | } | |
313 | } | |
314 | ||
315 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) { | |
316 | /* | |
317 | * Is there previous state to preserve? | |
318 | */ | |
319 | if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) { | |
320 | /* | |
321 | * This knote was previously interested in proc_limit_critical, | |
322 | * so yes, preserve previous state. | |
323 | */ | |
324 | if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE) { | |
325 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE; | |
326 | } | |
327 | if (prev_kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE) { | |
328 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE; | |
329 | } | |
330 | } else { | |
331 | /* | |
332 | * This knote was not previously interested in proc_limit_critical, | |
333 | * but it is now. Set both states. | |
334 | */ | |
335 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE; | |
336 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE; | |
337 | } | |
338 | } | |
339 | #endif /* XNU_TARGET_OS_OSX */ | |
340 | |
341 | /* | |
342 | * reset the output flags based on a | |
343 | * combination of the old events and | |
344 | * the new desired event list. | |
345 | */ | |
346 | //kn->kn_fflags &= kn->kn_sfflags; | |
347 | ||
348 | res = (kn->kn_fflags != 0); | |
349 | ||
350 | memorystatus_klist_unlock(); | |
351 | ||
352 | return res; | |
353 | } | |
354 | ||
355 | static int | |
356 | filt_memorystatusprocess(struct knote *kn, struct kevent_qos_s *kev) | |
357 | { | |
358 | int res = 0; | |
359 | ||
360 | memorystatus_klist_lock(); | |
361 | if (kn->kn_fflags) { | |
362 | knote_fill_kevent(kn, kev, 0); | |
363 | res = 1; | |
364 | } | |
365 | memorystatus_klist_unlock(); | |
366 | ||
367 | return res; | |
368 | } | |
369 | ||
370 | static void | |
371 | memorystatus_klist_lock(void) | |
372 | { | |
373 | lck_mtx_lock(&memorystatus_klist_mutex); | |
374 | } | |
375 | ||
376 | static void | |
377 | memorystatus_klist_unlock(void) | |
378 | { | |
379 | lck_mtx_unlock(&memorystatus_klist_mutex); | |
380 | } | |
381 | ||
382 | void | |
383 | memorystatus_kevent_init(lck_grp_t *grp, lck_attr_t *attr) | |
384 | { | |
385 | lck_mtx_init(&memorystatus_klist_mutex, grp, attr); | |
386 | klist_init(&memorystatus_klist); | |
387 | } | |
388 | ||
389 | int | |
390 | memorystatus_knote_register(struct knote *kn) | |
391 | { | |
392 | int error = 0; | |
393 | ||
394 | memorystatus_klist_lock(); | |
395 | ||
396 | /* | |
397 | * Support only userspace visible flags. | |
398 | */ | |
399 | if ((kn->kn_sfflags & EVFILT_MEMORYSTATUS_ALL_MASK) == (unsigned int) kn->kn_sfflags) { | |
400 | #if XNU_TARGET_OS_OSX | |
401 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) { |
402 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE; | |
403 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE; | |
404 | } | |
405 | ||
406 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) { | |
407 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE; | |
408 | kn->kn_sfflags |= NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE; | |
409 | } | |
410 | #endif /* XNU_TARGET_OS_OSX */ | |
411 | |
412 | KNOTE_ATTACH(&memorystatus_klist, kn); | |
413 | } else { | |
414 | error = ENOTSUP; | |
415 | } | |
416 | ||
417 | memorystatus_klist_unlock(); | |
418 | ||
419 | return error; | |
420 | } | |
421 | ||
422 | void | |
423 | memorystatus_knote_unregister(struct knote *kn __unused) | |
424 | { | |
425 | memorystatus_klist_lock(); | |
426 | KNOTE_DETACH(&memorystatus_klist, kn); | |
427 | memorystatus_klist_unlock(); | |
428 | } | |
429 | ||
430 | #if VM_PRESSURE_EVENTS | |
431 | ||
432 | #if CONFIG_MEMORYSTATUS | |
433 | ||
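/*
 * Build a kernel event (KEV_VENDOR_APPLE / KEV_SYSTEM_CLASS) carrying the
 * caller's payload on the given subclass and post it with kev_post_msg().
 * Returns the kev_post_msg() result (0 on success). Wrapped by
 * memorystatus_send_note() (KEV_MEMORYSTATUS_SUBCLASS) and
 * memorystatus_send_dirty_status_change_note() (KEV_DIRTYSTATUS_SUBCLASS) below.
 */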
434 | static inline int |
435 | memorystatus_send_note_internal(int event_code, int subclass, void *data, uint32_t data_length) | |
436 | { |
437 | int ret; | |
438 | struct kev_msg ev_msg; | |
439 | ||
440 | ev_msg.vendor_code = KEV_VENDOR_APPLE; | |
441 | ev_msg.kev_class = KEV_SYSTEM_CLASS; | |
442 | ev_msg.kev_subclass = subclass; | |
443 | |
444 | ev_msg.event_code = event_code; | |
445 | ||
446 | ev_msg.dv[0].data_length = data_length; | |
447 | ev_msg.dv[0].data_ptr = data; | |
448 | ev_msg.dv[1].data_length = 0; | |
449 | ||
450 | ret = kev_post_msg(&ev_msg); | |
451 | if (ret) { | |
452 | printf("%s: kev_post_msg() failed, err %d\n", __func__, ret); | |
453 | } | |
454 | ||
455 | return ret; | |
456 | } | |
457 | ||
458 | int |
459 | memorystatus_send_note(int event_code, void *data, uint32_t data_length) | |
460 | { | |
461 | return memorystatus_send_note_internal(event_code, KEV_MEMORYSTATUS_SUBCLASS, data, data_length); | |
462 | } | |
463 | ||
464 | int | |
465 | memorystatus_send_dirty_status_change_note(void *data, uint32_t data_length) | |
466 | { | |
467 | return memorystatus_send_note_internal(kDirtyStatusChangeNote, KEV_DIRTYSTATUS_SUBCLASS, data, data_length); | |
468 | } | |
469 | ||
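/*
 * Flag any knotes registered by this process (which must be current_proc(), as
 * asserted below) for delivery of a proc-limit warning or critical notification.
 * For non-fatal (soft) limits on macOS-like platforms, the per active/inactive
 * one-shot bits throttle repeat notifications; other platforms fall back to the
 * global pressure flags. Returns TRUE if a matching knote was found; KNOTE() is
 * only issued when at least one knote was newly marked for delivery.
 */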
470 | boolean_t | |
471 | memorystatus_warn_process(const proc_t p, __unused boolean_t is_active, __unused boolean_t is_fatal, boolean_t limit_exceeded) | |
472 | { | |
473 | /* |
474 | * This function doesn't take a reference to p or lock it. So it better be the current process. | |
475 | */ | |
476 | assert(p == current_proc()); | |
477 | pid_t pid = p->p_pid; | |
478 | boolean_t ret = FALSE; |
479 | boolean_t found_knote = FALSE; | |
480 | struct knote *kn = NULL; | |
481 | int send_knote_count = 0; | |
482 | uint32_t platform; |
483 | platform = proc_platform(p); | |
484 | |
485 | /* | |
486 | * See comment in sysctl_memorystatus_vm_pressure_send. | |
487 | */ | |
488 | ||
489 | memorystatus_klist_lock(); | |
490 | ||
491 | SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { | |
492 | proc_t knote_proc = knote_get_kq(kn)->kq_p; | |
493 | pid_t knote_pid = knote_proc->p_pid; | |
494 | ||
495 | if (knote_pid == pid) { | |
496 | /* | |
497 | * By setting the "fflags" here, we are forcing | |
498 | * a process to deal with the case where it's | |
499 | * bumping up into its memory limits. If we don't | |
500 | * do this here, we will end up depending on the | |
501 | * system pressure snapshot evaluation in | |
502 | * filt_memorystatus(). | |
503 | */ | |
504 | ||
505 | /* |
506 | * The type of notification and the frequency are different between | |
507 | * embedded and desktop. | |
508 | * | |
509 | * Embedded processes register for global pressure notifications | |
510 | * (NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) via UIKit | |
511 | * (see applicationDidReceiveMemoryWarning in UIKit). We'll warn them here if | |
512 | * they are near their memory limit. filt_memorystatus() will warn them based | |
513 | * on the system pressure level. | |
514 | * | |
515 | * On desktop, (NOTE_MEMORYSTATUS_PRESSURE_WARN | NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) | |
516 | * are only expected to fire for system-level warnings. Desktop processes | |
517 | * register for NOTE_MEMORYSTATUS_PROC_LIMIT_WARN | |
518 | * if they want to be warned when they approach their limit | |
519 | * and for NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL to be warned when they | |
520 | * exceed their limit. | |
521 | * | |
522 | * On embedded we continuously warn processes that are approaching their | |
523 | * memory limit. However on desktop, we only send one warning while | |
524 | * the process is active/inactive if the limit is soft. | |
525 | * | |
526 | */ | |
527 | if (platform == PLATFORM_MACOS || platform == PLATFORM_MACCATALYST || platform == PLATFORM_DRIVERKIT) { | |
528 | if (!limit_exceeded) { | |
529 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) { | |
530 | found_knote = TRUE; | |
531 | if (!is_fatal) { | |
532 | /* | |
533 | * Restrict proc_limit_warn notifications when | |
534 | * non-fatal (soft) limit is at play. | |
535 | */ | |
536 | if (is_active) { | |
537 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE) { | |
538 | /* | |
539 | * Mark this knote for delivery. | |
540 | */ | |
541 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN; | |
542 | /* | |
543 | * And suppress it from future notifications. | |
544 | */ | |
545 | kn->kn_sfflags &= ~NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_ACTIVE; | |
546 | send_knote_count++; | |
547 | } | |
548 | } else { | |
549 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE) { | |
550 | /* | |
551 | * Mark this knote for delivery. | |
552 | */ | |
553 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN; | |
554 | /* | |
555 | * And suppress it from future notifications. | |
556 | */ | |
557 | kn->kn_sfflags &= ~NOTE_MEMORYSTATUS_PROC_LIMIT_WARN_INACTIVE; | |
558 | send_knote_count++; | |
559 | } | |
560 | } |
561 | } else { | |
562 | /* |
563 | * No restriction on proc_limit_warn notifications when | |
564 | * fatal (hard) limit is at play. | |
565 | */ | |
566 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN; | |
567 | send_knote_count++; | |
568 | } | |
569 | } | |
570 | } else { |
571 | /* | |
572 | * Send this notification when a process has exceeded a soft limit. | |
573 | */ | |
574 | ||
575 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) { | |
576 | found_knote = TRUE; | |
577 | if (!is_fatal) { | |
578 | /* | |
579 | * Restrict critical notifications for soft limits. | |
580 | */ | |
581 | ||
582 | if (is_active) { | |
583 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE) { | |
584 | /* | |
585 | * Suppress future proc_limit_critical notifications | |
586 | * for the active soft limit. | |
587 | */ | |
588 | kn->kn_sfflags &= ~NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_ACTIVE; | |
589 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL; | |
590 | send_knote_count++; | |
591 | } | |
592 | } else { | |
593 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE) { | |
594 | /* | |
595 | * Suppress future proc_limit_critical_notifications | |
596 | * for the inactive soft limit. | |
597 | */ | |
598 | kn->kn_sfflags &= ~NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL_INACTIVE; | |
599 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL; | |
600 | send_knote_count++; | |
601 | } | |
602 | } |
603 | } else { | |
604 | /* |
605 | * We should never be trying to send a critical notification for | |
606 | * a hard limit... the process would be killed before it could be | |
607 | * received. | |
608 | */ | |
609 | panic("Caught sending pid %d a critical warning for a fatal limit.\n", pid); | |
610 | } | |
611 | } |
612 | } | |
613 | } else { | |
614 | if (!limit_exceeded) { | |
615 | /* | |
616 | * Intentionally set either the unambiguous limit warning, | |
617 | * the system-wide critical or the system-wide warning | |
618 | * notification bit. | |
619 | */ | |
620 | ||
621 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) { | |
622 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_WARN; | |
623 | found_knote = TRUE; | |
624 | send_knote_count++; | |
625 | } else if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { | |
626 | kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; | |
627 | found_knote = TRUE; | |
628 | send_knote_count++; | |
629 | } else if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_WARN) { | |
630 | kn->kn_fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
631 | found_knote = TRUE; | |
632 | send_knote_count++; | |
633 | } | |
634 | } else { | |
635 | /* | |
636 | * Send this notification when a process has exceeded a soft limit. | |
637 | */ | |
638 | if (kn->kn_sfflags & NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) { | |
639 | kn->kn_fflags = NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL; | |
640 | found_knote = TRUE; | |
641 | send_knote_count++; | |
642 | } |
643 | } | |
644 | } | |
645 | } |
646 | } | |
647 | ||
648 | if (found_knote) { | |
649 | if (send_knote_count > 0) { | |
650 | KNOTE(&memorystatus_klist, 0); | |
651 | } | |
652 | ret = TRUE; | |
653 | } | |
654 | ||
655 | memorystatus_klist_unlock(); | |
656 | ||
657 | return ret; | |
658 | } | |
659 | ||
660 | /* | |
661 | * Can only be set by the current task on itself. | |
662 | */ | |
663 | int | |
664 | memorystatus_low_mem_privileged_listener(uint32_t op_flags) | |
665 | { | |
666 | boolean_t set_privilege = FALSE; | |
667 | /* | |
668 | * Need an entitlement check here? | |
669 | */ | |
670 | if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_ENABLE) { | |
671 | set_privilege = TRUE; | |
672 | } else if (op_flags == MEMORYSTATUS_CMD_PRIVILEGED_LISTENER_DISABLE) { | |
673 | set_privilege = FALSE; | |
674 | } else { | |
675 | return EINVAL; | |
676 | } | |
677 | ||
678 | return task_low_mem_privileged_listener(current_task(), set_privilege, NULL); | |
679 | } | |
680 | ||
681 | int | |
682 | memorystatus_send_pressure_note(pid_t pid) | |
683 | { | |
684 | MEMORYSTATUS_DEBUG(1, "memorystatus_send_pressure_note(): pid %d\n", pid); | |
685 | return memorystatus_send_note(kMemorystatusPressureNote, &pid, sizeof(pid)); | |
686 | } | |
687 | ||
688 | boolean_t | |
689 | memorystatus_is_foreground_locked(proc_t p) | |
690 | { | |
691 | return (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND) || | |
692 | (p->p_memstat_effectivepriority == JETSAM_PRIORITY_FOREGROUND_SUPPORT); | |
693 | } | |
694 | ||
695 | /* | |
696 | * This is meant for stackshot and kperf -- it does not take the proc_list_lock | |
697 | * to access the p_memstat_dirty field. | |
698 | */ | |
699 | void | |
700 | memorystatus_proc_flags_unsafe(void * v, boolean_t *is_dirty, boolean_t *is_dirty_tracked, boolean_t *allow_idle_exit) | |
701 | { | |
702 | if (!v) { | |
703 | *is_dirty = FALSE; | |
704 | *is_dirty_tracked = FALSE; | |
705 | *allow_idle_exit = FALSE; | |
706 | } else { | |
707 | proc_t p = (proc_t)v; | |
708 | *is_dirty = (p->p_memstat_dirty & P_DIRTY_IS_DIRTY) != 0; | |
709 | *is_dirty_tracked = (p->p_memstat_dirty & P_DIRTY_TRACK) != 0; | |
710 | *allow_idle_exit = (p->p_memstat_dirty & P_DIRTY_ALLOW_IDLE_EXIT) != 0; | |
711 | } | |
712 | } | |
713 | ||
714 | boolean_t | |
715 | memorystatus_bg_pressure_eligible(proc_t p) | |
716 | { | |
717 | boolean_t eligible = FALSE; | |
718 | ||
719 | proc_list_lock(); | |
720 | ||
721 | MEMORYSTATUS_DEBUG(1, "memorystatus_bg_pressure_eligible: pid %d, state 0x%x\n", p->p_pid, p->p_memstat_state); | |
722 | ||
723 | /* Foreground processes have already been dealt with at this point, so just test for eligibility */ | |
724 | if (!(p->p_memstat_state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_SUSPENDED | P_MEMSTAT_FROZEN))) { | |
725 | eligible = TRUE; | |
726 | } | |
727 | ||
728 | if (p->p_memstat_effectivepriority < JETSAM_PRIORITY_BACKGROUND_OPPORTUNISTIC) { | |
729 | /* | |
730 | * IDLE and IDLE_DEFERRED bands contain processes | |
731 | * that have dropped memory to be under their inactive | |
732 | * memory limits. And so they can't really give back | |
733 | * anything. | |
734 | */ | |
735 | eligible = FALSE; | |
736 | } | |
737 | ||
738 | proc_list_unlock(); | |
739 | ||
740 | return eligible; | |
741 | } | |
742 | ||
743 | void | |
744 | memorystatus_send_low_swap_note(void) | |
745 | { | |
746 | struct knote *kn = NULL; | |
747 | ||
748 | memorystatus_klist_lock(); | |
749 | SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { | |
750 | /* We call is_knote_registered_modify_task_pressure_bits to check if the sfflags for the | |
751 | * current note contain NOTE_MEMORYSTATUS_LOW_SWAP. Once we find one note in the memorystatus_klist | |
752 | * that has the NOTE_MEMORYSTATUS_LOW_SWAP flags in its sfflags set, we call KNOTE with | |
753 | * kMemorystatusLowSwap as the hint to process and update all knotes on the memorystatus_klist accordingly. */ | |
754 | if (is_knote_registered_modify_task_pressure_bits(kn, NOTE_MEMORYSTATUS_LOW_SWAP, NULL, 0, 0) == TRUE) { | |
755 | KNOTE(&memorystatus_klist, kMemorystatusLowSwap); | |
756 | break; | |
757 | } | |
758 | } | |
759 | ||
760 | memorystatus_klist_unlock(); | |
761 | } | |
762 | ||
763 | #endif /* CONFIG_MEMORYSTATUS */ | |
764 | ||
765 | /* | |
766 | * kn_max - knote | |
767 | * | |
768 | * knote_pressure_level - to check if the knote is registered for this notification level. | |
769 | * | |
770 | * task - task whose bits we'll be modifying | |
771 | * | |
772 | * pressure_level_to_clear - if the task has been notified of this past level, clear that notification bit so that if/when we revert to that level, the task will be notified again. | |
773 | * | |
774 | * pressure_level_to_set - the task is about to be notified of this new level. Update the task's bit notification information appropriately. | |
775 | * | |
776 | */ | |
777 | ||
778 | static boolean_t | |
779 | is_knote_registered_modify_task_pressure_bits(struct knote *kn_max, int knote_pressure_level, task_t task, vm_pressure_level_t pressure_level_to_clear, vm_pressure_level_t pressure_level_to_set) | |
780 | { | |
781 | if (kn_max->kn_sfflags & knote_pressure_level) { | |
782 | if (pressure_level_to_clear && task_has_been_notified(task, pressure_level_to_clear) == TRUE) { | |
783 | task_clear_has_been_notified(task, pressure_level_to_clear); | |
784 | } | |
785 | ||
786 | task_mark_has_been_notified(task, pressure_level_to_set); | |
787 | return TRUE; | |
788 | } | |
789 | ||
790 | return FALSE; | |
791 | } | |
792 | ||
793 | static void | |
794 | memorystatus_klist_reset_all_for_level(vm_pressure_level_t pressure_level_to_clear) | |
795 | { | |
796 | struct knote *kn = NULL; | |
797 | ||
798 | memorystatus_klist_lock(); | |
799 | SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { | |
800 | proc_t p = PROC_NULL; | |
801 | struct task* t = TASK_NULL; | |
802 | ||
803 | p = knote_get_kq(kn)->kq_p; | |
804 | proc_list_lock(); | |
805 | if (p != proc_ref_locked(p)) { | |
806 | p = PROC_NULL; | |
807 | proc_list_unlock(); | |
808 | continue; | |
809 | } | |
810 | proc_list_unlock(); | |
811 | ||
812 | t = (struct task *)(p->task); | |
813 | ||
814 | task_clear_has_been_notified(t, pressure_level_to_clear); | |
815 | ||
816 | proc_rele(p); | |
817 | } | |
818 | ||
819 | memorystatus_klist_unlock(); | |
820 | } | |
821 | ||
822 | /* | |
823 | * Used by the vm_pressure_thread which is | |
824 | * signalled from within vm_pageout_scan(). | |
825 | */ | |
826 | ||
827 | void | |
828 | consider_vm_pressure_events(void) | |
829 | { | |
830 | vm_dispatch_memory_pressure(); | |
831 | } | |
832 | ||
833 | static void | |
834 | vm_dispatch_memory_pressure(void) | |
835 | { | |
836 | memorystatus_update_vm_pressure(FALSE); | |
837 | } | |
838 | ||
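/*
 * Pick the "best" knote to notify for the given pressure level. Candidates must
 * be registered for the corresponding dispatch level and (depending on
 * target_foreground_process) be foreground or background-pressure eligible.
 * A privileged listener that has not yet been notified is returned immediately.
 * Otherwise, among tasks whose phys footprint is at least
 * vm_pressure_task_footprint_min MB, prefer the least important task while
 * pressure is rising and the most important one otherwise, breaking ties in
 * favor of the larger resident footprint.
 */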
839 | static struct knote * | |
840 | vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level, boolean_t target_foreground_process) | |
841 | { | |
842 | struct knote *kn = NULL, *kn_max = NULL; | |
843 | uint64_t resident_max = 0; /* MB */ | |
844 | int selected_task_importance = 0; |
845 | static int pressure_snapshot = -1; | |
846 | boolean_t pressure_increase = FALSE; | |
847 | ||
848 | if (pressure_snapshot == -1) { | |
849 | /* | |
850 | * Initial snapshot. | |
851 | */ | |
852 | pressure_snapshot = level; | |
853 | pressure_increase = TRUE; | |
854 | } else { | |
855 | if (level && (level >= pressure_snapshot)) { | |
856 | pressure_increase = TRUE; | |
857 | } else { | |
858 | pressure_increase = FALSE; | |
859 | } | |
860 | ||
861 | pressure_snapshot = level; | |
862 | } | |
863 | ||
864 | if (pressure_increase == TRUE) { | |
865 | /* | |
866 | * We'll start by considering the largest | |
867 | * unimportant task in our list. | |
868 | */ | |
869 | selected_task_importance = INT_MAX; | |
870 | } else { | |
871 | /* | |
872 | * We'll start by considering the largest | |
873 | * important task in our list. | |
874 | */ | |
875 | selected_task_importance = 0; | |
876 | } | |
877 | ||
878 | SLIST_FOREACH(kn, candidate_list, kn_selnext) { |
879 | uint64_t resident_size = 0; /* MB */ | |
880 | proc_t p = PROC_NULL; | |
881 | struct task* t = TASK_NULL; | |
882 | int curr_task_importance = 0; | |
883 | boolean_t consider_knote = FALSE; | |
884 | boolean_t privileged_listener = FALSE; | |
885 | ||
886 | p = knote_get_kq(kn)->kq_p; | |
887 | proc_list_lock(); | |
888 | if (p != proc_ref_locked(p)) { | |
889 | p = PROC_NULL; | |
890 | proc_list_unlock(); | |
891 | continue; | |
892 | } | |
893 | proc_list_unlock(); | |
894 | ||
895 | #if CONFIG_MEMORYSTATUS | |
896 | if (target_foreground_process == TRUE && !memorystatus_is_foreground_locked(p)) { | |
897 | /* | |
898 | * Skip process not marked foreground. | |
899 | */ | |
900 | proc_rele(p); | |
901 | continue; | |
902 | } | |
903 | #endif /* CONFIG_MEMORYSTATUS */ | |
904 | ||
905 | t = (struct task *)(p->task); | |
906 | ||
907 | vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level); |
908 | ||
909 | if ((kn->kn_sfflags & dispatch_level) == 0) { | |
910 | proc_rele(p); | |
911 | continue; | |
912 | } | |
913 | ||
914 | #if CONFIG_MEMORYSTATUS | |
915 | if (target_foreground_process == FALSE && !memorystatus_bg_pressure_eligible(p)) { | |
916 | VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid); | |
917 | proc_rele(p); | |
918 | continue; | |
919 | } | |
920 | #endif /* CONFIG_MEMORYSTATUS */ | |
921 | ||
922 | #if XNU_TARGET_OS_OSX | |
923 | curr_task_importance = task_importance_estimate(t); | |
924 | #else /* XNU_TARGET_OS_OSX */ |
925 | curr_task_importance = p->p_memstat_effectivepriority; | |
926 | #endif /* XNU_TARGET_OS_OSX */ | |
927 | |
928 | /* | |
929 | * Privileged listeners are only considered in the multi-level pressure scheme | |
930 | * AND only if the pressure is increasing. | |
931 | */ | |
932 | if (level > 0) { | |
933 | if (task_has_been_notified(t, level) == FALSE) { | |
934 | /* | |
935 | * Is this a privileged listener? | |
936 | */ | |
937 | if (task_low_mem_privileged_listener(t, FALSE, &privileged_listener) == 0) { | |
938 | if (privileged_listener) { | |
939 | kn_max = kn; | |
940 | proc_rele(p); | |
941 | goto done_scanning; | |
942 | } | |
943 | } | |
944 | } else { | |
945 | proc_rele(p); | |
946 | continue; | |
947 | } | |
948 | } else if (level == 0) { | |
949 | /* | |
950 | * Task wasn't notified when the pressure was increasing and so | |
951 | * no need to notify it that the pressure is decreasing. | |
952 | */ | |
953 | if ((task_has_been_notified(t, kVMPressureWarning) == FALSE) && (task_has_been_notified(t, kVMPressureCritical) == FALSE)) { | |
954 | proc_rele(p); | |
955 | continue; | |
956 | } | |
957 | } | |
958 | ||
959 | /* | |
960 | * We don't want a small process to block large processes from | |
961 | * being notified again. <rdar://problem/7955532> | |
962 | */ | |
963 | resident_size = (get_task_phys_footprint(t)) / (1024 * 1024ULL); /* MB */ | |
964 | ||
965 | if (resident_size >= vm_pressure_task_footprint_min) { | |
966 | if (level > 0) { | |
967 | /* | |
968 | * Warning or Critical Pressure. | |
969 | */ | |
970 | if (pressure_increase) { | |
971 | if ((curr_task_importance < selected_task_importance) || | |
972 | ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { | |
973 | /* | |
974 | * We have found a candidate process which is: | |
975 | * a) at a lower importance than the current selected process | |
976 | * OR | |
977 | * b) has importance equal to that of the current selected process but is larger | |
978 | */ | |
979 | ||
980 | consider_knote = TRUE; | |
981 | } | |
982 | } else { | |
983 | if ((curr_task_importance > selected_task_importance) || | |
984 | ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { | |
985 | /* | |
986 | * We have found a candidate process which is: | |
987 | * a) at a higher importance than the current selected process | |
988 | * OR | |
989 | * b) has importance equal to that of the current selected process but is larger | |
990 | */ | |
991 | ||
992 | consider_knote = TRUE; | |
993 | } | |
994 | } | |
995 | } else if (level == 0) { | |
996 | /* | |
997 | * Pressure back to normal. | |
998 | */ | |
999 | if ((curr_task_importance > selected_task_importance) || | |
1000 | ((curr_task_importance == selected_task_importance) && (resident_size > resident_max))) { | |
1001 | consider_knote = TRUE; | |
1002 | } | |
1003 | } | |
1004 | ||
1005 | if (consider_knote) { | |
1006 | resident_max = resident_size; | |
1007 | kn_max = kn; | |
1008 | selected_task_importance = curr_task_importance; | |
1009 | consider_knote = FALSE; /* reset for the next candidate */ | |
1010 | } | |
1011 | } else { | |
1012 | /* There was no candidate with enough resident memory to scavenge */ | |
1013 | VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %llu resident...\n", p->p_pid, resident_size); | |
1014 | } | |
1015 | proc_rele(p); | |
1016 | } | |
1017 | ||
1018 | done_scanning: | |
1019 | if (kn_max) { | |
1020 | VM_DEBUG_CONSTANT_EVENT(vm_pressure_event, VM_PRESSURE_EVENT, DBG_FUNC_NONE, knote_get_kq(kn_max)->kq_p->p_pid, resident_max, 0, 0); | |
1021 | VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %llu resident\n", knote_get_kq(kn_max)->kq_p->p_pid, resident_max); | |
1022 | } | |
1023 | ||
1024 | return kn_max; | |
1025 | } | |
1026 | ||
1027 | static uint64_t next_warning_notification_sent_at_ts = 0; | |
1028 | static uint64_t next_critical_notification_sent_at_ts = 0; | |
1029 | ||
1030 | boolean_t memorystatus_manual_testing_on = FALSE; | |
1031 | vm_pressure_level_t memorystatus_manual_testing_level = kVMPressureNormal; | |
1032 | ||
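/*
 * Core notification dispatch loop, driven from the VM pressure thread via
 * consider_vm_pressure_events() and from the manual-trigger sysctl. On
 * non-jetsam (desktop) configurations it first reaps idle-exitable processes.
 * It honors the per-level resting windows, applies a smoothing delay when
 * pressure is decreasing, then repeatedly selects the optimal candidate and
 * fires its knote. Returns KERN_FAILURE once no candidates remain for the
 * current level (arming the next resting window), KERN_SUCCESS otherwise.
 */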
1033 | kern_return_t | |
1034 | memorystatus_update_vm_pressure(boolean_t target_foreground_process) | |
1035 | { | |
1036 | struct knote *kn_max = NULL; | |
1037 | struct knote *kn_cur = NULL, *kn_temp = NULL; /* for safe list traversal */ | |
1038 | pid_t target_pid = -1; | |
1039 | struct klist dispatch_klist = { NULL }; | |
1040 | proc_t target_proc = PROC_NULL; | |
1041 | struct task *task = NULL; | |
1042 | boolean_t found_candidate = FALSE; | |
1043 | ||
1044 | static vm_pressure_level_t level_snapshot = kVMPressureNormal; | |
1045 | static vm_pressure_level_t prev_level_snapshot = kVMPressureNormal; | |
1046 | boolean_t smoothing_window_started = FALSE; | |
1047 | struct timeval smoothing_window_start_tstamp = {0, 0}; | |
1048 | struct timeval curr_tstamp = {0, 0}; | |
1049 | int64_t elapsed_msecs = 0; | |
1050 | uint64_t curr_ts = mach_absolute_time(); |
1051 | ||
1052 | #if !CONFIG_JETSAM | |
1053 | #define MAX_IDLE_KILLS 100 /* limit the number of idle kills allowed */ | |
1054 | ||
1055 | int idle_kill_counter = 0; | |
1056 | ||
1057 | /* | |
1058 | * On desktop we take this opportunity to free up memory pressure | |
1059 | * by immediately killing idle exitable processes. We use a delay | |
1060 | * to avoid overkill. And we impose a max counter as a fail safe | |
1061 | * in case daemons re-launch too fast. | |
1062 | */ | |
1063 | while ((memorystatus_vm_pressure_level != kVMPressureNormal) && (idle_kill_counter < MAX_IDLE_KILLS)) { | |
1064 | if (memorystatus_idle_exit_from_VM() == FALSE) { | |
1065 | /* No idle exitable processes left to kill */ | |
1066 | break; | |
1067 | } | |
1068 | idle_kill_counter++; | |
1069 | ||
1070 | if (memorystatus_manual_testing_on == TRUE) { | |
1071 | /* | |
1072 | * Skip the delay when testing | |
1073 | * the pressure notification scheme. | |
1074 | */ | |
1075 | } else { | |
1076 | delay(1000000); /* 1 second */ | |
1077 | } | |
1078 | } | |
1079 | #endif /* !CONFIG_JETSAM */ | |
1080 | ||
1081 | if (level_snapshot != kVMPressureNormal) { | |
1082 | /* | |
1083 | * Check to see if we are still in the 'resting' period | |
1084 | * after having notified all clients interested in | |
1085 | * a particular pressure level. | |
1086 | */ | |
1087 | ||
1088 | level_snapshot = memorystatus_vm_pressure_level; | |
1089 | ||
1090 | if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) { | |
1091 | if (next_warning_notification_sent_at_ts) { | |
1092 | if (curr_ts < next_warning_notification_sent_at_ts) { | |
1093 | delay(INTER_NOTIFICATION_DELAY * 4 /* 1 sec */); | |
1094 | return KERN_SUCCESS; | |
1095 | } | |
1096 | ||
1097 | next_warning_notification_sent_at_ts = 0; | |
1098 | memorystatus_klist_reset_all_for_level(kVMPressureWarning); | |
1099 | } | |
1100 | } else if (level_snapshot == kVMPressureCritical) { | |
1101 | if (next_critical_notification_sent_at_ts) { | |
1102 | if (curr_ts < next_critical_notification_sent_at_ts) { | |
1103 | delay(INTER_NOTIFICATION_DELAY * 4 /* 1 sec */); | |
1104 | return KERN_SUCCESS; | |
1105 | } | |
1106 | next_critical_notification_sent_at_ts = 0; | |
1107 | memorystatus_klist_reset_all_for_level(kVMPressureCritical); | |
1108 | } | |
1109 | } | |
1110 | } | |
1111 | ||
1112 | while (1) { | |
1113 | /* | |
1114 | * There is a race window here. But it's not clear | |
1115 | * how much we benefit from having extra synchronization. | |
1116 | */ | |
1117 | level_snapshot = memorystatus_vm_pressure_level; | |
1118 | ||
1119 | if (prev_level_snapshot > level_snapshot) { | |
1120 | /* | |
1121 | * Pressure decreased? Let's take a little breather | |
1122 | * and see if this condition stays. | |
1123 | */ | |
1124 | if (smoothing_window_started == FALSE) { | |
1125 | smoothing_window_started = TRUE; | |
1126 | microuptime(&smoothing_window_start_tstamp); | |
1127 | } | |
1128 | ||
1129 | microuptime(&curr_tstamp); | |
1130 | timevalsub(&curr_tstamp, &smoothing_window_start_tstamp); | |
1131 | elapsed_msecs = curr_tstamp.tv_sec * 1000 + curr_tstamp.tv_usec / 1000; | |
1132 | ||
1133 | if (elapsed_msecs < VM_PRESSURE_DECREASED_SMOOTHING_PERIOD) { | |
1134 | delay(INTER_NOTIFICATION_DELAY); | |
1135 | continue; | |
1136 | } | |
1137 | } | |
1138 | ||
1139 | prev_level_snapshot = level_snapshot; | |
1140 | smoothing_window_started = FALSE; | |
1141 | ||
1142 | memorystatus_klist_lock(); | |
1143 | kn_max = vm_pressure_select_optimal_candidate_to_notify(&memorystatus_klist, level_snapshot, target_foreground_process); | |
1144 | ||
1145 | if (kn_max == NULL) { | |
1146 | memorystatus_klist_unlock(); | |
1147 | ||
1148 | /* | |
1149 | * No more level-based clients to notify. | |
1150 | * | |
1151 | * Start the 'resting' window within which clients will not be re-notified. | |
1152 | */ | |
1153 | ||
1154 | if (level_snapshot != kVMPressureNormal) { | |
1155 | if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) { | |
1156 | nanoseconds_to_absolutetime(WARNING_NOTIFICATION_RESTING_PERIOD * NSEC_PER_SEC, &curr_ts); | |
1157 | ||
1158 | /* Next warning notification (if nothing changes) won't be sent before...*/ | |
1159 | next_warning_notification_sent_at_ts = mach_absolute_time() + curr_ts; | |
1160 | } | |
1161 | ||
1162 | if (level_snapshot == kVMPressureCritical) { | |
1163 | nanoseconds_to_absolutetime(CRITICAL_NOTIFICATION_RESTING_PERIOD * NSEC_PER_SEC, &curr_ts); | |
1164 | ||
1165 | /* Next critical notification (if nothing changes) won't be sent before...*/ | |
1166 | next_critical_notification_sent_at_ts = mach_absolute_time() + curr_ts; | |
1167 | } | |
1168 | } | |
1169 | return KERN_FAILURE; | |
1170 | } | |
1171 | ||
1172 | target_proc = knote_get_kq(kn_max)->kq_p; | |
1173 | ||
1174 | proc_list_lock(); | |
1175 | if (target_proc != proc_ref_locked(target_proc)) { | |
1176 | target_proc = PROC_NULL; | |
1177 | proc_list_unlock(); | |
1178 | memorystatus_klist_unlock(); | |
1179 | continue; | |
1180 | } | |
1181 | proc_list_unlock(); | |
1182 | ||
1183 | target_pid = target_proc->p_pid; | |
1184 | ||
1185 | task = (struct task *)(target_proc->task); | |
1186 | ||
1187 | if (level_snapshot != kVMPressureNormal) { | |
1188 | if (level_snapshot == kVMPressureWarning || level_snapshot == kVMPressureUrgent) { | |
1189 | if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_WARN, task, 0, kVMPressureWarning) == TRUE) { | |
1190 | found_candidate = TRUE; | |
1191 | } | |
1192 | } else { | |
1193 | if (level_snapshot == kVMPressureCritical) { | |
1194 | if (is_knote_registered_modify_task_pressure_bits(kn_max, NOTE_MEMORYSTATUS_PRESSURE_CRITICAL, task, 0, kVMPressureCritical) == TRUE) { | |
1195 | found_candidate = TRUE; | |
1196 | } | |
1197 | } | |
1198 | } | |
1199 | } else { | |
1200 | if (kn_max->kn_sfflags & NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
1201 | task_clear_has_been_notified(task, kVMPressureWarning); | |
1202 | task_clear_has_been_notified(task, kVMPressureCritical); | |
1203 | ||
1204 | found_candidate = TRUE; | |
1205 | } | |
1206 | } | |
1207 | ||
1208 | if (found_candidate == FALSE) { | |
1209 | proc_rele(target_proc); | |
1210 | memorystatus_klist_unlock(); | |
1211 | continue; | |
1212 | } | |
1213 | ||
1214 | SLIST_FOREACH_SAFE(kn_cur, &memorystatus_klist, kn_selnext, kn_temp) { | |
1215 | int knote_pressure_level = convert_internal_pressure_level_to_dispatch_level(level_snapshot); | |
1216 | ||
1217 | if (is_knote_registered_modify_task_pressure_bits(kn_cur, knote_pressure_level, task, 0, level_snapshot) == TRUE) { | |
1218 | proc_t knote_proc = knote_get_kq(kn_cur)->kq_p; | |
1219 | pid_t knote_pid = knote_proc->p_pid; | |
1220 | if (knote_pid == target_pid) { | |
1221 | KNOTE_DETACH(&memorystatus_klist, kn_cur); | |
1222 | KNOTE_ATTACH(&dispatch_klist, kn_cur); | |
1223 | } | |
1224 | } | |
1225 | } | |
1226 | ||
1227 | KNOTE(&dispatch_klist, (level_snapshot != kVMPressureNormal) ? kMemorystatusPressure : kMemorystatusNoPressure); | |
1228 | ||
1229 | SLIST_FOREACH_SAFE(kn_cur, &dispatch_klist, kn_selnext, kn_temp) { | |
1230 | KNOTE_DETACH(&dispatch_klist, kn_cur); | |
1231 | KNOTE_ATTACH(&memorystatus_klist, kn_cur); | |
1232 | } | |
1233 | ||
1234 | memorystatus_klist_unlock(); | |
1235 | ||
1236 | microuptime(&target_proc->vm_pressure_last_notify_tstamp); | |
1237 | proc_rele(target_proc); | |
1238 | ||
1239 | if (memorystatus_manual_testing_on == TRUE && target_foreground_process == TRUE) { | |
1240 | break; | |
1241 | } | |
1242 | ||
1243 | if (memorystatus_manual_testing_on == TRUE) { | |
1244 | /* | |
1245 | * Testing out the pressure notification scheme. | |
1246 | * No need for delays etc. | |
1247 | */ | |
1248 | } else { | |
1249 | uint32_t sleep_interval = INTER_NOTIFICATION_DELAY; | |
1250 | #if CONFIG_JETSAM | |
1251 | unsigned int page_delta = 0; | |
1252 | unsigned int skip_delay_page_threshold = 0; | |
1253 | ||
1254 | assert(memorystatus_available_pages_pressure >= memorystatus_available_pages_critical_base); | |
1255 | ||
1256 | page_delta = (memorystatus_available_pages_pressure - memorystatus_available_pages_critical_base) / 2; | |
1257 | skip_delay_page_threshold = memorystatus_available_pages_pressure - page_delta; | |
1258 | ||
1259 | if (memorystatus_available_pages <= skip_delay_page_threshold) { | |
1260 | /* | |
1261 | * We are nearing the critical mark fast and can't afford to wait between | |
1262 | * notifications. | |
1263 | */ | |
1264 | sleep_interval = 0; | |
1265 | } | |
1266 | #endif /* CONFIG_JETSAM */ | |
1267 | ||
1268 | if (sleep_interval) { | |
1269 | delay(sleep_interval); | |
1270 | } | |
1271 | } | |
1272 | } | |
1273 | ||
1274 | return KERN_SUCCESS; | |
1275 | } | |
1276 | ||
1277 | static uint32_t | |
1278 | convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t internal_pressure_level) | |
1279 | { | |
1280 | uint32_t dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; | |
1281 | ||
1282 | switch (internal_pressure_level) { | |
1283 | case kVMPressureNormal: | |
1284 | { | |
1285 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_NORMAL; | |
1286 | break; | |
1287 | } | |
1288 | ||
1289 | case kVMPressureWarning: | |
1290 | case kVMPressureUrgent: | |
1291 | { | |
1292 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
1293 | break; | |
1294 | } | |
1295 | ||
1296 | case kVMPressureCritical: | |
1297 | { | |
1298 | dispatch_level = NOTE_MEMORYSTATUS_PRESSURE_CRITICAL; | |
1299 | break; | |
1300 | } | |
1301 | ||
1302 | default: | |
1303 | break; | |
1304 | } | |
1305 | ||
1306 | return dispatch_level; | |
1307 | } | |
1308 | ||
1309 | /* | |
1310 | * Notify any kexts that are waiting for notification that jetsam | |
1311 | * is approaching the foreground bands. They should use this notification | |
1312 | * to free cached memory. | |
1313 | */ | |
1314 | void | |
1315 | memorystatus_issue_fg_band_notify(void) | |
1316 | { | |
1317 | uint64_t now; | |
1318 | ||
1319 | lck_mtx_lock(&memorystatus_jetsam_fg_band_lock); | |
1320 | absolutetime_to_nanoseconds(mach_absolute_time(), &now); | |
1321 | if (now - memorystatus_jetsam_fg_band_timestamp_ns < memorystatus_jetsam_fg_band_delay_ns) { | |
1322 | lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock); | |
1323 | return; | |
1324 | } | |
1325 | ||
1326 | if (memorystatus_jetsam_fg_band_waiters > 0) { | |
1327 | thread_wakeup(&memorystatus_jetsam_fg_band_waiters); | |
1328 | memorystatus_jetsam_fg_band_waiters = 0; | |
1329 | memorystatus_jetsam_fg_band_timestamp_ns = now; | |
1330 | } | |
1331 | lck_mtx_unlock(&memorystatus_jetsam_fg_band_lock); | |
1332 | ||
1333 | /* Notify the buffer cache, file systems, etc. to jettison everything they can. */ | |
1334 | if (consider_buffer_cache_collect != NULL) { | |
1335 | (void)(*consider_buffer_cache_collect)(1); | |
1336 | } | |
1337 | } | |
1338 | ||
1339 | ||
1340 | /* | |
1341 | * Memorystatus notification debugging support | |
1342 | */ | |
1343 | ||
1344 | static int | |
1345 | sysctl_memorystatus_vm_pressure_level SYSCTL_HANDLER_ARGS | |
1346 | { | |
1347 | #pragma unused(arg1, arg2, oidp) | |
1348 | #if !XNU_TARGET_OS_OSX | |
1349 | int error = 0; |
1350 | ||
1351 | error = priv_check_cred(kauth_cred_get(), PRIV_VM_PRESSURE, 0); | |
1352 | if (error) { | |
1353 | return error; | |
1354 | } | |
1355 | ||
1356 | #endif /* !XNU_TARGET_OS_OSX */ | |
1357 | uint32_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(memorystatus_vm_pressure_level); |
1358 | ||
1359 | return SYSCTL_OUT(req, &dispatch_level, sizeof(dispatch_level)); | |
1360 | } | |
1361 | ||
1362 | #if DEBUG || DEVELOPMENT | |
1363 | ||
1364 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, | |
1365 | 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); | |
1366 | ||
1367 | #else /* DEBUG || DEVELOPMENT */ | |
1368 | ||
1369 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_level, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED | CTLFLAG_MASKED, | |
1370 | 0, 0, &sysctl_memorystatus_vm_pressure_level, "I", ""); | |
1371 | ||
1372 | #endif /* DEBUG || DEVELOPMENT */ | |
1373 | ||
1374 | /* | |
1375 | * Trigger levels to test the mechanism. | |
1376 | * Can be used via a sysctl. | |
1377 | */ | |
1378 | #define TEST_LOW_MEMORY_TRIGGER_ONE 1 | |
1379 | #define TEST_LOW_MEMORY_TRIGGER_ALL 2 | |
1380 | #define TEST_PURGEABLE_TRIGGER_ONE 3 | |
1381 | #define TEST_PURGEABLE_TRIGGER_ALL 4 | |
1382 | #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE 5 | |
1383 | #define TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL 6 | |
1384 | ||
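/*
 * Input encoding for the manual trigger: bits 0-15 carry the pressure level as a
 * NOTE_MEMORYSTATUS_PRESSURE_* constant from sys/event.h, bits 16-31 carry one of
 * the TEST_* trigger requests above, selecting whether to purge purgeable memory
 * and/or drive the notification path for one or all eligible processes.
 */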
1385 | static int | |
1386 | sysctl_memorypressure_manual_trigger SYSCTL_HANDLER_ARGS | |
1387 | { | |
1388 | #pragma unused(arg1, arg2) | |
1389 | ||
1390 | int level = 0; | |
1391 | int error = 0; | |
1392 | int pressure_level = 0; | |
1393 | int trigger_request = 0; | |
1394 | int force_purge; | |
1395 | ||
1396 | error = sysctl_handle_int(oidp, &level, 0, req); | |
1397 | if (error || !req->newptr) { | |
1398 | return error; | |
1399 | } | |
1400 | ||
1401 | memorystatus_manual_testing_on = TRUE; | |
1402 | ||
1403 | trigger_request = (level >> 16) & 0xFFFF; | |
1404 | pressure_level = (level & 0xFFFF); | |
1405 | ||
1406 | if (trigger_request < TEST_LOW_MEMORY_TRIGGER_ONE || | |
1407 | trigger_request > TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL) { | |
1408 | return EINVAL; | |
1409 | } | |
1410 | switch (pressure_level) { | |
1411 | case NOTE_MEMORYSTATUS_PRESSURE_NORMAL: | |
1412 | case NOTE_MEMORYSTATUS_PRESSURE_WARN: | |
1413 | case NOTE_MEMORYSTATUS_PRESSURE_CRITICAL: | |
1414 | break; | |
1415 | default: | |
1416 | return EINVAL; | |
1417 | } | |
1418 | ||
1419 | /* | |
1420 | * The pressure level is being set from user-space. | |
1421 | * And user-space uses the constants in sys/event.h | |
1422 | * So we translate those events to our internal levels here. | |
1423 | */ | |
1424 | if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
1425 | memorystatus_manual_testing_level = kVMPressureNormal; | |
1426 | force_purge = 0; | |
1427 | } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_WARN) { | |
1428 | memorystatus_manual_testing_level = kVMPressureWarning; | |
1429 | force_purge = vm_pageout_state.memorystatus_purge_on_warning; | |
1430 | } else if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) { | |
1431 | memorystatus_manual_testing_level = kVMPressureCritical; | |
1432 | force_purge = vm_pageout_state.memorystatus_purge_on_critical; | |
1433 | } | |
1434 | ||
1435 | memorystatus_vm_pressure_level = memorystatus_manual_testing_level; | |
1436 | ||
1437 | /* purge according to the new pressure level */ | |
1438 | switch (trigger_request) { | |
1439 | case TEST_PURGEABLE_TRIGGER_ONE: | |
1440 | case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE: | |
1441 | if (force_purge == 0) { | |
1442 | /* no purging requested */ | |
1443 | break; | |
1444 | } | |
1445 | vm_purgeable_object_purge_one_unlocked(force_purge); | |
1446 | break; | |
1447 | case TEST_PURGEABLE_TRIGGER_ALL: | |
1448 | case TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL: | |
1449 | if (force_purge == 0) { | |
1450 | /* no purging requested */ | |
1451 | break; | |
1452 | } | |
1453 | while (vm_purgeable_object_purge_one_unlocked(force_purge)) { | |
1454 | ; | |
1455 | } | |
1456 | break; | |
1457 | } | |
1458 | ||
1459 | if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ONE) || | |
1460 | (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ONE)) { | |
1461 | memorystatus_update_vm_pressure(TRUE); | |
1462 | } | |
1463 | ||
1464 | if ((trigger_request == TEST_LOW_MEMORY_TRIGGER_ALL) || | |
1465 | (trigger_request == TEST_LOW_MEMORY_PURGEABLE_TRIGGER_ALL)) { | |
1466 | while (memorystatus_update_vm_pressure(FALSE) == KERN_SUCCESS) { | |
1467 | continue; | |
1468 | } | |
1469 | } | |
1470 | ||
1471 | if (pressure_level == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) { | |
1472 | memorystatus_manual_testing_on = FALSE; | |
1473 | } | |
1474 | ||
1475 | return 0; | |
1476 | } | |
1477 | ||
1478 | SYSCTL_PROC(_kern, OID_AUTO, memorypressure_manual_trigger, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, | |
1479 | 0, 0, &sysctl_memorypressure_manual_trigger, "I", ""); | |
1480 | ||
1481 | ||
1482 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_warning, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_state.memorystatus_purge_on_warning, 0, ""); | |
1483 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_urgent, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_state.memorystatus_purge_on_urgent, 0, ""); | |
1484 | SYSCTL_INT(_kern, OID_AUTO, memorystatus_purge_on_critical, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pageout_state.memorystatus_purge_on_critical, 0, ""); | |
1485 | ||
1486 | #if DEBUG || DEVELOPMENT | |
1487 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_vm_pressure_events_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_pressure_events_enabled, 0, ""); | |
1488 | ||
1489 | #if 0 | |
1490 | #if CONFIG_JETSAM && VM_PRESSURE_EVENTS | |
1491 | static boolean_t | |
1492 | memorystatus_issue_pressure_kevent(boolean_t pressured) | |
1493 | { | |
1494 | memorystatus_klist_lock(); | |
1495 | KNOTE(&memorystatus_klist, pressured ? kMemorystatusPressure : kMemorystatusNoPressure); | |
1496 | memorystatus_klist_unlock(); | |
1497 | return TRUE; | |
1498 | } | |
1499 | #endif /* CONFIG_JETSAM && VM_PRESSURE_EVENTS */ | |
1500 | #endif /* 0 */ | |
1501 | ||
1502 | /* | |
1503 | * This routine is used for targeted notifications regardless of system memory pressure | |
1504 | * and regardless of whether or not the process has already been notified. | |
1505 | * It bypasses and has no effect on the only-one-notification per soft-limit policy. | |
1506 | * | |
1507 | * "memnote" is the current user. | |
1508 | */ | |
1509 | ||
1510 | static int | |
1511 | sysctl_memorystatus_vm_pressure_send SYSCTL_HANDLER_ARGS | |
1512 | { | |
1513 | #pragma unused(arg1, arg2) | |
1514 | /* Need to be root or have memorystatus entitlement */ | |
1515 | if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) { | |
1516 | return EPERM; | |
1517 | } | |
1518 | ||
1519 | int error = 0, pid = 0; | |
1520 | struct knote *kn = NULL; | |
1521 | boolean_t found_knote = FALSE; | |
1522 | int fflags = 0; /* filter flags for EVFILT_MEMORYSTATUS */ | |
1523 | uint64_t value = 0; | |
1524 | ||
1525 | error = sysctl_handle_quad(oidp, &value, 0, req); | |
1526 | if (error || !req->newptr) { | |
1527 | return error; | |
1528 | } | |
1529 | ||
1530 | /* | |
1531 | * Find the pid in the low 32 bits of value passed in. | |
1532 | */ | |
1533 | pid = (int)(value & 0xFFFFFFFF); | |
1534 | ||
1535 | /* | |
1536 | * Find notification in the high 32 bits of the value passed in. | |
1537 | */ | |
1538 | fflags = (int)((value >> 32) & 0xFFFFFFFF); | |
1539 | ||
1540 | /* | |
1541 | * For backwards compatibility, when no notification is | |
1542 | * passed in, default to the NOTE_MEMORYSTATUS_PRESSURE_WARN | |
1543 | */ | |
1544 | if (fflags == 0) { | |
1545 | fflags = NOTE_MEMORYSTATUS_PRESSURE_WARN; | |
1546 | // printf("memorystatus_vm_pressure_send: using default notification [0x%x]\n", fflags); | |
1547 | } | |
1548 | ||
1549 | /* wake up everybody waiting for kVMPressureJetsam */ | |
1550 | if (fflags == NOTE_MEMORYSTATUS_JETSAM_FG_BAND) { | |
1551 | memorystatus_issue_fg_band_notify(); | |
1552 | return error; | |
1553 | } | |
1554 | ||
1555 | /* | |
1556 | * See event.h ... fflags for EVFILT_MEMORYSTATUS | |
1557 | */ | |
1558 | if (!((fflags == NOTE_MEMORYSTATUS_PRESSURE_NORMAL) || | |
1559 | (fflags == NOTE_MEMORYSTATUS_PRESSURE_WARN) || | |
1560 | (fflags == NOTE_MEMORYSTATUS_PRESSURE_CRITICAL) || | |
1561 | (fflags == NOTE_MEMORYSTATUS_LOW_SWAP) || | |
1562 | (fflags == NOTE_MEMORYSTATUS_PROC_LIMIT_WARN) || | |
1563 | (fflags == NOTE_MEMORYSTATUS_PROC_LIMIT_CRITICAL) || | |
1564 | (((fflags & NOTE_MEMORYSTATUS_MSL_STATUS) != 0 && | |
1565 | ((fflags & ~NOTE_MEMORYSTATUS_MSL_STATUS) == 0))))) { | |
1566 | printf("memorystatus_vm_pressure_send: notification [0x%x] not supported \n", fflags); | |
1567 | error = 1; | |
1568 | return error; | |
1569 | } | |
1570 | ||
1571 | /* | |
1572 | * Forcibly send pid a memorystatus notification. | |
1573 | */ | |
1574 | ||
1575 | memorystatus_klist_lock(); | |
1576 | ||
1577 | SLIST_FOREACH(kn, &memorystatus_klist, kn_selnext) { | |
1578 | proc_t knote_proc = knote_get_kq(kn)->kq_p; | |
1579 | pid_t knote_pid = knote_proc->p_pid; | |
1580 | ||
1581 | if (knote_pid == pid) { | |
1582 | /* | |
1583 | * Forcibly send this pid a memorystatus notification. | |
1584 | */ | |
1585 | kn->kn_fflags = fflags; | |
1586 | found_knote = TRUE; | |
1587 | } | |
1588 | } | |
1589 | ||
1590 | if (found_knote) { | |
1591 | KNOTE(&memorystatus_klist, 0); | |
1592 | printf("memorystatus_vm_pressure_send: (value 0x%llx) notification [0x%x] sent to process [%d] \n", value, fflags, pid); | |
1593 | error = 0; | |
1594 | } else { | |
1595 | printf("memorystatus_vm_pressure_send: (value 0x%llx) notification [0x%x] not sent to process [%d] (none registered?)\n", value, fflags, pid); | |
1596 | error = 1; | |
1597 | } | |
1598 | ||
1599 | memorystatus_klist_unlock(); | |
1600 | ||
1601 | return error; | |
1602 | } | |
1603 | ||
1604 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_vm_pressure_send, CTLTYPE_QUAD | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED | CTLFLAG_ANYBODY, | |
1605 | 0, 0, &sysctl_memorystatus_vm_pressure_send, "Q", ""); | |
1606 | ||
1607 | #endif /* DEBUG || DEVELOPMENT */ | |
1608 | ||
1609 | #endif /* VM_PRESSURE_EVENTS */ |