/* bsd/kern/vm_pressure.c (xnu-2422.110.17) */
/*
 * Copyright (c) 2009-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <libkern/libkern.h>
#include <mach/mach_types.h>
#include <mach/task.h>
#include <sys/proc_internal.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <kern/locks.h>
#include <sys/queue.h>
#include <kern/vm_pressure.h>
#include <sys/malloc.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <kern/assert.h>
#include <kern/task.h>
#include <vm/vm_pageout.h>

#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif

/*
 * This value is the threshold that a process must meet to be considered for scavenging.
 */
#define VM_PRESSURE_MINIMUM_RSIZE        10     /* MB */
#define VM_PRESSURE_NOTIFY_WAIT_PERIOD   10000  /* milliseconds */

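/*
 * Both constants are enforced in the candidate scans below: a candidate
 * must have a resident size of at least 10 MB, computed as
 * basic_info.resident_size / (1024 * 1024), and must not have been
 * notified within the past 10000 ms (the wait period is bypassed for
 * level-based notifications).
 */
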
void vm_pressure_klist_lock(void);
void vm_pressure_klist_unlock(void);

static void vm_dispatch_memory_pressure(void);
void vm_reset_active_list(void);

#if !(CONFIG_MEMORYSTATUS && CONFIG_JETSAM)
static kern_return_t vm_try_pressure_candidates(void);
#endif

static lck_mtx_t vm_pressure_klist_mutex;

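/*
 * Registered knotes live on the active list; once a process has been
 * notified, its knote is parked on the dormant list so it is not
 * re-notified until vm_reset_active_list() moves everything back.
 */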
struct klist vm_pressure_klist;
struct klist vm_pressure_klist_dormant;

#if DEBUG
#define VM_PRESSURE_DEBUG(cond, format, ...) \
do { \
    if (cond) { printf(format, ##__VA_ARGS__); } \
} while (0)
#else
#define VM_PRESSURE_DEBUG(cond, format, ...)
#endif

void vm_pressure_init(lck_grp_t *grp, lck_attr_t *attr) {
    lck_mtx_init(&vm_pressure_klist_mutex, grp, attr);
}

void vm_pressure_klist_lock(void) {
    lck_mtx_lock(&vm_pressure_klist_mutex);
}

void vm_pressure_klist_unlock(void) {
    lck_mtx_unlock(&vm_pressure_klist_mutex);
}

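/*
 * Illustrative userspace sketch (not part of this file): a process
 * typically reaches vm_knote_register() by adding an EVFILT_VM kevent
 * with NOTE_VM_PRESSURE in its fflags, roughly:
 *
 *     int kq = kqueue();
 *     struct kevent ev;
 *     EV_SET(&ev, 0, EVFILT_VM, EV_ADD, NOTE_VM_PRESSURE, 0, NULL);
 *     kevent(kq, &ev, 1, NULL, 0, NULL);
 *
 * The filter's attach path then hands the knote to this function.
 */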
int vm_knote_register(struct knote *kn) {
    int rv = 0;

    vm_pressure_klist_lock();

    if ((kn->kn_sfflags) & (NOTE_VM_PRESSURE)) {
        KNOTE_ATTACH(&vm_pressure_klist, kn);
    } else {
        rv = ENOTSUP;
    }

    vm_pressure_klist_unlock();

    return rv;
}

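/*
 * Detach a knote from whichever list, active or dormant, currently
 * holds it. A knote is on at most one of the two lists, so the search
 * can stop at the first match.
 */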
void vm_knote_unregister(struct knote *kn) {
    struct knote *kn_temp;

    vm_pressure_klist_lock();

    VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d cancelling pressure notification\n", kn->kn_kq->kq_p->p_pid);

    SLIST_FOREACH(kn_temp, &vm_pressure_klist, kn_selnext) {
        if (kn_temp == kn) {
            KNOTE_DETACH(&vm_pressure_klist, kn);
            vm_pressure_klist_unlock();
            return;
        }
    }

    SLIST_FOREACH(kn_temp, &vm_pressure_klist_dormant, kn_selnext) {
        if (kn_temp == kn) {
            KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
            vm_pressure_klist_unlock();
            return;
        }
    }

    vm_pressure_klist_unlock();
}

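/*
 * Called on process exit: drop the exiting process's pressure knote,
 * if any, from the active or dormant list. Like vm_knote_unregister(),
 * this assumes at most one knote per process across the two lists.
 */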
void vm_pressure_proc_cleanup(proc_t p)
{
    struct knote *kn = NULL;

    vm_pressure_klist_lock();

    VM_PRESSURE_DEBUG(0, "[vm_pressure] process %d exiting pressure notification\n", p->p_pid);

    SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
        if (kn->kn_kq->kq_p == p) {
            KNOTE_DETACH(&vm_pressure_klist, kn);
            vm_pressure_klist_unlock();
            return;
        }
    }

    SLIST_FOREACH(kn, &vm_pressure_klist_dormant, kn_selnext) {
        if (kn->kn_kq->kq_p == p) {
            KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
            vm_pressure_klist_unlock();
            return;
        }
    }

    vm_pressure_klist_unlock();
}

/*
 * Used by the vm_pressure_thread which is
 * signalled from within vm_pageout_scan().
 */
void consider_vm_pressure_events(void)
{
    vm_dispatch_memory_pressure();
}

#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM

static void vm_dispatch_memory_pressure(void)
{
    /* Update the pressure level and target the foreground or next-largest process as appropriate */
    memorystatus_update_vm_pressure(FALSE);
}

/* Jetsam-aware version. Called with the klist lock held. */

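/*
 * Note on the bare "return kn" below: SLIST_FOREACH() leaves its
 * iterator NULL when the loop runs off the end of the list, so kn is
 * non-NULL only when the break for a matching pid was taken.
 */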
static struct knote *vm_find_knote_from_pid(pid_t pid, struct klist *list) {
    struct knote *kn = NULL;

    SLIST_FOREACH(kn, list, kn_selnext) {
        struct proc *p;
        pid_t current_pid;

        p = kn->kn_kq->kq_p;
        current_pid = p->p_pid;

        if (current_pid == pid) {
            break;
        }
    }

    return kn;
}

int vm_dispatch_pressure_note_to_pid(pid_t pid, boolean_t locked) {
    int ret = EINVAL;
    struct knote *kn;

    VM_PRESSURE_DEBUG(1, "vm_dispatch_pressure_note_to_pid(): pid %d\n", pid);

    if (!locked) {
        vm_pressure_klist_lock();
    }

    /*
     * Because we're specifically targeting a process here, we don't care
     * if a warning has already been sent and it's moved to the dormant
     * list; check that too.
     */
    kn = vm_find_knote_from_pid(pid, &vm_pressure_klist);
    if (kn) {
        KNOTE(&vm_pressure_klist, pid);
        ret = 0;
    } else {
        kn = vm_find_knote_from_pid(pid, &vm_pressure_klist_dormant);
        if (kn) {
            /* Found on the dormant list; deliver the note there. */
            KNOTE(&vm_pressure_klist_dormant, pid);
            ret = 0;
        }
    }

    if (!locked) {
        vm_pressure_klist_unlock();
    }

    return ret;
}

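/*
 * Notify every registered foreground process. Matching knotes are
 * moved to a private dispatch list first so that KNOTE() fires only
 * for the selected set, then parked on the dormant list afterwards.
 */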
void vm_find_pressure_foreground_candidates(void)
{
    struct knote *kn, *kn_tmp;
    struct klist dispatch_klist = { NULL };

    vm_pressure_klist_lock();
    proc_list_lock();

    /* Find the foreground processes. */
    SLIST_FOREACH_SAFE(kn, &vm_pressure_klist, kn_selnext, kn_tmp) {
        proc_t p = kn->kn_kq->kq_p;

        if (memorystatus_is_foreground_locked(p)) {
            KNOTE_DETACH(&vm_pressure_klist, kn);
            KNOTE_ATTACH(&dispatch_klist, kn);
        }
    }

    SLIST_FOREACH_SAFE(kn, &vm_pressure_klist_dormant, kn_selnext, kn_tmp) {
        proc_t p = kn->kn_kq->kq_p;

        if (memorystatus_is_foreground_locked(p)) {
            KNOTE_DETACH(&vm_pressure_klist_dormant, kn);
            KNOTE_ATTACH(&dispatch_klist, kn);
        }
    }

    proc_list_unlock();

    /* Dispatch pressure notifications accordingly */
    SLIST_FOREACH_SAFE(kn, &dispatch_klist, kn_selnext, kn_tmp) {
        proc_t p = kn->kn_kq->kq_p;

        proc_list_lock();
        if (p != proc_ref_locked(p)) {
            proc_list_unlock();
            KNOTE_DETACH(&dispatch_klist, kn);
            KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
            continue;
        }
        proc_list_unlock();

        VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d\n", kn->kn_kq->kq_p->p_pid);
        KNOTE(&dispatch_klist, p->p_pid);
        KNOTE_DETACH(&dispatch_klist, kn);
        KNOTE_ATTACH(&vm_pressure_klist_dormant, kn);
        microuptime(&p->vm_pressure_last_notify_tstamp);
        memorystatus_send_pressure_note(p->p_pid);
        proc_rele(p);
    }

    vm_pressure_klist_unlock();
}

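/*
 * Jetsam path without a specific target: scan the active list for the
 * largest eligible background process (resident size at least
 * VM_PRESSURE_MINIMUM_RSIZE, not notified within the wait period) and
 * send it a single pressure note.
 */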
void vm_find_pressure_candidate(void)
{
    struct knote *kn = NULL, *kn_max = NULL;
    unsigned int resident_max = 0;
    pid_t target_pid = -1;
    struct klist dispatch_klist = { NULL };
    struct timeval curr_tstamp = {0, 0};
    int elapsed_msecs = 0;
    proc_t target_proc = PROC_NULL;
    kern_return_t kr = KERN_SUCCESS;

    microuptime(&curr_tstamp);

    vm_pressure_klist_lock();

    SLIST_FOREACH(kn, &vm_pressure_klist, kn_selnext) {
        struct mach_task_basic_info basic_info;
        mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT;
        unsigned int resident_size = 0;
        proc_t p = PROC_NULL;
        struct task *t = TASK_NULL;

        p = kn->kn_kq->kq_p;
        proc_list_lock();
        if (p != proc_ref_locked(p)) {
            p = PROC_NULL;
            proc_list_unlock();
            continue;
        }
        proc_list_unlock();

        t = (struct task *)(p->task);

        /* Subtract on a copy: timevalsub() is destructive and curr_tstamp is reused on later iterations. */
        {
            struct timeval elapsed = curr_tstamp;
            timevalsub(&elapsed, &p->vm_pressure_last_notify_tstamp);
            elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
        }

        if (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD) {
            proc_rele(p);
            continue;
        }

        if (!memorystatus_bg_pressure_eligible(p)) {
            VM_PRESSURE_DEBUG(1, "[vm_pressure] skipping process %d\n", p->p_pid);
            proc_rele(p);
            continue;
        }

        if ((kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS) {
            VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed\n", p->p_pid);
            proc_rele(p);
            continue;
        }

        /*
         * We don't want a small process to block large processes from
         * being notified again. <rdar://problem/7955532>
         */
        resident_size = (basic_info.resident_size)/(1024 * 1024);
        if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
            if (resident_size > resident_max) {
                resident_max = resident_size;
                kn_max = kn;
                target_pid = p->p_pid;
                target_proc = p;
            }
        } else {
            /* There was no candidate with enough resident memory to scavenge */
            VM_PRESSURE_DEBUG(1, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
        }
        proc_rele(p);
    }

    if (kn_max == NULL || target_pid == -1) {
        VM_PRESSURE_DEBUG(1, "[vm_pressure] - no target found!\n");
        goto exit;
    }

    VM_DEBUG_EVENT(vm_pageout_scan, VM_PRESSURE_EVENT, DBG_FUNC_NONE, target_pid, resident_max, 0, 0);
    VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);

    KNOTE_DETACH(&vm_pressure_klist, kn_max);

    target_proc = proc_find(target_pid);
    if (target_proc != PROC_NULL) {
        KNOTE_ATTACH(&dispatch_klist, kn_max);
        KNOTE(&dispatch_klist, target_pid);
        KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);
        memorystatus_send_pressure_note(target_pid);
        microuptime(&target_proc->vm_pressure_last_notify_tstamp);
        proc_rele(target_proc);
    }

exit:
    vm_pressure_klist_unlock();
}

#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */

struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level);

kern_return_t vm_pressure_notification_without_levels(void);
kern_return_t vm_pressure_notify_dispatch_vm_clients(void);

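/*
 * Non-jetsam dispatch entry point: if the active list has drained,
 * recharge it from the dormant list, then try to notify a candidate.
 * Returns KERN_SUCCESS only if a notification was actually sent.
 */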
kern_return_t
vm_pressure_notify_dispatch_vm_clients(void)
{
    vm_pressure_klist_lock();

    if (SLIST_EMPTY(&vm_pressure_klist)) {
        vm_reset_active_list();
    }

    if (!SLIST_EMPTY(&vm_pressure_klist)) {
        VM_PRESSURE_DEBUG(1, "[vm_pressure] vm_dispatch_memory_pressure\n");

        if (KERN_SUCCESS == vm_try_pressure_candidates()) {
            vm_pressure_klist_unlock();
            return KERN_SUCCESS;
        }
    }

    VM_PRESSURE_DEBUG(1, "[vm_pressure] could not find suitable event candidate\n");

    vm_pressure_klist_unlock();

    return KERN_FAILURE;
}

static void vm_dispatch_memory_pressure(void)
{
    memorystatus_update_vm_pressure(FALSE);
}

extern vm_pressure_level_t
convert_internal_pressure_level_to_dispatch_level(vm_pressure_level_t);

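/*
 * Candidate selection policy, by level:
 *   level == -1: level-free notification; pick the largest process
 *                that has not been notified within the wait period.
 *   level ==  0: pressure back to normal; prefer large, important
 *                tasks that were previously told about a warning or
 *                critical level.
 *   level  >  0: warning/critical; when pressure is rising, prefer
 *                large, unimportant tasks not yet notified at this
 *                level, otherwise prefer important ones.
 */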
struct knote *
vm_pressure_select_optimal_candidate_to_notify(struct klist *candidate_list, int level)
{
    struct knote *kn = NULL, *kn_max = NULL;
    unsigned int resident_max = 0;
    kern_return_t kr = KERN_SUCCESS;
    struct timeval curr_tstamp = {0, 0};
    int elapsed_msecs = 0;
    int selected_task_importance = 0;
    static int pressure_snapshot = -1;
    boolean_t pressure_increase = FALSE;

    if (level != -1) {
        if (pressure_snapshot == -1) {
            /*
             * Initial snapshot.
             */
            pressure_snapshot = level;
            pressure_increase = TRUE;
        } else {
            if (level >= pressure_snapshot) {
                pressure_increase = TRUE;
            } else {
                pressure_increase = FALSE;
            }
            pressure_snapshot = level;
        }
    }

    if ((level > 0) && (pressure_increase == TRUE)) {
        /*
         * We'll start by considering the largest
         * unimportant task in our list.
         */
        selected_task_importance = INT_MAX;
    } else {
        /*
         * We'll start by considering the largest
         * important task in our list.
         */
        selected_task_importance = 0;
    }

    microuptime(&curr_tstamp);

    SLIST_FOREACH(kn, candidate_list, kn_selnext) {
        struct mach_task_basic_info basic_info;
        mach_msg_type_number_t size = MACH_TASK_BASIC_INFO_COUNT;
        unsigned int resident_size = 0;
        proc_t p = PROC_NULL;
        struct task *t = TASK_NULL;
        int curr_task_importance = 0;
        boolean_t consider_knote = FALSE;

        p = kn->kn_kq->kq_p;
        proc_list_lock();
        if (p != proc_ref_locked(p)) {
            p = PROC_NULL;
            proc_list_unlock();
            continue;
        }
        proc_list_unlock();

        t = (struct task *)(p->task);

        /* Subtract on a copy: timevalsub() is destructive and curr_tstamp is reused on later iterations. */
        {
            struct timeval elapsed = curr_tstamp;
            timevalsub(&elapsed, &p->vm_pressure_last_notify_tstamp);
            elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
        }

        if ((level == -1) && (elapsed_msecs < VM_PRESSURE_NOTIFY_WAIT_PERIOD)) {
            proc_rele(p);
            continue;
        }

        if (level != -1) {
            /*
             * For the level-based notifications, check and see if this knote is
             * registered for the current level.
             */
            vm_pressure_level_t dispatch_level = convert_internal_pressure_level_to_dispatch_level(level);

            if ((kn->kn_sfflags & dispatch_level) == 0) {
                proc_rele(p);
                continue;
            }
        }

        if ((kr = task_info(t, MACH_TASK_BASIC_INFO, (task_info_t)(&basic_info), &size)) != KERN_SUCCESS) {
            VM_PRESSURE_DEBUG(1, "[vm_pressure] task_info for pid %d failed with %d\n", p->p_pid, kr);
            proc_rele(p);
            continue;
        }

        curr_task_importance = task_importance_estimate(t);

        /*
         * We don't want a small process to block large processes from
         * being notified again. <rdar://problem/7955532>
         */
        resident_size = (basic_info.resident_size)/(MB);

        if (resident_size >= VM_PRESSURE_MINIMUM_RSIZE) {
            if (level > 0) {
                /*
                 * Warning or Critical Pressure.
                 */
                if (pressure_increase) {
                    if ((curr_task_importance <= selected_task_importance) && (resident_size > resident_max)) {
                        if (task_has_been_notified(t, level) == FALSE) {
                            consider_knote = TRUE;
                        }
                    }
                } else {
                    if ((curr_task_importance >= selected_task_importance) && (resident_size > resident_max)) {
                        if (task_has_been_notified(t, level) == FALSE) {
                            consider_knote = TRUE;
                        }
                    }
                }
            } else if (level == 0) {
                /*
                 * Pressure back to normal.
                 */
                if ((curr_task_importance >= selected_task_importance) && (resident_size > resident_max)) {
                    if ((task_has_been_notified(t, kVMPressureWarning) == TRUE) || (task_has_been_notified(t, kVMPressureCritical) == TRUE)) {
                        consider_knote = TRUE;
                    }
                }
            } else if (level == -1) {
                /*
                 * Simple (importance and level)-free behavior based solely on RSIZE.
                 */
                if (resident_size > resident_max) {
                    consider_knote = TRUE;
                }
            }

            if (consider_knote) {
                resident_max = resident_size;
                kn_max = kn;
                selected_task_importance = curr_task_importance;
                consider_knote = FALSE; /* reset for the next candidate */
            }
        } else {
            /* There was no candidate with enough resident memory to scavenge */
            VM_PRESSURE_DEBUG(0, "[vm_pressure] threshold failed for pid %d with %u resident...\n", p->p_pid, resident_size);
        }
        proc_rele(p);
    }

    if (kn_max) {
        VM_PRESSURE_DEBUG(1, "[vm_pressure] sending event to pid %d with %u resident\n", kn_max->kn_kq->kq_p->p_pid, resident_max);
    }

    return kn_max;
}

/*
 * vm_pressure_klist_lock is held for this routine.
 */
kern_return_t vm_pressure_notification_without_levels(void)
{
    struct knote *kn_max = NULL;
    pid_t target_pid = -1;
    struct klist dispatch_klist = { NULL };
    proc_t target_proc = PROC_NULL;

    kn_max = vm_pressure_select_optimal_candidate_to_notify(&vm_pressure_klist, -1);
    if (kn_max == NULL) {
        return KERN_FAILURE;
    }

    target_proc = kn_max->kn_kq->kq_p;

    KNOTE_DETACH(&vm_pressure_klist, kn_max);

    if (target_proc != PROC_NULL) {
        target_pid = target_proc->p_pid;

        memoryshot(VM_PRESSURE_EVENT, DBG_FUNC_NONE);

        KNOTE_ATTACH(&dispatch_klist, kn_max);
        KNOTE(&dispatch_klist, target_pid);
        KNOTE_ATTACH(&vm_pressure_klist_dormant, kn_max);

        microuptime(&target_proc->vm_pressure_last_notify_tstamp);
    }

    return KERN_SUCCESS;
}

static kern_return_t vm_try_pressure_candidates(void)
{
    /*
     * This takes care of candidates that use NOTE_VM_PRESSURE.
     * It's a notification without indication of the level
     * of memory pressure.
     */
    return (vm_pressure_notification_without_levels());
}

#endif /* !(CONFIG_MEMORYSTATUS && CONFIG_JETSAM) */

/*
 * Remove all elements from the dormant list and place them on the active list.
 * Called with klist lock held.
 */
void vm_reset_active_list(void) {
    /* Re-charge the main list from the dormant list if possible */
    if (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
        struct knote *kn;

        VM_PRESSURE_DEBUG(1, "[vm_pressure] recharging main list from dormant list\n");

        while (!SLIST_EMPTY(&vm_pressure_klist_dormant)) {
            kn = SLIST_FIRST(&vm_pressure_klist_dormant);
            SLIST_REMOVE_HEAD(&vm_pressure_klist_dormant, kn_selnext);
            SLIST_INSERT_HEAD(&vm_pressure_klist, kn, kn_selnext);
        }
    }
}