]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/kern/kern_memorystatus.c
xnu-1699.26.8.tar.gz
[apple/xnu.git] / bsd / kern / kern_memorystatus.c
... / ...
CommitLineData
/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 *
 */
29
30#include <sys/kern_event.h>
31#include <sys/kern_memorystatus.h>
32
33#include <kern/sched_prim.h>
34#include <kern/kalloc.h>
35#include <kern/debug.h>
36#include <kern/lock.h>
37#include <kern/task.h>
38#include <kern/thread.h>
39#include <libkern/libkern.h>
40#include <mach/task.h>
41#include <mach/task_info.h>
42#include <sys/proc.h>
43#include <sys/signal.h>
44#include <sys/signalvar.h>
45#include <sys/sysctl.h>
46#include <sys/wait.h>
47#include <sys/tree.h>
48#include <pexpert/pexpert.h>
49
50#if CONFIG_FREEZE
51#include <vm/vm_protos.h>
52#include <vm/vm_map.h>
53
/*
 * Per-process hibernation state bits kept in hibernation_node.state.
 * Each value is a distinct single-bit flag so they can be OR-ed together.
 */
enum {
	kProcessSuspended      = (1 << 0),	/* set when the pid is registered on suspend */
	kProcessHibernated     = (1 << 1),	/* set after the task has been frozen */
	kProcessNoReclaimWorth = (1 << 2),	/* set when the freeze reported no shared pages */
	kProcessIgnored        = (1 << 3),	/* set when a hibernation attempt skipped the process */
	kProcessBusy           = (1 << 4)	/* set while a freeze or thaw is in progress */
};
61
62static lck_mtx_t * hibernation_mlock;
63static lck_attr_t * hibernation_lck_attr;
64static lck_grp_t * hibernation_lck_grp;
65static lck_grp_attr_t * hibernation_lck_grp_attr;
66
67typedef struct hibernation_node {
68 RB_ENTRY(hibernation_node) link;
69 pid_t pid;
70 uint32_t state;
71 mach_timespec_t hibernation_ts;
72} hibernation_node;
73
74static int hibernation_tree_compare(hibernation_node *n1, hibernation_node *n2) {
75 if (n1->pid < n2->pid)
76 return -1;
77 else if (n1->pid > n2->pid)
78 return 1;
79 else
80 return 0;
81}
82
83static RB_HEAD(hibernation_tree, hibernation_node) hibernation_tree_head;
84RB_PROTOTYPE_SC(static, hibernation_tree, hibernation_node, link, hibernation_tree_compare);
85
86RB_GENERATE(hibernation_tree, hibernation_node, link, hibernation_tree_compare);
87
88static inline boolean_t kern_hibernation_can_hibernate_processes(void);
89static boolean_t kern_hibernation_can_hibernate(void);
90
91static void kern_hibernation_add_node(hibernation_node *node);
92static hibernation_node *kern_hibernation_get_node(pid_t pid);
93static void kern_hibernation_release_node(hibernation_node *node);
94static void kern_hibernation_free_node(hibernation_node *node, boolean_t unlock);
95
96static void kern_hibernation_register_pid(pid_t pid);
97static void kern_hibernation_unregister_pid(pid_t pid);
98
99static int kern_hibernation_get_process_state(pid_t pid, uint32_t *state, mach_timespec_t *ts);
100static int kern_hibernation_set_process_state(pid_t pid, uint32_t state);
101
102static void kern_hibernation_cull(void);
103
104static void kern_hibernation_thread(void);
105
106extern boolean_t vm_freeze_enabled;
107
108int kern_hibernation_wakeup = 0;
109
110static int jetsam_priority_list_hibernation_index = 0;
111
112/* Thresholds */
113static int kern_memorystatus_level_hibernate = 50;
114
115#define HIBERNATION_PAGES_MIN ( 1 * 1024 * 1024 / PAGE_SIZE)
116#define HIBERNATION_PAGES_MAX (16 * 1024 * 1024 / PAGE_SIZE)
117
118static unsigned int kern_memorystatus_hibernation_pages_min = HIBERNATION_PAGES_MIN;
119static unsigned int kern_memorystatus_hibernation_pages_max = HIBERNATION_PAGES_MAX;
120
121static unsigned int kern_memorystatus_suspended_count = 0;
122static unsigned int kern_memorystatus_hibernated_count = 0;
123
124static unsigned int kern_memorystatus_hibernation_suspended_minimum = 4;
125
126static unsigned int kern_memorystatus_low_swap_pages = 0;
127
128/* Throttling */
129#define HIBERNATION_DAILY_MB_MAX 1024
130#define HIBERNATION_DAILY_PAGEOUTS_MAX (HIBERNATION_DAILY_MB_MAX * (1024 * 1024 / PAGE_SIZE))
131
132static struct throttle_interval_t {
133 uint32_t mins;
134 uint32_t burst_multiple;
135 uint32_t pageouts;
136 uint32_t max_pageouts;
137 mach_timespec_t ts;
138 boolean_t throttle;
139} throttle_intervals[] = {
140 { 60, 8, 0, 0, { 0, 0 }, FALSE }, /* 1 hour intermediate interval, 8x burst */
141 { 24 * 60, 1, 0, 0, { 0, 0 }, FALSE }, /* 24 hour long interval, no burst */
142};
143
144/* Stats */
145static uint64_t kern_memorystatus_hibernation_count = 0;
146SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_count, CTLFLAG_RD, &kern_memorystatus_hibernation_count, "");
147
148static uint64_t kern_memorystatus_hibernation_pageouts = 0;
149SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_pageouts, CTLFLAG_RD, &kern_memorystatus_hibernation_pageouts, "");
150
151static uint64_t kern_memorystatus_hibernation_throttle_count = 0;
152SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_hibernation_throttle_count, CTLFLAG_RD, &kern_memorystatus_hibernation_throttle_count, "");
153
154SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_min_processes, CTLFLAG_RW, &kern_memorystatus_hibernation_suspended_minimum, 0, "");
155
156#if DEVELOPMENT || DEBUG
157/* Allow parameter tweaking in these builds */
158SYSCTL_UINT(_kern, OID_AUTO, memorystatus_level_hibernate, CTLFLAG_RW, &kern_memorystatus_level_hibernate, 0, "");
159
160SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_pages_min, CTLFLAG_RW, &kern_memorystatus_hibernation_pages_min, 0, "");
161SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_pages_max, CTLFLAG_RW, &kern_memorystatus_hibernation_pages_max, 0, "");
162
163boolean_t kern_memorystatus_hibernation_throttle_enabled = TRUE;
164SYSCTL_UINT(_kern, OID_AUTO, memorystatus_hibernation_throttle_enabled, CTLFLAG_RW, &kern_memorystatus_hibernation_throttle_enabled, 0, "");
165#endif /* DEVELOPMENT || DEBUG */
166#endif /* CONFIG_FREEZE */
167
/* VM page counters sampled for snapshots and status notifications. */
extern unsigned int vm_page_free_count;
extern unsigned int vm_page_active_count;
extern unsigned int vm_page_inactive_count;
extern unsigned int vm_page_purgeable_count;
extern unsigned int vm_page_wire_count;

static void kern_memorystatus_thread(void);

/* Event the memorystatus thread blocks on. */
int kern_memorystatus_wakeup = 0;
/* Current memory status level; reported to userspace as "percent free". */
int kern_memorystatus_level = 0;
/* Level captured when the last status note was built. */
int kern_memorystatus_last_level = 0;
unsigned int kern_memorystatus_delta;

/* Number of kev_post_msg() failures seen by this file. */
unsigned int kern_memorystatus_kev_failure_count = 0;
/* At or below this level the memorystatus thread kills from the priority list. */
int kern_memorystatus_level_critical = 5;
/* At or below this level processes over their high-water mark are killed. */
#define kern_memorystatus_level_highwater (kern_memorystatus_level_critical + 5)
184
185static struct {
186 jetsam_kernel_stats_t stats;
187 size_t entry_count;
188 jetsam_snapshot_entry_t entries[kMaxSnapshotEntries];
189} jetsam_snapshot;
190
191static jetsam_priority_entry_t jetsam_priority_list[kMaxPriorityEntries];
192#define jetsam_snapshot_list jetsam_snapshot.entries
193
194static int jetsam_priority_list_index = 0;
195static int jetsam_priority_list_count = 0;
196static int jetsam_snapshot_list_count = 0;
197
198static lck_mtx_t * jetsam_list_mlock;
199static lck_attr_t * jetsam_lck_attr;
200static lck_grp_t * jetsam_lck_grp;
201static lck_grp_attr_t * jetsam_lck_grp_attr;
202
203SYSCTL_INT(_kern, OID_AUTO, memorystatus_level, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_memorystatus_level, 0, "");
204SYSCTL_UINT(_kern, OID_AUTO, memorystatus_kev_failure_count, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_memorystatus_kev_failure_count, 0, "");
205
206#if DEVELOPMENT || DEBUG
207
/*
 * Diagnostic mode selected through the debug.jetsam_diagnostic_mode sysctl.
 * In any mode other than None, frontmost processes are suspended for
 * diagnosis instead of being killed.
 */
enum {
	kJetsamDiagnosticModeNone = 0,
	kJetsamDiagnosticModeAll = 1,
	kJetsamDiagnosticModeStopAtFirstActive = 2
} jetsam_diagnostic_mode = kJetsamDiagnosticModeNone;

/* Set to 1 by jetsam_kill_top_proc() after suspending a process in StopAtFirstActive mode. */
static int jetsam_diagnostic_suspended_one_active_proc = 0;
215
216static int
217sysctl_jetsam_diagnostic_mode SYSCTL_HANDLER_ARGS
218{
219#pragma unused(arg1, arg2)
220 int error, val = jetsam_diagnostic_mode;
221 boolean_t disabled;
222
223 error = sysctl_handle_int(oidp, &val, 0, req);
224 if (error || !req->newptr)
225 return (error);
226 if ((val < 0) || (val > 2)) {
227 printf("jetsam: diagnostic mode: invalid value - %d\n", val);
228 return (0);
229 }
230
231 /*
232 * If jetsam_diagnostic_mode is set, we need to lower memory threshold for jetsam
233 */
234 disabled = (val == 0) && (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone);
235
236 jetsam_diagnostic_mode = val;
237
238 if (disabled) {
239 kern_memorystatus_level_critical = 5;
240 printf("jetsam: diagnostic mode: resetting critical level to %d\n", kern_memorystatus_level_critical);
241 } else {
242 kern_memorystatus_level_critical = 10;
243 printf("jetsam: diagnostic mode: %d: increasing critical level to %d\n", (int) jetsam_diagnostic_mode, kern_memorystatus_level_critical);
244 if (jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive)
245 printf("jetsam: diagnostic mode: will stop at first active app\n");
246 }
247
248 return (0);
249}
250
251SYSCTL_PROC(_debug, OID_AUTO, jetsam_diagnostic_mode, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_ANYBODY,
252 &jetsam_diagnostic_mode, 0, sysctl_jetsam_diagnostic_mode, "I", "Jetsam Diagnostic Mode");
253#endif /* DEVELOPMENT || DEBUG */
254
255__private_extern__ void
256kern_memorystatus_init(void)
257{
258 jetsam_lck_attr = lck_attr_alloc_init();
259 jetsam_lck_grp_attr= lck_grp_attr_alloc_init();
260 jetsam_lck_grp = lck_grp_alloc_init("jetsam", jetsam_lck_grp_attr);
261 jetsam_list_mlock = lck_mtx_alloc_init(jetsam_lck_grp, jetsam_lck_attr);
262 kern_memorystatus_delta = 5 * atop_64(max_mem) / 100;
263
264 (void)kernel_thread(kernel_task, kern_memorystatus_thread);
265}
266
267static uint32_t
268jetsam_task_page_count(task_t task)
269{
270 kern_return_t ret;
271 static task_info_data_t data;
272 static struct task_basic_info *info = (struct task_basic_info *)&data;
273 static mach_msg_type_number_t count = TASK_BASIC_INFO_COUNT;
274
275 ret = task_info(task, TASK_BASIC_INFO, (task_info_t)&data, &count);
276 if (ret == KERN_SUCCESS) {
277 return info->resident_size / PAGE_SIZE;
278 }
279 return 0;
280}
281
282static uint32_t
283jetsam_flags_for_pid(pid_t pid)
284{
285 int i;
286
287 for (i = 0; i < jetsam_priority_list_count; i++) {
288 if (pid == jetsam_priority_list[i].pid) {
289 return jetsam_priority_list[i].flags;
290 }
291 }
292 return 0;
293}
294
295static void
296jetsam_snapshot_procs(void)
297{
298 proc_t p;
299 int i = 0;
300
301 jetsam_snapshot.stats.free_pages = vm_page_free_count;
302 jetsam_snapshot.stats.active_pages = vm_page_active_count;
303 jetsam_snapshot.stats.inactive_pages = vm_page_inactive_count;
304 jetsam_snapshot.stats.purgeable_pages = vm_page_purgeable_count;
305 jetsam_snapshot.stats.wired_pages = vm_page_wire_count;
306 proc_list_lock();
307 LIST_FOREACH(p, &allproc, p_list) {
308 task_t task = p->task;
309 jetsam_snapshot_list[i].pid = p->p_pid;
310 jetsam_snapshot_list[i].pages = jetsam_task_page_count(task);
311 jetsam_snapshot_list[i].flags = jetsam_flags_for_pid(p->p_pid);
312 strlcpy(&jetsam_snapshot_list[i].name[0], p->p_comm, MAXCOMLEN+1);
313#ifdef DEBUG
314 printf("jetsam snapshot pid = %d, uuid = %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
315 p->p_pid,
316 p->p_uuid[0], p->p_uuid[1], p->p_uuid[2], p->p_uuid[3], p->p_uuid[4], p->p_uuid[5], p->p_uuid[6], p->p_uuid[7],
317 p->p_uuid[8], p->p_uuid[9], p->p_uuid[10], p->p_uuid[11], p->p_uuid[12], p->p_uuid[13], p->p_uuid[14], p->p_uuid[15]);
318#endif
319 memcpy(&jetsam_snapshot_list[i].uuid[0], &p->p_uuid[0], sizeof(p->p_uuid));
320 i++;
321 if (i == kMaxSnapshotEntries) {
322 break;
323 }
324 }
325 proc_list_unlock();
326 jetsam_snapshot.entry_count = jetsam_snapshot_list_count = i - 1;
327}
328
329static void
330jetsam_mark_pid_in_snapshot(pid_t pid, int flags)
331{
332
333 int i = 0;
334
335 for (i = 0; i < jetsam_snapshot_list_count; i++) {
336 if (jetsam_snapshot_list[i].pid == pid) {
337 jetsam_snapshot_list[i].flags |= flags;
338 return;
339 }
340 }
341}
342
343int
344jetsam_kill_top_proc(boolean_t any, uint32_t cause)
345{
346 proc_t p;
347
348#ifndef CONFIG_FREEZE
349#pragma unused(any)
350#endif
351
352 if (jetsam_snapshot_list_count == 0) {
353 jetsam_snapshot_procs();
354 }
355 lck_mtx_lock(jetsam_list_mlock);
356 while (jetsam_priority_list_index < jetsam_priority_list_count) {
357 jetsam_priority_entry_t* jetsam_priority_entry = &jetsam_priority_list[jetsam_priority_list_index];
358 pid_t aPid = jetsam_priority_entry->pid;
359#if DEVELOPMENT || DEBUG
360 int activeProcess = jetsam_priority_entry->flags & kJetsamFlagsFrontmost;
361 int procSuspendedForDiagnosis = jetsam_priority_entry->flags & kJetsamFlagsSuspForDiagnosis;
362#endif /* DEVELOPMENT || DEBUG */
363 jetsam_priority_list_index++;
364 /* skip empty slots in the list */
365 if (aPid == 0) {
366 continue; // with lock held
367 }
368 lck_mtx_unlock(jetsam_list_mlock);
369 p = proc_find(aPid);
370 if (p != NULL) {
371 int flags = cause;
372#if DEVELOPMENT || DEBUG
373 if ((jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) && procSuspendedForDiagnosis) {
374 printf("jetsam: continuing after ignoring proc suspended already for diagnosis - %d\n", aPid);
375 proc_rele(p);
376 lck_mtx_lock(jetsam_list_mlock);
377 continue;
378 }
379#endif /* DEVELOPMENT || DEBUG */
380#if CONFIG_FREEZE
381 hibernation_node *node;
382 boolean_t skip;
383 if ((node = kern_hibernation_get_node(aPid))) {
384 boolean_t reclaim_proc = !(node->state & (kProcessBusy | kProcessNoReclaimWorth));
385 if (any || reclaim_proc) {
386 if (node->state & kProcessHibernated) {
387 flags |= kJetsamFlagsHibernated;
388 }
389 skip = FALSE;
390 } else {
391 skip = TRUE;
392 }
393 kern_hibernation_release_node(node);
394 } else {
395 skip = FALSE;
396 }
397 if (skip) {
398 proc_rele(p);
399 } else
400#endif
401 {
402#if DEVELOPMENT || DEBUG
403 if ((jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) && activeProcess) {
404#if DEBUG
405 printf("jetsam: suspending pid %d [%s] (active) for diagnosis - memory_status_level: %d\n",
406 aPid, (p->p_comm ? p->p_comm: "(unknown)"), kern_memorystatus_level);
407#endif /* DEBUG */
408 jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsSuspForDiagnosis);
409 jetsam_priority_entry->flags |= kJetsamFlagsSuspForDiagnosis;
410 task_suspend(p->task);
411 proc_rele(p);
412 if (jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive) {
413 jetsam_diagnostic_suspended_one_active_proc = 1;
414 printf("jetsam: returning after suspending first active proc - %d\n", aPid);
415 }
416 return 0;
417 } else
418#endif /* DEVELOPMENT || DEBUG */
419 {
420 printf("jetsam: killing pid %d [%s] - memory_status_level: %d\n",
421 aPid, (p->p_comm ? p->p_comm : "(unknown)"), kern_memorystatus_level);
422 jetsam_mark_pid_in_snapshot(aPid, flags);
423 exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
424 proc_rele(p);
425#if DEBUG
426 printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level);
427#endif /* DEBUG */
428 return 0;
429 }
430 }
431 }
432 lck_mtx_lock(jetsam_list_mlock);
433 }
434 lck_mtx_unlock(jetsam_list_mlock);
435 return -1;
436}
437
438static int
439jetsam_kill_hiwat_proc(void)
440{
441 proc_t p;
442 int i;
443 if (jetsam_snapshot_list_count == 0) {
444 jetsam_snapshot_procs();
445 }
446 lck_mtx_lock(jetsam_list_mlock);
447 for (i = jetsam_priority_list_index; i < jetsam_priority_list_count; i++) {
448 pid_t aPid;
449 int32_t hiwat;
450 aPid = jetsam_priority_list[i].pid;
451 hiwat = jetsam_priority_list[i].hiwat_pages;
452 /* skip empty or non-hiwat slots in the list */
453 if (aPid == 0 || (hiwat < 0)) {
454 continue; // with lock held
455 }
456 p = proc_find(aPid);
457 if (p != NULL) {
458 int32_t pages = (int32_t)jetsam_task_page_count(p->task);
459 boolean_t skip = (pages <= hiwat);
460#if DEVELOPMENT || DEBUG
461 if (!skip && (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone)) {
462 if (jetsam_priority_list[i].flags & kJetsamFlagsSuspForDiagnosis) {
463 proc_rele(p);
464 continue;
465 }
466 }
467#endif /* DEVELOPMENT || DEBUG */
468#if CONFIG_FREEZE
469 if (!skip) {
470 hibernation_node *node;
471 if ((node = kern_hibernation_get_node(aPid))) {
472 if (node->state & kProcessBusy) {
473 kern_hibernation_release_node(node);
474 skip = TRUE;
475 } else {
476 kern_hibernation_free_node(node, TRUE);
477 skip = FALSE;
478 }
479 }
480 }
481#endif
482 if (!skip) {
483#if DEBUG
484 printf("jetsam: %s pid %d [%s] - %d pages > hiwat (%d)\n",
485 (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone)?"suspending": "killing", aPid, p->p_comm, pages, hiwat);
486#endif /* DEBUG */
487#if DEVELOPMENT || DEBUG
488 if (jetsam_diagnostic_mode != kJetsamDiagnosticModeNone) {
489 lck_mtx_unlock(jetsam_list_mlock);
490 task_suspend(p->task);
491 proc_rele(p);
492#if DEBUG
493 printf("jetsam: pid %d suspended for diagnosis - memory_status_level: %d\n", aPid, kern_memorystatus_level);
494#endif /* DEBUG */
495 jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsSuspForDiagnosis);
496 jetsam_priority_list[i].flags |= kJetsamFlagsSuspForDiagnosis;
497 } else
498#endif /* DEVELOPMENT || DEBUG */
499 {
500 jetsam_priority_list[i].pid = 0;
501 lck_mtx_unlock(jetsam_list_mlock);
502 exit1(p, W_EXITCODE(0, SIGKILL), (int *)NULL);
503 proc_rele(p);
504#if DEBUG
505 printf("jetsam: pid %d killed - memory_status_level: %d\n", aPid, kern_memorystatus_level);
506#endif /* DEBUG */
507 jetsam_mark_pid_in_snapshot(aPid, kJetsamFlagsKilledHiwat);
508 }
509 return 0;
510 } else {
511 proc_rele(p);
512 }
513
514 }
515 }
516 lck_mtx_unlock(jetsam_list_mlock);
517 return -1;
518}
519
520#if CONFIG_FREEZE
521static void
522jetsam_send_hibernation_note(uint32_t flags, pid_t pid, uint32_t pages) {
523 int ret;
524 struct kev_msg ev_msg;
525 jetsam_hibernation_entry_t data;
526
527 ev_msg.vendor_code = KEV_VENDOR_APPLE;
528 ev_msg.kev_class = KEV_SYSTEM_CLASS;
529 ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS;
530
531 ev_msg.event_code = kMemoryStatusHibernationNote;
532
533 ev_msg.dv[0].data_length = sizeof data;
534 ev_msg.dv[0].data_ptr = &data;
535 ev_msg.dv[1].data_length = 0;
536
537 data.pid = pid;
538 data.flags = flags;
539 data.pages = pages;
540
541 ret = kev_post_msg(&ev_msg);
542 if (ret) {
543 kern_memorystatus_kev_failure_count++;
544 printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
545 }
546}
547
548static int
549jetsam_hibernate_top_proc(void)
550{
551 int hibernate_index;
552 proc_t p;
553 uint32_t i;
554
555 lck_mtx_lock(jetsam_list_mlock);
556
557 for (hibernate_index = jetsam_priority_list_index; hibernate_index < jetsam_priority_list_count; hibernate_index++) {
558 pid_t aPid;
559 uint32_t state = 0;
560
561 aPid = jetsam_priority_list[hibernate_index].pid;
562
563 /* skip empty slots in the list */
564 if (aPid == 0) {
565 continue; // with lock held
566 }
567
568 if (kern_hibernation_get_process_state(aPid, &state, NULL) != 0) {
569 continue; // with lock held
570 }
571
572 /* ensure the process isn't marked as busy and is suspended */
573 if ((state & kProcessBusy) || !(state & kProcessSuspended)) {
574 continue; // with lock held
575 }
576
577 p = proc_find(aPid);
578 if (p != NULL) {
579 hibernation_node *node;
580 boolean_t skip;
581 uint32_t purgeable, wired, clean, dirty;
582 boolean_t shared;
583
584 lck_mtx_unlock(jetsam_list_mlock);
585
586 if ((node = kern_hibernation_get_node(aPid))) {
587 if (node->state & kProcessBusy) {
588 skip = TRUE;
589 } else {
590 node->state |= kProcessBusy;
591 /* Whether we hibernate or not, increase the count so can we maintain the gap between hibernated and suspended processes. */
592 kern_memorystatus_hibernated_count++;
593 skip = FALSE;
594 }
595 kern_hibernation_release_node(node);
596 } else {
597 skip = TRUE;
598 }
599
600 if (!skip) {
601 /* Only hibernate processes meeting our size criteria. If not met, mark it as such and return. */
602 task_freeze(p->task, &purgeable, &wired, &clean, &dirty, &shared, TRUE);
603 skip = (dirty < kern_memorystatus_hibernation_pages_min) || (dirty > kern_memorystatus_hibernation_pages_max);
604 }
605
606 if (!skip) {
607 unsigned int swap_pages_free = default_pager_swap_pages_free();
608
609 /* Ensure there's actually enough space free to hibernate this process. */
610 if (dirty > swap_pages_free) {
611 kern_memorystatus_low_swap_pages = swap_pages_free;
612 skip = TRUE;
613 }
614 }
615
616 if (skip) {
617 kern_hibernation_set_process_state(aPid, kProcessIgnored);
618 proc_rele(p);
619 return 0;
620 }
621
622#if DEBUG
623 printf("jetsam: pid %d [%s] hibernating - memory_status_level: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, shared %d, free swap: %d\n",
624 aPid, (p->p_comm ? p->p_comm : "(unknown)"), kern_memorystatus_level, purgeable, wired, clean, dirty, shared, default_pager_swap_pages_free());
625#endif
626
627 task_freeze(p->task, &purgeable, &wired, &clean, &dirty, &shared, FALSE);
628 proc_rele(p);
629
630 kern_hibernation_set_process_state(aPid, kProcessHibernated | (shared ? 0: kProcessNoReclaimWorth));
631
632 /* Update stats */
633 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
634 throttle_intervals[i].pageouts += dirty;
635 }
636 kern_memorystatus_hibernation_pageouts += dirty;
637 kern_memorystatus_hibernation_count++;
638
639 jetsam_send_hibernation_note(kJetsamFlagsHibernated, aPid, dirty);
640
641 return dirty;
642 }
643 }
644 lck_mtx_unlock(jetsam_list_mlock);
645 return -1;
646}
647#endif /* CONFIG_FREEZE */
648
649static void
650kern_memorystatus_thread(void)
651{
652 struct kev_msg ev_msg;
653 jetsam_kernel_stats_t data;
654 boolean_t post_memorystatus_snapshot = FALSE;
655 int ret;
656
657 bzero(&data, sizeof(jetsam_kernel_stats_t));
658 bzero(&ev_msg, sizeof(struct kev_msg));
659 while(1) {
660
661#if DEVELOPMENT || DEBUG
662 jetsam_diagnostic_suspended_one_active_proc = 0;
663#endif /* DEVELOPMENT || DEBUG */
664
665 while (kern_memorystatus_level <= kern_memorystatus_level_highwater) {
666 if (jetsam_kill_hiwat_proc() < 0) {
667 break;
668 }
669 post_memorystatus_snapshot = TRUE;
670 }
671
672 while (kern_memorystatus_level <= kern_memorystatus_level_critical) {
673 if (jetsam_kill_top_proc(FALSE, kJetsamFlagsKilled) < 0) {
674 break;
675 }
676 post_memorystatus_snapshot = TRUE;
677#if DEVELOPMENT || DEBUG
678 if ((jetsam_diagnostic_mode == kJetsamDiagnosticModeStopAtFirstActive) && jetsam_diagnostic_suspended_one_active_proc) {
679 printf("jetsam: stopping killing since 1 active proc suspended already for diagnosis\n");
680 break; // we found first active proc, let's not kill any more
681 }
682#endif /* DEVELOPMENT || DEBUG */
683 }
684
685 kern_memorystatus_last_level = kern_memorystatus_level;
686
687 ev_msg.vendor_code = KEV_VENDOR_APPLE;
688 ev_msg.kev_class = KEV_SYSTEM_CLASS;
689 ev_msg.kev_subclass = KEV_MEMORYSTATUS_SUBCLASS;
690
691 /* pass the memory status level (percent free) */
692 ev_msg.event_code = kMemoryStatusLevelNote;
693
694 ev_msg.dv[0].data_length = sizeof kern_memorystatus_last_level;
695 ev_msg.dv[0].data_ptr = &kern_memorystatus_last_level;
696 ev_msg.dv[1].data_length = sizeof data;
697 ev_msg.dv[1].data_ptr = &data;
698 ev_msg.dv[2].data_length = 0;
699
700 data.free_pages = vm_page_free_count;
701 data.active_pages = vm_page_active_count;
702 data.inactive_pages = vm_page_inactive_count;
703 data.purgeable_pages = vm_page_purgeable_count;
704 data.wired_pages = vm_page_wire_count;
705
706 ret = kev_post_msg(&ev_msg);
707 if (ret) {
708 kern_memorystatus_kev_failure_count++;
709 printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
710 }
711
712 if (post_memorystatus_snapshot) {
713 size_t snapshot_size = sizeof(jetsam_kernel_stats_t) + sizeof(size_t) + sizeof(jetsam_snapshot_entry_t) * jetsam_snapshot_list_count;
714 ev_msg.event_code = kMemoryStatusSnapshotNote;
715 ev_msg.dv[0].data_length = sizeof snapshot_size;
716 ev_msg.dv[0].data_ptr = &snapshot_size;
717 ev_msg.dv[1].data_length = 0;
718
719 ret = kev_post_msg(&ev_msg);
720 if (ret) {
721 kern_memorystatus_kev_failure_count++;
722 printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
723 }
724 }
725
726 if (kern_memorystatus_level >= kern_memorystatus_last_level + 5 ||
727 kern_memorystatus_level <= kern_memorystatus_last_level - 5)
728 continue;
729
730 assert_wait(&kern_memorystatus_wakeup, THREAD_UNINT);
731 (void)thread_block((thread_continue_t)kern_memorystatus_thread);
732 }
733}
734
735#if CONFIG_FREEZE
736
737__private_extern__ void
738kern_hibernation_init(void)
739{
740 hibernation_lck_attr = lck_attr_alloc_init();
741 hibernation_lck_grp_attr = lck_grp_attr_alloc_init();
742 hibernation_lck_grp = lck_grp_alloc_init("hibernation", hibernation_lck_grp_attr);
743 hibernation_mlock = lck_mtx_alloc_init(hibernation_lck_grp, hibernation_lck_attr);
744
745 RB_INIT(&hibernation_tree_head);
746
747 (void)kernel_thread(kernel_task, kern_hibernation_thread);
748}
749
750static inline boolean_t
751kern_hibernation_can_hibernate_processes(void)
752{
753 boolean_t ret;
754
755 lck_mtx_lock_spin(hibernation_mlock);
756 ret = (kern_memorystatus_suspended_count - kern_memorystatus_hibernated_count) >
757 kern_memorystatus_hibernation_suspended_minimum ? TRUE : FALSE;
758 lck_mtx_unlock(hibernation_mlock);
759
760 return ret;
761}
762
763static boolean_t
764kern_hibernation_can_hibernate(void)
765{
766 /* Only hibernate if we're sufficiently low on memory; this holds off hibernation right after boot,
767 and is generally is a no-op once we've reached steady state. */
768 if (kern_memorystatus_level > kern_memorystatus_level_hibernate) {
769 return FALSE;
770 }
771
772 /* Check minimum suspended process threshold. */
773 if (!kern_hibernation_can_hibernate_processes()) {
774 return FALSE;
775 }
776
777 /* Is swap running low? */
778 if (kern_memorystatus_low_swap_pages) {
779 /* If there's been no movement in free swap pages since we last attempted hibernation, return. */
780 if (default_pager_swap_pages_free() <= kern_memorystatus_low_swap_pages) {
781 return FALSE;
782 }
783
784 /* Pages have been freed, so we can retry. */
785 kern_memorystatus_low_swap_pages = 0;
786 }
787
788 /* OK */
789 return TRUE;
790}
791
792static void
793kern_hibernation_add_node(hibernation_node *node)
794{
795 lck_mtx_lock_spin(hibernation_mlock);
796
797 RB_INSERT(hibernation_tree, &hibernation_tree_head, node);
798 kern_memorystatus_suspended_count++;
799
800 lck_mtx_unlock(hibernation_mlock);
801}
802
803/* Returns with the hibernation lock taken */
804static hibernation_node *
805kern_hibernation_get_node(pid_t pid)
806{
807 hibernation_node sought, *found;
808 sought.pid = pid;
809 lck_mtx_lock_spin(hibernation_mlock);
810 found = RB_FIND(hibernation_tree, &hibernation_tree_head, &sought);
811 if (!found) {
812 lck_mtx_unlock(hibernation_mlock);
813 }
814 return found;
815}
816
817static void
818kern_hibernation_release_node(hibernation_node *node)
819{
820#pragma unused(node)
821 lck_mtx_unlock(hibernation_mlock);
822}
823
824static void
825kern_hibernation_free_node(hibernation_node *node, boolean_t unlock)
826{
827 /* make sure we're called with the hibernation_mlock held */
828 lck_mtx_assert(hibernation_mlock, LCK_MTX_ASSERT_OWNED);
829
830 if (node->state & (kProcessHibernated | kProcessIgnored)) {
831 kern_memorystatus_hibernated_count--;
832 }
833
834 kern_memorystatus_suspended_count--;
835
836 RB_REMOVE(hibernation_tree, &hibernation_tree_head, node);
837 kfree(node, sizeof(hibernation_node));
838
839 if (unlock) {
840 lck_mtx_unlock(hibernation_mlock);
841 }
842}
843
844static void
845kern_hibernation_register_pid(pid_t pid)
846{
847 hibernation_node *node;
848
849#if DEVELOPMENT || DEBUG
850 node = kern_hibernation_get_node(pid);
851 if (node) {
852 printf("kern_hibernation_register_pid: pid %d already registered!\n", pid);
853 kern_hibernation_release_node(node);
854 return;
855 }
856#endif
857
858 /* Register as a candiate for hibernation */
859 node = (hibernation_node *)kalloc(sizeof(hibernation_node));
860 if (node) {
861 clock_sec_t sec;
862 clock_nsec_t nsec;
863 mach_timespec_t ts;
864
865 memset(node, 0, sizeof(hibernation_node));
866
867 node->pid = pid;
868 node->state = kProcessSuspended;
869
870 clock_get_system_nanotime(&sec, &nsec);
871 ts.tv_sec = sec;
872 ts.tv_nsec = nsec;
873
874 node->hibernation_ts = ts;
875
876 kern_hibernation_add_node(node);
877 }
878}
879
880static void
881kern_hibernation_unregister_pid(pid_t pid)
882{
883 hibernation_node *node;
884
885 node = kern_hibernation_get_node(pid);
886 if (node) {
887 kern_hibernation_free_node(node, TRUE);
888 }
889}
890
891void
892kern_hibernation_on_pid_suspend(pid_t pid)
893{
894 kern_hibernation_register_pid(pid);
895}
896
897/* If enabled, we bring all the hibernated pages back prior to resumption; otherwise, they're faulted back in on demand */
898#define THAW_ON_RESUME 1
899
900void
901kern_hibernation_on_pid_resume(pid_t pid, task_t task)
902{
903#if THAW_ON_RESUME
904 hibernation_node *node;
905 if ((node = kern_hibernation_get_node(pid))) {
906 if (node->state & kProcessHibernated) {
907 node->state |= kProcessBusy;
908 kern_hibernation_release_node(node);
909 task_thaw(task);
910 jetsam_send_hibernation_note(kJetsamFlagsThawed, pid, 0);
911 } else {
912 kern_hibernation_release_node(node);
913 }
914 }
915#else
916#pragma unused(task)
917#endif
918 kern_hibernation_unregister_pid(pid);
919}
920
921void
922kern_hibernation_on_pid_hibernate(pid_t pid)
923{
924#pragma unused(pid)
925
926 /* Wake the hibernation thread */
927 thread_wakeup((event_t)&kern_hibernation_wakeup);
928}
929
930static int
931kern_hibernation_get_process_state(pid_t pid, uint32_t *state, mach_timespec_t *ts)
932{
933 hibernation_node *found;
934 int err = ESRCH;
935
936 *state = 0;
937
938 found = kern_hibernation_get_node(pid);
939 if (found) {
940 *state = found->state;
941 if (ts) {
942 *ts = found->hibernation_ts;
943 }
944 err = 0;
945 kern_hibernation_release_node(found);
946 }
947
948 return err;
949}
950
951static int
952kern_hibernation_set_process_state(pid_t pid, uint32_t state)
953{
954 hibernation_node *found;
955 int err = ESRCH;
956
957 found = kern_hibernation_get_node(pid);
958 if (found) {
959 found->state = state;
960 err = 0;
961 kern_hibernation_release_node(found);
962 }
963
964 return err;
965}
966
967static void
968kern_hibernation_update_throttle_interval(mach_timespec_t *ts, struct throttle_interval_t *interval)
969{
970 if (CMP_MACH_TIMESPEC(ts, &interval->ts) >= 0) {
971 if (!interval->max_pageouts) {
972 interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * HIBERNATION_DAILY_PAGEOUTS_MAX) / (24 * 60)));
973 } else {
974 printf("jetsam: %d minute throttle timeout, resetting\n", interval->mins);
975 }
976 interval->ts.tv_sec = interval->mins * 60;
977 interval->ts.tv_nsec = 0;
978 ADD_MACH_TIMESPEC(&interval->ts, ts);
979 /* Since we update the throttle stats pre-hibernation, adjust for overshoot here */
980 if (interval->pageouts > interval->max_pageouts) {
981 interval->pageouts -= interval->max_pageouts;
982 } else {
983 interval->pageouts = 0;
984 }
985 interval->throttle = FALSE;
986 } else if (!interval->throttle && interval->pageouts >= interval->max_pageouts) {
987 printf("jetsam: %d minute pageout limit exceeded; enabling throttle\n", interval->mins);
988 interval->throttle = TRUE;
989 }
990#ifdef DEBUG
991 printf("jetsam: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n",
992 interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60,
993 interval->throttle ? "on" : "off");
994#endif
995}
996
997static boolean_t
998kern_hibernation_throttle_update(void)
999{
1000 clock_sec_t sec;
1001 clock_nsec_t nsec;
1002 mach_timespec_t ts;
1003 uint32_t i;
1004 boolean_t throttled = FALSE;
1005
1006#if DEVELOPMENT || DEBUG
1007 if (!kern_memorystatus_hibernation_throttle_enabled)
1008 return FALSE;
1009#endif
1010
1011 clock_get_system_nanotime(&sec, &nsec);
1012 ts.tv_sec = sec;
1013 ts.tv_nsec = nsec;
1014
1015 /* Check hibernation pageouts over multiple intervals and throttle if we've exceeded our budget.
1016 *
1017 * This ensures that periods of inactivity can't be used as 'credit' towards hibernation if the device has
1018 * remained dormant for a long period. We do, however, allow increased thresholds for shorter intervals in
1019 * order to allow for bursts of activity.
1020 */
1021 for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) {
1022 kern_hibernation_update_throttle_interval(&ts, &throttle_intervals[i]);
1023 if (throttle_intervals[i].throttle == TRUE)
1024 throttled = TRUE;
1025 }
1026
1027 return throttled;
1028}
1029
1030static void
1031kern_hibernation_cull(void)
1032{
1033 hibernation_node *node, *next;
1034 lck_mtx_lock(hibernation_mlock);
1035
1036 for (node = RB_MIN(hibernation_tree, &hibernation_tree_head); node != NULL; node = next) {
1037 proc_t p;
1038
1039 next = RB_NEXT(hibernation_tree, &hibernation_tree_head, node);
1040
1041 /* TODO: probably suboptimal, so revisit should it cause a performance issue */
1042 p = proc_find(node->pid);
1043 if (p) {
1044 proc_rele(p);
1045 } else {
1046 kern_hibernation_free_node(node, FALSE);
1047 }
1048 }
1049
1050 lck_mtx_unlock(hibernation_mlock);
1051}
1052
1053static void
1054kern_hibernation_thread(void)
1055{
1056 if (vm_freeze_enabled) {
1057 if (kern_hibernation_can_hibernate()) {
1058
1059 /* Cull dead processes */
1060 kern_hibernation_cull();
1061
1062 /* Only hibernate if we've not exceeded our pageout budgets */
1063 if (!kern_hibernation_throttle_update()) {
1064 jetsam_hibernate_top_proc();
1065 } else {
1066 printf("kern_hibernation_thread: in throttle, ignoring hibernation\n");
1067 kern_memorystatus_hibernation_throttle_count++; /* Throttled, update stats */
1068 }
1069 }
1070 }
1071
1072 assert_wait((event_t) &kern_hibernation_wakeup, THREAD_UNINT);
1073 thread_block((thread_continue_t) kern_hibernation_thread);
1074}
1075
1076#endif /* CONFIG_FREEZE */
1077
1078static int
1079sysctl_io_variable(struct sysctl_req *req, void *pValue, size_t currentsize, size_t maxsize, size_t *newsize)
1080{
1081 int error;
1082
1083 /* Copy blob out */
1084 error = SYSCTL_OUT(req, pValue, currentsize);
1085
1086 /* error or nothing to set */
1087 if (error || !req->newptr)
1088 return(error);
1089
1090 if (req->newlen > maxsize) {
1091 return EINVAL;
1092 }
1093 error = SYSCTL_IN(req, pValue, req->newlen);
1094
1095 if (!error) {
1096 *newsize = req->newlen;
1097 }
1098
1099 return(error);
1100}
1101
1102static int
1103sysctl_handle_kern_memorystatus_priority_list(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
1104{
1105 int i, ret;
1106 jetsam_priority_entry_t temp_list[kMaxPriorityEntries];
1107 size_t newsize, currentsize;
1108
1109 if (req->oldptr) {
1110 lck_mtx_lock(jetsam_list_mlock);
1111 for (i = 0; i < jetsam_priority_list_count; i++) {
1112 temp_list[i] = jetsam_priority_list[i];
1113 }
1114 lck_mtx_unlock(jetsam_list_mlock);
1115 }
1116
1117 currentsize = sizeof(jetsam_priority_list[0]) * jetsam_priority_list_count;
1118
1119 ret = sysctl_io_variable(req, &temp_list[0], currentsize, sizeof(temp_list), &newsize);
1120
1121 if (!ret && req->newptr) {
1122 int temp_list_count = newsize / sizeof(jetsam_priority_list[0]);
1123#if DEBUG
1124 printf("set jetsam priority pids = { ");
1125 for (i = 0; i < temp_list_count; i++) {
1126 printf("(%d, 0x%08x, %d) ", temp_list[i].pid, temp_list[i].flags, temp_list[i].hiwat_pages);
1127 }
1128 printf("}\n");
1129#endif /* DEBUG */
1130 lck_mtx_lock(jetsam_list_mlock);
1131#if CONFIG_FREEZE
1132 jetsam_priority_list_hibernation_index = 0;
1133#endif
1134 jetsam_priority_list_index = 0;
1135 jetsam_priority_list_count = temp_list_count;
1136 for (i = 0; i < temp_list_count; i++) {
1137 jetsam_priority_list[i] = temp_list[i];
1138 }
1139 for (i = temp_list_count; i < kMaxPriorityEntries; i++) {
1140 jetsam_priority_list[i].pid = 0;
1141 jetsam_priority_list[i].flags = 0;
1142 jetsam_priority_list[i].hiwat_pages = -1;
1143 jetsam_priority_list[i].hiwat_reserved1 = -1;
1144 jetsam_priority_list[i].hiwat_reserved2 = -1;
1145 jetsam_priority_list[i].hiwat_reserved3 = -1;
1146 }
1147 lck_mtx_unlock(jetsam_list_mlock);
1148 }
1149 return ret;
1150}
1151
1152static int
1153sysctl_handle_kern_memorystatus_snapshot(__unused struct sysctl_oid *oid, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
1154{
1155 int ret;
1156 size_t currentsize = 0;
1157
1158 if (jetsam_snapshot_list_count > 0) {
1159 currentsize = sizeof(jetsam_kernel_stats_t) + sizeof(size_t) + sizeof(jetsam_snapshot_entry_t) * jetsam_snapshot_list_count;
1160 }
1161 if (!currentsize) {
1162 if (req->oldptr) {
1163#ifdef DEBUG
1164 printf("kern.memorystatus_snapshot returning EINVAL\n");
1165#endif
1166 return EINVAL;
1167 }
1168 else {
1169#ifdef DEBUG
1170 printf("kern.memorystatus_snapshot returning 0 for size\n");
1171#endif
1172 }
1173 } else {
1174#ifdef DEBUG
1175 printf("kern.memorystatus_snapshot returning %ld for size\n", (long)currentsize);
1176#endif
1177 }
1178 ret = sysctl_io_variable(req, &jetsam_snapshot, currentsize, 0, NULL);
1179 if (!ret && req->oldptr) {
1180 jetsam_snapshot.entry_count = jetsam_snapshot_list_count = 0;
1181 }
1182 return ret;
1183}
1184
1185SYSCTL_PROC(_kern, OID_AUTO, memorystatus_priority_list, CTLTYPE_OPAQUE|CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, sysctl_handle_kern_memorystatus_priority_list, "S,jetsam_priorities", "");
1186SYSCTL_PROC(_kern, OID_AUTO, memorystatus_snapshot, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_handle_kern_memorystatus_snapshot, "S,jetsam_snapshot", "");