1 | /* |
2 | * Copyright (c) 2006-2018 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | * | |
28 | */ | |
29 | ||
30 | #include <kern/sched_prim.h> | |
31 | #include <kern/kalloc.h> | |
32 | #include <kern/assert.h> | |
33 | #include <kern/debug.h> | |
34 | #include <kern/locks.h> | |
35 | #include <kern/task.h> | |
36 | #include <kern/thread.h> | |
37 | #include <kern/host.h> | |
38 | #include <kern/policy_internal.h> | |
39 | #include <kern/thread_group.h> | |
40 | ||
41 | #include <IOKit/IOBSD.h> | |
42 | ||
43 | #include <libkern/libkern.h> | |
44 | #include <mach/coalition.h> | |
45 | #include <mach/mach_time.h> | |
46 | #include <mach/task.h> | |
47 | #include <mach/host_priv.h> | |
48 | #include <mach/mach_host.h> | |
49 | #include <os/log.h> | |
50 | #include <pexpert/pexpert.h> | |
51 | #include <sys/coalition.h> | |
52 | #include <sys/kern_event.h> | |
53 | #include <sys/proc.h> | |
54 | #include <sys/proc_info.h> | |
55 | #include <sys/reason.h> | |
56 | #include <sys/signal.h> | |
57 | #include <sys/signalvar.h> | |
58 | #include <sys/sysctl.h> | |
59 | #include <sys/sysproto.h> | |
60 | #include <sys/wait.h> | |
61 | #include <sys/tree.h> | |
62 | #include <sys/priv.h> | |
63 | #include <vm/vm_pageout.h> | |
64 | #include <vm/vm_protos.h> | |
65 | #include <mach/machine/sdt.h> | |
66 | #include <libkern/section_keywords.h> | |
67 | #include <stdatomic.h> | |
68 | ||
69 | #if CONFIG_FREEZE | |
70 | #include <vm/vm_map.h> | |
71 | #endif /* CONFIG_FREEZE */ | |
72 | ||
73 | #include <sys/kern_memorystatus.h> | |
74 | #include <sys/kern_memorystatus_freeze.h> | |
75 | #include <sys/kern_memorystatus_notify.h> | |
76 | ||
#if CONFIG_JETSAM

/* Page-availability counters maintained by the jetsam subsystem. */
extern unsigned int memorystatus_available_pages;
extern unsigned int memorystatus_available_pages_pressure;
extern unsigned int memorystatus_available_pages_critical;
extern unsigned int memorystatus_available_pages_critical_base;
extern unsigned int memorystatus_available_pages_critical_idle_offset;

#else /* CONFIG_JETSAM */

/* Non-jetsam configurations carry the same counters as 64-bit values. */
extern uint64_t memorystatus_available_pages;
extern uint64_t memorystatus_available_pages_pressure;
extern uint64_t memorystatus_available_pages_critical;

#endif /* CONFIG_JETSAM */

/* # of processes currently marked P_MEMSTAT_FROZEN. */
unsigned int memorystatus_frozen_count = 0;
/* # of processes currently suspended (candidates for freezing). */
unsigned int memorystatus_suspended_count = 0;
/* NOTE(review): presumably a percentage used to derive the freeze threshold — not referenced in this chunk; confirm against the rest of the file. */
unsigned long freeze_threshold_percentage = 50;
96 | ||
#if CONFIG_FREEZE

/* Lock protecting freezer state (budget, degradation flag, throttle windows). */
lck_grp_attr_t *freezer_lck_grp_attr;
lck_grp_t *freezer_lck_grp;
static lck_mtx_t freezer_mutex;

/* Thresholds */
unsigned int memorystatus_freeze_threshold = 0;
unsigned int memorystatus_freeze_pages_min = 0;  /* min. resident pages a process needs to be freeze-eligible */
unsigned int memorystatus_freeze_pages_max = 0;  /* max. pages passed to task_freeze() per freeze */
unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT;
unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT;
uint64_t memorystatus_freeze_budget_pages_remaining = 0; /* remaining # of pages that can be frozen to disk */
boolean_t memorystatus_freeze_degradation = FALSE; /* protected by the freezer mutex. Signals we are in a degraded freeze mode. */

unsigned int memorystatus_max_frozen_demotions_daily = 0;        /* daily cap on frozen-process demotions */
unsigned int memorystatus_thaw_count_demotion_threshold = 0;     /* min. thaws that protect a process from demotion */

boolean_t memorystatus_freeze_enabled = FALSE;   /* master switch for the freezer */
int memorystatus_freeze_wakeup = 0;
int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */

#define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */

#ifdef XNU_KERNEL_PRIVATE

unsigned int memorystatus_frozen_processes_max = 0;
unsigned int memorystatus_frozen_shared_mb = 0;
unsigned int memorystatus_frozen_shared_mb_max = 0;
unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */
unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */
unsigned int memorystatus_thaw_count = 0;
unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */

#endif /* XNU_KERNEL_PRIVATE */

/* Forward declarations: freezability checks and the freezer thread body. */
static inline boolean_t memorystatus_can_freeze_processes(void);
static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low);
static boolean_t memorystatus_is_process_eligible_for_freeze(proc_t p);
static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused);

void memorystatus_disable_freeze(void);

/* Stats */
static uint64_t memorystatus_freeze_pageouts = 0;

/* Throttling */
#define DEGRADED_WINDOW_MINS    (30)
#define NORMAL_WINDOW_MINS      (24 * 60)

/* Two budget windows: a short degraded-mode window and the normal daily window. */
static throttle_interval_t throttle_intervals[] = {
	{ DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
	{ NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }},
};
throttle_interval_t *degraded_throttle_window = &throttle_intervals[0];
throttle_interval_t *normal_throttle_window = &throttle_intervals[1];

extern uint64_t vm_swap_get_free_space(void);
extern boolean_t vm_swap_max_budget(uint64_t *);
extern int i_coal_jetsam_get_taskrole(coalition_t coal, task_t task);

static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed);
static void memorystatus_demote_frozen_processes(boolean_t force_one);

/* Absolute time at which the freezer thread is next scheduled to run. */
static uint64_t memorystatus_freezer_thread_next_run_ts = 0;
162 | ||
/* Sysctls needed for aggd stats */

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, "");
SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, "");


#if DEVELOPMENT || DEBUG

/* Tunables exposed on dev/debug kernels only. */
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_jetsam_band, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_jetsam_band, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, "");
/* NOTE(review): memorystatus_freeze_degradation is a boolean_t exported through SYSCTL_UINT — relies on boolean_t being int-sized. */
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_min, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_min, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_pages_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_pages_max, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_processes_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_processes_max, 0, "");

/*
 * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band.
 * "0" means no limit.
 * Default is 10% of system-wide task limit.
 */

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, "");
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_private_shared_pages_ratio, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_private_shared_pages_ratio, 0, "");

SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_min_processes, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_suspended_threshold, 0, "");

/*
 * max. # of frozen process demotions we will allow in our daily cycle.
 */
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_max_freeze_demotions_daily, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_max_frozen_demotions_daily, 0, "");
/*
 * min # of thaws needed by a process to protect it from getting demoted into the IDLE band.
 */
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count_demotion_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_thaw_count_demotion_threshold, 0, "");

boolean_t memorystatus_freeze_throttle_enabled = TRUE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, "");

/*
 * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk.
 * Exposed via the sysctl kern.memorystatus_freeze_to_memory.
 */
boolean_t memorystatus_freeze_to_memory = FALSE;
SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, "");

215 | #define VM_PAGES_FOR_ALL_PROCS (2) | |
216 | /* | |
217 | * Manual trigger of freeze and thaw for dev / debug kernels only. | |
218 | */ | |
219 | static int | |
220 | sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS | |
221 | { | |
222 | #pragma unused(arg1, arg2) | |
223 | int error, pid = 0; | |
224 | proc_t p; | |
225 | int freezer_error_code = 0; | |
226 | pid_t pid_list[MAX_XPC_SERVICE_PIDS]; | |
227 | int ntasks = 0; | |
228 | coalition_t coal = COALITION_NULL; | |
229 | ||
230 | if (memorystatus_freeze_enabled == FALSE) { | |
231 | printf("sysctl_freeze: Freeze is DISABLED\n"); | |
232 | return ENOTSUP; | |
233 | } | |
234 | ||
235 | error = sysctl_handle_int(oidp, &pid, 0, req); | |
236 | if (error || !req->newptr) { | |
237 | return error; | |
238 | } | |
239 | ||
240 | if (pid == VM_PAGES_FOR_ALL_PROCS) { | |
241 | vm_pageout_anonymous_pages(); | |
242 | ||
243 | return 0; | |
244 | } | |
245 | ||
246 | lck_mtx_lock(&freezer_mutex); | |
247 | ||
248 | again: | |
249 | p = proc_find(pid); | |
250 | if (p != NULL) { | |
251 | uint32_t purgeable, wired, clean, dirty, shared; | |
252 | uint32_t max_pages = 0, state = 0; | |
253 | ||
254 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
255 | /* | |
256 | * Freezer backed by the compressor and swap file(s) | |
257 | * will hold compressed data. | |
258 | * | |
259 | * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from | |
260 | * being swapped out to disk. Note that this disables freezer swap support globally, | |
261 | * not just for the process being frozen. | |
262 | * | |
263 | * | |
264 | * We don't care about the global freezer budget or the process's (min/max) budget here. | |
265 | * The freeze sysctl is meant to force-freeze a process. | |
266 | * | |
267 | * We also don't update any global or process stats on this path, so that the jetsam/ freeze | |
268 | * logic remains unaffected. The tasks we're performing here are: freeze the process, set the | |
269 | * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active). | |
270 | */ | |
271 | max_pages = memorystatus_freeze_pages_max; | |
272 | } else { | |
273 | /* | |
274 | * We only have the compressor without any swap. | |
275 | */ | |
276 | max_pages = UINT32_MAX - 1; | |
277 | } | |
278 | ||
279 | proc_list_lock(); | |
280 | state = p->p_memstat_state; | |
281 | proc_list_unlock(); | |
282 | ||
283 | /* | |
284 | * The jetsam path also verifies that the process is a suspended App. We don't care about that here. | |
285 | * We simply ensure that jetsam is not already working on the process and that the process has not | |
286 | * explicitly disabled freezing. | |
287 | */ | |
288 | if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) { | |
289 | printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n", | |
290 | (state & P_MEMSTAT_TERMINATED) ? " terminated" : "", | |
291 | (state & P_MEMSTAT_LOCKED) ? " locked" : "", | |
292 | (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : ""); | |
293 | ||
294 | proc_rele(p); | |
295 | lck_mtx_unlock(&freezer_mutex); | |
296 | return EPERM; | |
297 | } | |
298 | ||
299 | error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */); | |
300 | ||
301 | if (error) { | |
302 | char reason[128]; | |
303 | if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) { | |
304 | strlcpy(reason, "too much shared memory", 128); | |
305 | } | |
306 | ||
307 | if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) { | |
308 | strlcpy(reason, "low private-shared pages ratio", 128); | |
309 | } | |
310 | ||
311 | if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) { | |
312 | strlcpy(reason, "no compressor space", 128); | |
313 | } | |
314 | ||
315 | if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) { | |
316 | strlcpy(reason, "no swap space", 128); | |
317 | } | |
318 | ||
319 | printf("sysctl_freeze: task_freeze failed: %s\n", reason); | |
320 | ||
321 | if (error == KERN_NO_SPACE) { | |
322 | /* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */ | |
323 | error = ENOSPC; | |
324 | } else { | |
325 | error = EIO; | |
326 | } | |
327 | } else { | |
328 | proc_list_lock(); | |
329 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) { | |
330 | p->p_memstat_state |= P_MEMSTAT_FROZEN; | |
331 | memorystatus_frozen_count++; | |
332 | } | |
333 | p->p_memstat_frozen_count++; | |
334 | ||
335 | ||
336 | proc_list_unlock(); | |
337 | ||
338 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
339 | /* | |
340 | * We elevate only if we are going to swap out the data. | |
341 | */ | |
342 | error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, | |
343 | memorystatus_freeze_jetsam_band, TRUE); | |
344 | ||
345 | if (error) { | |
346 | printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error); | |
347 | } | |
348 | } | |
349 | } | |
350 | ||
351 | if ((error == 0) && (coal == NULL)) { | |
352 | /* | |
353 | * We froze a process and so we check to see if it was | |
354 | * a coalition leader and if it has XPC services that | |
355 | * might need freezing. | |
356 | * Only one leader can be frozen at a time and so we shouldn't | |
357 | * enter this block more than once per call. Hence the | |
358 | * check that 'coal' has to be NULL. We should make this an | |
359 | * assert() or panic() once we have a much more concrete way | |
360 | * to detect an app vs a daemon. | |
361 | */ | |
362 | ||
363 | task_t curr_task = NULL; | |
364 | ||
365 | curr_task = proc_task(p); | |
366 | coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM); | |
367 | if (coalition_is_leader(curr_task, coal)) { | |
368 | ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC, | |
369 | COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS); | |
370 | ||
371 | if (ntasks > MAX_XPC_SERVICE_PIDS) { | |
372 | ntasks = MAX_XPC_SERVICE_PIDS; | |
373 | } | |
374 | } | |
375 | } | |
376 | ||
377 | proc_rele(p); | |
378 | ||
379 | while (ntasks) { | |
380 | pid = pid_list[--ntasks]; | |
381 | goto again; | |
382 | } | |
383 | ||
384 | lck_mtx_unlock(&freezer_mutex); | |
385 | return error; | |
386 | } else { | |
387 | printf("sysctl_freeze: Invalid process\n"); | |
388 | } | |
389 | ||
390 | ||
391 | lck_mtx_unlock(&freezer_mutex); | |
392 | return EINVAL; | |
393 | } | |
394 | ||
/* kern.memorystatus_freeze: write-only; accepts a pid or VM_PAGES_FOR_ALL_PROCS. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze, "I", "");
397 | ||
398 | /* | |
399 | * Manual trigger of agressive frozen demotion for dev / debug kernels only. | |
400 | */ | |
401 | static int | |
402 | sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS | |
403 | { | |
404 | #pragma unused(arg1, arg2, oidp, req) | |
405 | memorystatus_demote_frozen_processes(false); | |
406 | return 0; | |
407 | } | |
408 | ||
409 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", ""); | |
410 | ||
411 | static int | |
412 | sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS | |
413 | { | |
414 | #pragma unused(arg1, arg2) | |
415 | ||
416 | int error, pid = 0; | |
417 | proc_t p; | |
418 | ||
419 | if (memorystatus_freeze_enabled == FALSE) { | |
420 | return ENOTSUP; | |
421 | } | |
422 | ||
423 | error = sysctl_handle_int(oidp, &pid, 0, req); | |
424 | if (error || !req->newptr) { | |
425 | return error; | |
426 | } | |
427 | ||
428 | if (pid == VM_PAGES_FOR_ALL_PROCS) { | |
429 | do_fastwake_warmup_all(); | |
430 | return 0; | |
431 | } else { | |
432 | p = proc_find(pid); | |
433 | if (p != NULL) { | |
434 | error = task_thaw(p->task); | |
435 | ||
436 | if (error) { | |
437 | error = EIO; | |
438 | } else { | |
439 | /* | |
440 | * task_thaw() succeeded. | |
441 | * | |
442 | * We increment memorystatus_frozen_count on the sysctl freeze path. | |
443 | * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count | |
444 | * when this process exits. | |
445 | * | |
446 | * proc_list_lock(); | |
447 | * p->p_memstat_state &= ~P_MEMSTAT_FROZEN; | |
448 | * proc_list_unlock(); | |
449 | */ | |
450 | } | |
451 | proc_rele(p); | |
452 | return error; | |
453 | } | |
454 | } | |
455 | ||
456 | return EINVAL; | |
457 | } | |
458 | ||
/* kern.memorystatus_thaw: write-only; accepts a pid or VM_PAGES_FOR_ALL_PROCS. */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_available_pages_thaw, "I", "");


/* System-wide freezer snapshot returned at the head of the status buffer. */
typedef struct _global_freezable_status {
	boolean_t freeze_pages_threshold_crossed;       /* available pages dropped below the freeze threshold */
	boolean_t freeze_eligible_procs_available;      /* enough suspended, un-frozen processes exist */
	boolean_t freeze_scheduled_in_future;           /* freezer thread has a future wakeup scheduled */
}global_freezable_status_t;

/* Per-process freezability record; one per evaluated process in band 0. */
typedef struct _proc_freezable_status {
	boolean_t freeze_has_memstat_state;     /* memstat state does not forbid freezing */
	boolean_t freeze_has_pages_min;         /* meets the minimum resident-page requirement */
	int freeze_has_probability;             /* use-probability from the global table; -1 if N/A */
	int freeze_leader_eligible;             /* FREEZE_PROC_LEADER_FREEZABLE_* (coalition XPC services) */
	boolean_t freeze_attempted;             /* eval-only task_freeze() was attempted */
	uint32_t p_memstat_state;
	uint32_t p_pages;
	int p_freeze_error_code;                /* FREEZER_ERROR_* from the eval attempt */
	int p_pid;
	int p_leader_pid;                       /* coalition leader pid; 0 when not coalition-driven */
	char p_name[MAXCOMLEN + 1];
}proc_freezable_status_t;

#define MAX_FREEZABLE_PROCESSES 200     /* Total # of processes in band 0 that we evaluate for freezability */

/*
 * For coalition based freezing evaluations, we proceed as follows:
 *  - detect that the process is a coalition member and a XPC service
 *  - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN
 *  - continue its freezability evaluation assuming its leader will be freezable too
 *
 * Once we are done evaluating all processes, we do a quick run thru all
 * processes and for a coalition member XPC service we look up the 'freezable'
 * status of its leader and iff:
 *  - the xpc service is freezable i.e. its individual freeze evaluation worked
 *  - and, its leader is also marked freezable
 * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS.
 */

#define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN    (-1)
#define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS    (1)
#define FREEZE_PROC_LEADER_FREEZABLE_FAILURE    (2)
502 | ||
503 | static int | |
504 | memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval) | |
505 | { | |
506 | uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0; | |
507 | global_freezable_status_t *list_head; | |
508 | proc_freezable_status_t *list_entry, *list_entry_start; | |
509 | size_t list_size = 0; | |
510 | proc_t p, leader_proc; | |
511 | memstat_bucket_t *bucket; | |
512 | uint32_t state = 0, pages = 0, entry_count = 0; | |
513 | boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE; | |
514 | int error = 0, probability_of_use = 0; | |
515 | pid_t leader_pid = 0; | |
516 | ||
517 | ||
518 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) { | |
519 | return ENOTSUP; | |
520 | } | |
521 | ||
522 | list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES); | |
523 | ||
524 | if (buffer_size < list_size) { | |
525 | return EINVAL; | |
526 | } | |
527 | ||
528 | list_head = (global_freezable_status_t*)kalloc(list_size); | |
529 | if (list_head == NULL) { | |
530 | return ENOMEM; | |
531 | } | |
532 | ||
533 | memset(list_head, 0, list_size); | |
534 | ||
535 | list_size = sizeof(global_freezable_status_t); | |
536 | ||
537 | proc_list_lock(); | |
538 | ||
539 | uint64_t curr_time = mach_absolute_time(); | |
540 | ||
541 | list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold); | |
542 | list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold); | |
543 | list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts); | |
544 | ||
545 | list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t)); | |
546 | list_entry = list_entry_start; | |
547 | ||
548 | bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
549 | ||
550 | entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t)); | |
551 | ||
552 | p = memorystatus_get_first_proc_locked(&band, FALSE); | |
553 | proc_count++; | |
554 | ||
555 | while ((proc_count <= MAX_FREEZABLE_PROCESSES) && | |
556 | (p) && | |
557 | (list_size < buffer_size)) { | |
558 | if (isSysProc(p)) { | |
559 | /* | |
560 | * Daemon:- We will consider freezing it iff: | |
561 | * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation) | |
562 | * - its role in the coalition is XPC service. | |
563 | * | |
564 | * We skip memory size requirements in this case. | |
565 | */ | |
566 | ||
567 | coalition_t coal = COALITION_NULL; | |
568 | task_t leader_task = NULL, curr_task = NULL; | |
569 | int task_role_in_coalition = 0; | |
570 | ||
571 | curr_task = proc_task(p); | |
572 | coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM); | |
573 | ||
574 | if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) { | |
575 | /* | |
576 | * By default, XPC services without an app | |
577 | * will be the leader of their own single-member | |
578 | * coalition. | |
579 | */ | |
580 | goto skip_ineligible_xpc; | |
581 | } | |
582 | ||
583 | leader_task = coalition_get_leader(coal); | |
584 | if (leader_task == TASK_NULL) { | |
585 | /* | |
586 | * This jetsam coalition is currently leader-less. | |
587 | * This could happen if the app died, but XPC services | |
588 | * have not yet exited. | |
589 | */ | |
590 | goto skip_ineligible_xpc; | |
591 | } | |
592 | ||
593 | leader_proc = (proc_t)get_bsdtask_info(leader_task); | |
594 | task_deallocate(leader_task); | |
595 | ||
596 | if (leader_proc == PROC_NULL) { | |
597 | /* leader task is exiting */ | |
598 | goto skip_ineligible_xpc; | |
599 | } | |
600 | ||
601 | task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task); | |
602 | ||
603 | if (task_role_in_coalition == COALITION_TASKROLE_XPC) { | |
604 | xpc_skip_size_probability_check = TRUE; | |
605 | leader_pid = leader_proc->p_pid; | |
606 | goto continue_eval; | |
607 | } | |
608 | ||
609 | skip_ineligible_xpc: | |
610 | p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
611 | proc_count++; | |
612 | continue; | |
613 | } | |
614 | ||
615 | continue_eval: | |
616 | strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1); | |
617 | ||
618 | list_entry->p_pid = p->p_pid; | |
619 | ||
620 | state = p->p_memstat_state; | |
621 | ||
622 | if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) || | |
623 | !(state & P_MEMSTAT_SUSPENDED)) { | |
624 | try_freeze = list_entry->freeze_has_memstat_state = FALSE; | |
625 | } else { | |
626 | try_freeze = list_entry->freeze_has_memstat_state = TRUE; | |
627 | } | |
628 | ||
629 | list_entry->p_memstat_state = state; | |
630 | ||
631 | if (xpc_skip_size_probability_check == TRUE) { | |
632 | /* | |
633 | * Assuming the coalition leader is freezable | |
634 | * we don't care re. minimum pages and probability | |
635 | * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED. | |
636 | * XPC services have to be explicity opted-out of the disabled | |
637 | * state. And we checked that state above. | |
638 | */ | |
639 | list_entry->freeze_has_pages_min = TRUE; | |
640 | list_entry->p_pages = -1; | |
641 | list_entry->freeze_has_probability = -1; | |
642 | ||
643 | list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN; | |
644 | list_entry->p_leader_pid = leader_pid; | |
645 | ||
646 | xpc_skip_size_probability_check = FALSE; | |
647 | } else { | |
648 | list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */ | |
649 | list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */ | |
650 | ||
651 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL); | |
652 | if (pages < memorystatus_freeze_pages_min) { | |
653 | try_freeze = list_entry->freeze_has_pages_min = FALSE; | |
654 | } else { | |
655 | list_entry->freeze_has_pages_min = TRUE; | |
656 | } | |
657 | ||
658 | list_entry->p_pages = pages; | |
659 | ||
660 | if (entry_count) { | |
661 | uint32_t j = 0; | |
662 | for (j = 0; j < entry_count; j++) { | |
663 | if (strncmp(memorystatus_global_probabilities_table[j].proc_name, | |
664 | p->p_name, | |
665 | MAXCOMLEN + 1) == 0) { | |
666 | probability_of_use = memorystatus_global_probabilities_table[j].use_probability; | |
667 | break; | |
668 | } | |
669 | } | |
670 | ||
671 | list_entry->freeze_has_probability = probability_of_use; | |
672 | ||
673 | try_freeze = ((probability_of_use > 0) && try_freeze); | |
674 | } else { | |
675 | list_entry->freeze_has_probability = -1; | |
676 | } | |
677 | } | |
678 | ||
679 | if (try_freeze) { | |
680 | uint32_t purgeable, wired, clean, dirty, shared; | |
681 | uint32_t max_pages = 0; | |
682 | int freezer_error_code = 0; | |
683 | ||
684 | error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */); | |
685 | ||
686 | if (error) { | |
687 | list_entry->p_freeze_error_code = freezer_error_code; | |
688 | } | |
689 | ||
690 | list_entry->freeze_attempted = TRUE; | |
691 | } | |
692 | ||
693 | list_entry++; | |
694 | freeze_eligible_proc_considered++; | |
695 | ||
696 | list_size += sizeof(proc_freezable_status_t); | |
697 | ||
698 | p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
699 | proc_count++; | |
700 | } | |
701 | ||
702 | proc_list_unlock(); | |
703 | ||
704 | list_entry = list_entry_start; | |
705 | ||
706 | for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) { | |
707 | if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) { | |
708 | leader_pid = list_entry[xpc_index].p_leader_pid; | |
709 | ||
710 | leader_proc = proc_find(leader_pid); | |
711 | ||
712 | if (leader_proc) { | |
713 | if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) { | |
714 | /* | |
715 | * Leader has already been frozen. | |
716 | */ | |
717 | list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; | |
718 | proc_rele(leader_proc); | |
719 | continue; | |
720 | } | |
721 | proc_rele(leader_proc); | |
722 | } | |
723 | ||
724 | for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) { | |
725 | if (list_entry[leader_index].p_pid == leader_pid) { | |
726 | if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) { | |
727 | list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; | |
728 | } else { | |
729 | list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE; | |
730 | list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC; | |
731 | } | |
732 | break; | |
733 | } | |
734 | } | |
735 | ||
736 | /* | |
737 | * Didn't find the leader entry. This might be likely because | |
738 | * the leader never made it down to band 0. | |
739 | */ | |
740 | if (leader_index == freeze_eligible_proc_considered) { | |
741 | list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE; | |
742 | list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC; | |
743 | } | |
744 | } | |
745 | } | |
746 | ||
747 | buffer_size = list_size; | |
748 | ||
749 | error = copyout(list_head, buffer, buffer_size); | |
750 | if (error == 0) { | |
751 | *retval = buffer_size; | |
752 | } else { | |
753 | *retval = 0; | |
754 | } | |
755 | ||
756 | list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES); | |
757 | kfree(list_head, list_size); | |
758 | ||
759 | MEMORYSTATUS_DEBUG(1, "memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)*list_size); | |
760 | ||
761 | return error; | |
762 | } | |
763 | ||
764 | int | |
765 | memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval) | |
766 | { | |
767 | int err = ENOTSUP; | |
768 | ||
769 | if (flags == FREEZER_CONTROL_GET_STATUS) { | |
770 | err = memorystatus_freezer_get_status(buffer, buffer_size, retval); | |
771 | } | |
772 | ||
773 | return err; | |
774 | } | |
775 | ||
776 | #endif /* DEVELOPMENT || DEBUG */ | |
777 | ||
778 | extern void vm_swap_consider_defragmenting(int); | |
779 | extern boolean_t memorystatus_kill_elevated_process(uint32_t, os_reason_t, unsigned int, int, uint32_t *, uint64_t *); | |
780 | ||
/*
 * This routine will _jetsam_ all frozen processes
 * and reclaim the swap space immediately.
 *
 * So freeze has to be DISABLED when we call this routine.
 *
 * Strategy: relocate every frozen process into the freezer jetsam band,
 * then repeatedly call memorystatus_kill_elevated_process() on that band.
 * If a racing in-flight freeze sneaks a new frozen process in, retry the
 * whole relocate+kill pass (up to 3 times).
 */

void
memorystatus_disable_freeze(void)
{
	memstat_bucket_t *bucket;
	int bucket_count = 0, retries = 0;
	boolean_t retval = FALSE, killed = FALSE;
	uint32_t errors = 0, errors_over_prev_iteration = 0;
	os_reason_t jetsam_reason = 0;
	unsigned int band = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	uint64_t memory_reclaimed = 0, footprint = 0;

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START,
	    memorystatus_available_pages, 0, 0, 0, 0);

	assert(memorystatus_freeze_enabled == FALSE);

	jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE);
	if (jetsam_reason == OS_REASON_NULL) {
		/* Allocation failure is non-fatal: kills proceed with a NULL reason. */
		printf("memorystatus_disable_freeze: failed to allocate jetsam reason\n");
	}

	/*
	 * Let's relocate all frozen processes into band 8. Demoted frozen processes
	 * are sitting in band 0 currently and it's possible to have a frozen process
	 * in the FG band being actively used. We don't reset its frozen state when
	 * it is resumed because it has state on disk.
	 *
	 * We choose to do this relocation rather than implement a new 'kill frozen'
	 * process function for these reasons:
	 * - duplication of code: too many kill functions exist and we need to rework them better.
	 * - disk-space-shortage kills are rare
	 * - not having the 'real' jetsam band at time of the this frozen kill won't preclude us
	 *   from answering any imp. questions re. jetsam policy/effectiveness.
	 *
	 * This is essentially what memorystatus_update_inactive_jetsam_priority_band() does while
	 * avoiding the application of memory limits.
	 */

again:
	proc_list_lock();

	band = JETSAM_PRIORITY_IDLE;
	p = PROC_NULL;
	next_p = PROC_NULL;

	/* Walk bands in ascending priority order, starting from IDLE. */
	next_p = memorystatus_get_first_proc_locked(&band, TRUE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, TRUE);

		/* Nothing above the FG band needs relocation; stop the walk. */
		if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) {
			break;
		}

		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
			continue;
		}

		/* Clear stale error state before re-prioritizing the process. */
		if (p->p_memstat_state & P_MEMSTAT_ERROR) {
			p->p_memstat_state &= ~P_MEMSTAT_ERROR;
		}

		/* Already in the freezer jetsam band; nothing to move. */
		if (p->p_memstat_effectivepriority == memorystatus_freeze_jetsam_band) {
			continue;
		}

		/*
		 * We explicitly add this flag here so the process looks like a normal
		 * frozen process i.e. P_MEMSTAT_FROZEN and P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND.
		 * We don't bother with assigning the 'active' memory
		 * limits at this point because we are going to be killing it soon below.
		 */
		p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
		memorystatus_invalidate_idle_demotion_locked(p, TRUE);

		memorystatus_update_priority_locked(p, memorystatus_freeze_jetsam_band, FALSE, TRUE);
	}

	bucket = &memstat_bucket[memorystatus_freeze_jetsam_band];
	bucket_count = bucket->count;
	proc_list_unlock();

	/*
	 * Bucket count is already stale at this point. But, we don't expect
	 * freezing to continue since we have already disabled the freeze functionality.
	 * However, an existing freeze might be in progress. So we might miss that process
	 * in the first go-around. We hope to catch it in the next.
	 */

	errors_over_prev_iteration = 0;
	while (bucket_count) {
		bucket_count--;

		/*
		 * memorystatus_kill_elevated_process() drops a reference,
		 * so take another one so we can continue to use this exit reason
		 * even after it returns.
		 */

		os_reason_ref(jetsam_reason);
		retval = memorystatus_kill_elevated_process(
			kMemorystatusKilledDiskSpaceShortage,
			jetsam_reason,
			memorystatus_freeze_jetsam_band,
			0, /* the iteration of aggressive jetsam..ignored here */
			&errors,
			&footprint);

		if (errors > 0) {
			printf("memorystatus_disable_freeze: memorystatus_kill_elevated_process returned %d error(s)\n", errors);
			errors_over_prev_iteration += errors;
			errors = 0;
		}

		if (retval == 0) {
			/*
			 * No frozen processes left to kill.
			 */
			break;
		}

		killed = TRUE;
		memory_reclaimed += footprint;
	}

	proc_list_lock();

	if (memorystatus_frozen_count) {
		/*
		 * A frozen process snuck in and so
		 * go back around to kill it. That
		 * process may have been resumed and
		 * put into the FG band too. So we
		 * have to do the relocation again.
		 */
		assert(memorystatus_freeze_enabled == FALSE);

		retries++;
		if (retries < 3) {
			proc_list_unlock();
			goto again;
		}
#if DEVELOPMENT || DEBUG
		panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d, errors = %d",
		    memorystatus_frozen_count, errors_over_prev_iteration);
#endif /* DEVELOPMENT || DEBUG */
	}
	proc_list_unlock();

	os_reason_free(jetsam_reason);

	if (killed) {
		/* Reclaim the swap space freed by the kills right away. */
		vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM);

		/*
		 * Refresh the jetsam snapshot and notify user space, rate-limited
		 * by memorystatus_jetsam_snapshot_timeout.
		 */
		proc_list_lock();
		size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) +
		    sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count);
		uint64_t timestamp_now = mach_absolute_time();
		memorystatus_jetsam_snapshot->notification_time = timestamp_now;
		memorystatus_jetsam_snapshot->js_gencount++;
		if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 ||
		    timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) {
			proc_list_unlock();
			int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size));
			if (!ret) {
				/* Only stamp the timestamp if the note was actually delivered. */
				proc_list_lock();
				memorystatus_jetsam_snapshot_last_timestamp = timestamp_now;
				proc_list_unlock();
			}
		} else {
			proc_list_unlock();
		}
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END,
	    memorystatus_available_pages, memory_reclaimed, 0, 0, 0);

	return;
}
968 | ||
969 | __private_extern__ void | |
970 | memorystatus_freeze_init(void) | |
971 | { | |
972 | kern_return_t result; | |
973 | thread_t thread; | |
974 | ||
975 | freezer_lck_grp_attr = lck_grp_attr_alloc_init(); | |
976 | freezer_lck_grp = lck_grp_alloc_init("freezer", freezer_lck_grp_attr); | |
977 | ||
978 | lck_mtx_init(&freezer_mutex, freezer_lck_grp, NULL); | |
979 | ||
980 | /* | |
981 | * This is just the default value if the underlying | |
982 | * storage device doesn't have any specific budget. | |
983 | * We check with the storage layer in memorystatus_freeze_update_throttle() | |
984 | * before we start our freezing the first time. | |
985 | */ | |
986 | memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE; | |
987 | ||
988 | result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread); | |
989 | if (result == KERN_SUCCESS) { | |
990 | proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); | |
991 | proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE); | |
992 | thread_set_thread_name(thread, "VM_freezer"); | |
993 | ||
994 | thread_deallocate(thread); | |
995 | } else { | |
996 | panic("Could not create memorystatus_freeze_thread"); | |
997 | } | |
998 | } | |
999 | ||
/*
 * Decide whether proc 'p' is currently a candidate for freezing.
 *
 * Daemons (isSysProc) qualify only when they are an XPC service inside a
 * jetsam coalition whose leader is already frozen; memory-size checks are
 * skipped for them. Applications must be suspended, meet the minimum
 * resident-page threshold, not be exiting, and — when a probability table
 * has been provided — have a non-zero predicted probability of re-use.
 *
 * Returns TRUE if the process should be frozen.
 * Called and returns with the proc_list_lock held.
 */
static boolean_t
memorystatus_is_process_eligible_for_freeze(proc_t p)
{
	/*
	 * Called with proc_list_lock held.
	 */

	LCK_MTX_ASSERT(proc_list_mlock, LCK_MTX_ASSERT_OWNED);

	boolean_t should_freeze = FALSE;
	uint32_t state = 0, entry_count = 0, pages = 0, i = 0;
	int probability_of_use = 0;

	state = p->p_memstat_state;

	/* Hard disqualifiers that apply to both daemons and applications. */
	if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) {
		goto out;
	}

	if (isSysProc(p)) {
		/*
		 * Daemon:- We consider freezing it if:
		 * - it belongs to a coalition and the leader is frozen, and,
		 * - its role in the coalition is XPC service.
		 *
		 * We skip memory size requirements in this case.
		 */

		coalition_t coal = COALITION_NULL;
		task_t leader_task = NULL, curr_task = NULL;
		proc_t leader_proc = NULL;
		int task_role_in_coalition = 0;

		curr_task = proc_task(p);
		coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM);

		if (coal == NULL || coalition_is_leader(curr_task, coal)) {
			/*
			 * By default, XPC services without an app
			 * will be the leader of their own single-member
			 * coalition.
			 */
			goto out;
		}

		leader_task = coalition_get_leader(coal);
		if (leader_task == TASK_NULL) {
			/*
			 * This jetsam coalition is currently leader-less.
			 * This could happen if the app died, but XPC services
			 * have not yet exited.
			 */
			goto out;
		}

		leader_proc = (proc_t)get_bsdtask_info(leader_task);
		/*
		 * NOTE(review): leader_proc is dereferenced below after the task
		 * reference is dropped here — presumably the held proc_list_lock
		 * keeps the proc from being reaped; confirm.
		 */
		task_deallocate(leader_task);

		if (leader_proc == PROC_NULL) {
			/* leader task is exiting */
			goto out;
		}

		/* Daemons only freeze behind an already-frozen coalition leader. */
		if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) {
			goto out;
		}

		task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task);

		if (task_role_in_coalition == COALITION_TASKROLE_XPC) {
			should_freeze = TRUE;
		}

		goto out;
	} else {
		/*
		 * Application. In addition to the above states we need to make
		 * sure we only consider suspended applications for freezing.
		 */
		if (!(state & P_MEMSTAT_SUSPENDED)) {
			goto out;
		}
	}


	/* Only freeze applications meeting our minimum resident page criteria */
	memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL);
	if (pages < memorystatus_freeze_pages_min) {
		goto out;
	}

	/* Don't freeze processes that are already exiting on core. It may have started exiting
	 * after we chose it for freeze, but before we obtained the proc_list_lock.
	 * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync.
	 * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands.
	 */
	if ((p->p_listflag & P_LIST_EXITED) != 0) {
		goto out;
	}

	entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t));

	if (entry_count) {
		/*
		 * A probability table was pushed down from user space: only
		 * consider apps with a non-zero predicted chance of re-use.
		 */
		for (i = 0; i < entry_count; i++) {
			if (strncmp(memorystatus_global_probabilities_table[i].proc_name,
			    p->p_name,
			    MAXCOMLEN + 1) == 0) {
				probability_of_use = memorystatus_global_probabilities_table[i].use_probability;
				break;
			}
		}

		if (probability_of_use == 0) {
			goto out;
		}
	}

	should_freeze = TRUE;
out:
	return should_freeze;
}
1121 | ||
1122 | /* | |
1123 | * Synchronously freeze the passed proc. Called with a reference to the proc held. | |
1124 | * | |
1125 | * Doesn't deal with: | |
1126 | * - re-freezing because this is called on a specific process and | |
1127 | * not by the freezer thread. If that changes, we'll have to teach it about | |
1128 | * refreezing a frozen process. | |
1129 | * | |
1130 | * - grouped/coalition freezing because we are hoping to deprecate this | |
1131 | * interface as it was used by user-space to freeze particular processes. But | |
1132 | * we have moved away from that approach to having the kernel choose the optimal | |
1133 | * candidates to be frozen. | |
1134 | * | |
1135 | * Returns EINVAL or the value returned by task_freeze(). | |
1136 | */ | |
1137 | int | |
1138 | memorystatus_freeze_process_sync(proc_t p) | |
1139 | { | |
1140 | int ret = EINVAL; | |
1141 | pid_t aPid = 0; | |
1142 | boolean_t memorystatus_freeze_swap_low = FALSE; | |
1143 | int freezer_error_code = 0; | |
1144 | ||
1145 | lck_mtx_lock(&freezer_mutex); | |
1146 | ||
1147 | if (p == NULL) { | |
1148 | printf("memorystatus_freeze_process_sync: Invalid process\n"); | |
1149 | goto exit; | |
1150 | } | |
1151 | ||
1152 | if (memorystatus_freeze_enabled == FALSE) { | |
1153 | printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n"); | |
1154 | goto exit; | |
1155 | } | |
1156 | ||
1157 | if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { | |
1158 | printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n"); | |
1159 | goto exit; | |
1160 | } | |
1161 | ||
1162 | memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining); | |
1163 | if (!memorystatus_freeze_budget_pages_remaining) { | |
1164 | printf("memorystatus_freeze_process_sync: exit with NO available budget\n"); | |
1165 | goto exit; | |
1166 | } | |
1167 | ||
1168 | proc_list_lock(); | |
1169 | ||
1170 | if (p != NULL) { | |
1171 | uint32_t purgeable, wired, clean, dirty, shared; | |
1172 | uint32_t max_pages, i; | |
1173 | ||
1174 | aPid = p->p_pid; | |
1175 | ||
1176 | /* Ensure the process is eligible for freezing */ | |
1177 | if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) { | |
1178 | proc_list_unlock(); | |
1179 | goto exit; | |
1180 | } | |
1181 | ||
1182 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1183 | max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining); | |
1184 | } else { | |
1185 | /* | |
1186 | * We only have the compressor without any swap. | |
1187 | */ | |
1188 | max_pages = UINT32_MAX - 1; | |
1189 | } | |
1190 | ||
1191 | /* Mark as locked temporarily to avoid kill */ | |
1192 | p->p_memstat_state |= P_MEMSTAT_LOCKED; | |
1193 | proc_list_unlock(); | |
1194 | ||
1195 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, | |
1196 | memorystatus_available_pages, 0, 0, 0, 0); | |
1197 | ||
1198 | ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */); | |
1199 | ||
1200 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, | |
1201 | memorystatus_available_pages, aPid, 0, 0, 0); | |
1202 | ||
1203 | DTRACE_MEMORYSTATUS6(memorystatus_freeze, proc_t, p, unsigned int, memorystatus_available_pages, boolean_t, purgeable, unsigned int, wired, uint32_t, clean, uint32_t, dirty); | |
1204 | ||
1205 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - " | |
1206 | "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n", | |
1207 | (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"), | |
1208 | memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared); | |
1209 | ||
1210 | proc_list_lock(); | |
1211 | ||
1212 | if (ret == KERN_SUCCESS) { | |
1213 | memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; | |
1214 | ||
1215 | p->p_memstat_freeze_sharedanon_pages += shared; | |
1216 | ||
1217 | memorystatus_frozen_shared_mb += shared; | |
1218 | ||
1219 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) { | |
1220 | p->p_memstat_state |= P_MEMSTAT_FROZEN; | |
1221 | memorystatus_frozen_count++; | |
1222 | } | |
1223 | ||
1224 | p->p_memstat_frozen_count++; | |
1225 | ||
1226 | /* | |
1227 | * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process | |
1228 | * to its higher jetsam band. | |
1229 | */ | |
1230 | proc_list_unlock(); | |
1231 | ||
1232 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); | |
1233 | ||
1234 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1235 | ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, | |
1236 | memorystatus_freeze_jetsam_band, TRUE); | |
1237 | ||
1238 | if (ret) { | |
1239 | printf("Elevating the frozen process failed with %d\n", ret); | |
1240 | /* not fatal */ | |
1241 | ret = 0; | |
1242 | } | |
1243 | ||
1244 | proc_list_lock(); | |
1245 | ||
1246 | /* Update stats */ | |
1247 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
1248 | throttle_intervals[i].pageouts += dirty; | |
1249 | } | |
1250 | } else { | |
1251 | proc_list_lock(); | |
1252 | } | |
1253 | ||
1254 | memorystatus_freeze_pageouts += dirty; | |
1255 | ||
1256 | if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) { | |
1257 | /* | |
1258 | * Add some eviction logic here? At some point should we | |
1259 | * jetsam a process to get back its swap space so that we | |
1260 | * can freeze a more eligible process at this moment in time? | |
1261 | */ | |
1262 | } | |
1263 | ||
1264 | memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining); | |
1265 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages", | |
1266 | aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty); | |
1267 | } else { | |
1268 | char reason[128]; | |
1269 | if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) { | |
1270 | strlcpy(reason, "too much shared memory", 128); | |
1271 | } | |
1272 | ||
1273 | if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) { | |
1274 | strlcpy(reason, "low private-shared pages ratio", 128); | |
1275 | } | |
1276 | ||
1277 | if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) { | |
1278 | strlcpy(reason, "no compressor space", 128); | |
1279 | } | |
1280 | ||
1281 | if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) { | |
1282 | strlcpy(reason, "no swap space", 128); | |
1283 | } | |
1284 | ||
1285 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s]...skipped (%s)", | |
1286 | aPid, ((p && *p->p_name) ? p->p_name : "unknown"), reason); | |
1287 | p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE; | |
1288 | } | |
1289 | ||
1290 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
1291 | wakeup(&p->p_memstat_state); | |
1292 | proc_list_unlock(); | |
1293 | } | |
1294 | ||
1295 | exit: | |
1296 | lck_mtx_unlock(&freezer_mutex); | |
1297 | ||
1298 | return ret; | |
1299 | } | |
1300 | ||
1301 | static int | |
1302 | memorystatus_freeze_top_process(void) | |
1303 | { | |
1304 | pid_t aPid = 0, coal_xpc_pid = 0; | |
1305 | int ret = -1; | |
1306 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
1307 | unsigned int i = 0; | |
1308 | unsigned int band = JETSAM_PRIORITY_IDLE; | |
1309 | boolean_t refreeze_processes = FALSE; | |
1310 | task_t curr_task = NULL; | |
1311 | coalition_t coal = COALITION_NULL; | |
1312 | pid_t pid_list[MAX_XPC_SERVICE_PIDS]; | |
1313 | unsigned int ntasks = 0; | |
1314 | ||
1315 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0); | |
1316 | ||
1317 | proc_list_lock(); | |
1318 | ||
1319 | if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) { | |
1320 | /* | |
1321 | * Freezer is already full but we are here and so let's | |
1322 | * try to refreeze any processes we might have thawed | |
1323 | * in the past and push out their compressed state out. | |
1324 | */ | |
1325 | refreeze_processes = TRUE; | |
1326 | band = (unsigned int) memorystatus_freeze_jetsam_band; | |
1327 | } | |
1328 | ||
1329 | freeze_process: | |
1330 | ||
1331 | next_p = memorystatus_get_first_proc_locked(&band, FALSE); | |
1332 | while (next_p) { | |
1333 | kern_return_t kr; | |
1334 | uint32_t purgeable, wired, clean, dirty, shared; | |
1335 | uint32_t max_pages = 0; | |
1336 | int freezer_error_code = 0; | |
1337 | ||
1338 | p = next_p; | |
1339 | ||
1340 | if (coal == NULL) { | |
1341 | next_p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
1342 | } else { | |
1343 | /* | |
1344 | * We have frozen a coalition leader and now are | |
1345 | * dealing with its XPC services. We get our | |
1346 | * next_p for each XPC service from the pid_list | |
1347 | * acquired after a successful task_freeze call | |
1348 | * on the coalition leader. | |
1349 | */ | |
1350 | ||
1351 | if (ntasks > 0) { | |
1352 | coal_xpc_pid = pid_list[--ntasks]; | |
1353 | next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */); | |
1354 | /* | |
1355 | * We grab a reference when we are about to freeze the process. So, drop | |
1356 | * the reference that proc_findinternal() grabbed for us. | |
1357 | * We also have the proc_list_lock and so this process is stable. | |
1358 | */ | |
1359 | if (next_p) { | |
1360 | proc_rele_locked(next_p); | |
1361 | } | |
1362 | } else { | |
1363 | next_p = NULL; | |
1364 | } | |
1365 | } | |
1366 | ||
1367 | aPid = p->p_pid; | |
1368 | ||
1369 | if (p->p_memstat_effectivepriority != (int32_t) band) { | |
1370 | /* | |
1371 | * We shouldn't be freezing processes outside the | |
1372 | * prescribed band. | |
1373 | */ | |
1374 | break; | |
1375 | } | |
1376 | ||
1377 | /* Ensure the process is eligible for (re-)freezing */ | |
1378 | if (refreeze_processes) { | |
1379 | /* | |
1380 | * Has to have been frozen once before. | |
1381 | */ | |
1382 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) { | |
1383 | continue; | |
1384 | } | |
1385 | ||
1386 | /* | |
1387 | * Has to have been resumed once before. | |
1388 | */ | |
1389 | if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == FALSE) { | |
1390 | continue; | |
1391 | } | |
1392 | ||
1393 | /* | |
1394 | * Not currently being looked at for something. | |
1395 | */ | |
1396 | if (p->p_memstat_state & P_MEMSTAT_LOCKED) { | |
1397 | continue; | |
1398 | } | |
1399 | ||
1400 | /* | |
1401 | * We are going to try and refreeze and so re-evaluate | |
1402 | * the process. We don't want to double count the shared | |
1403 | * memory. So deduct the old snapshot here. | |
1404 | */ | |
1405 | memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages; | |
1406 | p->p_memstat_freeze_sharedanon_pages = 0; | |
1407 | ||
1408 | p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE; | |
1409 | memorystatus_refreeze_eligible_count--; | |
1410 | } else { | |
1411 | if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) { | |
1412 | continue; // with lock held | |
1413 | } | |
1414 | } | |
1415 | ||
1416 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1417 | /* | |
1418 | * Freezer backed by the compressor and swap file(s) | |
1419 | * will hold compressed data. | |
1420 | */ | |
1421 | ||
1422 | max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining); | |
1423 | } else { | |
1424 | /* | |
1425 | * We only have the compressor pool. | |
1426 | */ | |
1427 | max_pages = UINT32_MAX - 1; | |
1428 | } | |
1429 | ||
1430 | /* Mark as locked temporarily to avoid kill */ | |
1431 | p->p_memstat_state |= P_MEMSTAT_LOCKED; | |
1432 | ||
1433 | p = proc_ref_locked(p); | |
1434 | if (!p) { | |
1435 | break; | |
1436 | } | |
1437 | ||
1438 | proc_list_unlock(); | |
1439 | ||
1440 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, | |
1441 | memorystatus_available_pages, 0, 0, 0, 0); | |
1442 | ||
1443 | kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */); | |
1444 | ||
1445 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, | |
1446 | memorystatus_available_pages, aPid, 0, 0, 0); | |
1447 | ||
1448 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - " | |
1449 | "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n", | |
1450 | (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"), | |
1451 | memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared); | |
1452 | ||
1453 | proc_list_lock(); | |
1454 | ||
1455 | /* Success? */ | |
1456 | if (KERN_SUCCESS == kr) { | |
1457 | memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; | |
1458 | ||
1459 | p->p_memstat_freeze_sharedanon_pages += shared; | |
1460 | ||
1461 | memorystatus_frozen_shared_mb += shared; | |
1462 | ||
1463 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) { | |
1464 | p->p_memstat_state |= P_MEMSTAT_FROZEN; | |
1465 | memorystatus_frozen_count++; | |
1466 | } | |
1467 | ||
1468 | p->p_memstat_frozen_count++; | |
1469 | ||
1470 | /* | |
1471 | * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process | |
1472 | * to its higher jetsam band. | |
1473 | */ | |
1474 | proc_list_unlock(); | |
1475 | ||
1476 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); | |
1477 | ||
1478 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1479 | ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE); | |
1480 | ||
1481 | if (ret) { | |
1482 | printf("Elevating the frozen process failed with %d\n", ret); | |
1483 | /* not fatal */ | |
1484 | ret = 0; | |
1485 | } | |
1486 | ||
1487 | proc_list_lock(); | |
1488 | ||
1489 | /* Update stats */ | |
1490 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
1491 | throttle_intervals[i].pageouts += dirty; | |
1492 | } | |
1493 | } else { | |
1494 | proc_list_lock(); | |
1495 | } | |
1496 | ||
1497 | memorystatus_freeze_pageouts += dirty; | |
1498 | ||
1499 | if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) { | |
1500 | /* | |
1501 | * Add some eviction logic here? At some point should we | |
1502 | * jetsam a process to get back its swap space so that we | |
1503 | * can freeze a more eligible process at this moment in time? | |
1504 | */ | |
1505 | } | |
1506 | ||
1507 | memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining); | |
1508 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n", | |
1509 | refreeze_processes? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, refreeze_processes? "Re" : "", dirty); | |
1510 | ||
1511 | /* Return KERN_SUCCESS */ | |
1512 | ret = kr; | |
1513 | ||
1514 | /* | |
1515 | * We froze a process successfully. We can stop now | |
1516 | * and see if that helped if this process isn't part | |
1517 | * of a coalition. | |
1518 | * | |
1519 | * Else: | |
1520 | * - if it is a leader, get the list of XPC services | |
1521 | * that need to be frozen. | |
1522 | * - if it is a XPC service whose leader was frozen | |
1523 | * here, continue on to the next XPC service in the list. | |
1524 | */ | |
1525 | ||
1526 | if (coal == NULL) { | |
1527 | curr_task = proc_task(p); | |
1528 | coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM); | |
1529 | if (coalition_is_leader(curr_task, coal)) { | |
1530 | ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC, | |
1531 | COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS); | |
1532 | ||
1533 | if (ntasks > MAX_XPC_SERVICE_PIDS) { | |
1534 | ntasks = MAX_XPC_SERVICE_PIDS; | |
1535 | } | |
1536 | } | |
1537 | ||
1538 | next_p = NULL; | |
1539 | ||
1540 | if (ntasks > 0) { | |
1541 | /* | |
1542 | * Start off with our first next_p in this list. | |
1543 | */ | |
1544 | coal_xpc_pid = pid_list[--ntasks]; | |
1545 | next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */); | |
1546 | ||
1547 | /* | |
1548 | * We grab a reference when we are about to freeze the process. So drop | |
1549 | * the reference that proc_findinternal() grabbed for us. | |
1550 | * We also have the proc_list_lock and so this process is stable. | |
1551 | */ | |
1552 | if (next_p) { | |
1553 | proc_rele_locked(next_p); | |
1554 | } | |
1555 | } | |
1556 | } | |
1557 | ||
1558 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
1559 | wakeup(&p->p_memstat_state); | |
1560 | proc_rele_locked(p); | |
1561 | ||
1562 | if (coal && next_p) { | |
1563 | continue; | |
1564 | } | |
1565 | ||
1566 | /* | |
1567 | * No coalition leader was frozen. So we don't | |
1568 | * need to evaluate any XPC services. | |
1569 | * | |
1570 | * OR | |
1571 | * | |
1572 | * We have frozen all eligible XPC services for | |
1573 | * the current coalition leader. | |
1574 | * | |
1575 | * Either way, we can break here and see if freezing | |
1576 | * helped. | |
1577 | */ | |
1578 | ||
1579 | break; | |
1580 | } else { | |
1581 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
1582 | wakeup(&p->p_memstat_state); | |
1583 | ||
1584 | if (refreeze_processes == TRUE) { | |
1585 | if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) || | |
1586 | (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) { | |
1587 | /* | |
1588 | * Keeping this prior-frozen process in this high band when | |
1589 | * we failed to re-freeze it due to bad shared memory usage | |
1590 | * could cause excessive pressure on the lower bands. | |
1591 | * We need to demote it for now. It'll get re-evaluated next | |
1592 | * time because we don't set the P_MEMSTAT_FREEZE_IGNORE | |
1593 | * bit. | |
1594 | */ | |
1595 | ||
1596 | p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND; | |
1597 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1598 | memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE); | |
1599 | } | |
1600 | } else { | |
1601 | p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE; | |
1602 | } | |
1603 | ||
1604 | char reason[128]; | |
1605 | if (freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) { | |
1606 | strlcpy(reason, "too much shared memory", 128); | |
1607 | } | |
1608 | ||
1609 | if (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) { | |
1610 | strlcpy(reason, "low private-shared pages ratio", 128); | |
1611 | } | |
1612 | ||
1613 | if (freezer_error_code == FREEZER_ERROR_NO_COMPRESSOR_SPACE) { | |
1614 | strlcpy(reason, "no compressor space", 128); | |
1615 | } | |
1616 | ||
1617 | if (freezer_error_code == FREEZER_ERROR_NO_SWAP_SPACE) { | |
1618 | strlcpy(reason, "no swap space", 128); | |
1619 | } | |
1620 | ||
1621 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (%s) pid %d [%s]...skipped (%s)\n", | |
1622 | (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), reason); | |
1623 | ||
1624 | proc_rele_locked(p); | |
1625 | ||
1626 | if (vm_compressor_low_on_space() || vm_swap_low_on_space()) { | |
1627 | break; | |
1628 | } | |
1629 | } | |
1630 | } | |
1631 | ||
1632 | if ((ret == -1) && | |
1633 | (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD) && | |
1634 | (refreeze_processes == FALSE)) { | |
1635 | /* | |
1636 | * We failed to freeze a process from the IDLE | |
1637 | * band AND we have some thawed processes | |
1638 | * AND haven't tried refreezing as yet. | |
1639 | * Let's try and re-freeze processes in the | |
1640 | * frozen band that have been resumed in the past | |
1641 | * and so have brought in state from disk. | |
1642 | */ | |
1643 | ||
1644 | band = (unsigned int) memorystatus_freeze_jetsam_band; | |
1645 | ||
1646 | refreeze_processes = TRUE; | |
1647 | ||
1648 | goto freeze_process; | |
1649 | } | |
1650 | ||
1651 | proc_list_unlock(); | |
1652 | ||
1653 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, aPid, 0, 0, 0); | |
1654 | ||
1655 | return ret; | |
1656 | } | |
1657 | ||
1658 | static inline boolean_t | |
1659 | memorystatus_can_freeze_processes(void) | |
1660 | { | |
1661 | boolean_t ret; | |
1662 | ||
1663 | proc_list_lock(); | |
1664 | ||
1665 | if (memorystatus_suspended_count) { | |
1666 | memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT); | |
1667 | ||
1668 | if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) { | |
1669 | ret = TRUE; | |
1670 | } else { | |
1671 | ret = FALSE; | |
1672 | } | |
1673 | } else { | |
1674 | ret = FALSE; | |
1675 | } | |
1676 | ||
1677 | proc_list_unlock(); | |
1678 | ||
1679 | return ret; | |
1680 | } | |
1681 | ||
1682 | static boolean_t | |
1683 | memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low) | |
1684 | { | |
1685 | boolean_t can_freeze = TRUE; | |
1686 | ||
1687 | /* Only freeze if we're sufficiently low on memory; this holds off freeze right | |
1688 | * after boot, and is generally is a no-op once we've reached steady state. */ | |
1689 | if (memorystatus_available_pages > memorystatus_freeze_threshold) { | |
1690 | return FALSE; | |
1691 | } | |
1692 | ||
1693 | /* Check minimum suspended process threshold. */ | |
1694 | if (!memorystatus_can_freeze_processes()) { | |
1695 | return FALSE; | |
1696 | } | |
1697 | assert(VM_CONFIG_COMPRESSOR_IS_PRESENT); | |
1698 | ||
1699 | if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1700 | /* | |
1701 | * In-core compressor used for freezing WITHOUT on-disk swap support. | |
1702 | */ | |
1703 | if (vm_compressor_low_on_space()) { | |
1704 | if (*memorystatus_freeze_swap_low) { | |
1705 | *memorystatus_freeze_swap_low = TRUE; | |
1706 | } | |
1707 | ||
1708 | can_freeze = FALSE; | |
1709 | } else { | |
1710 | if (*memorystatus_freeze_swap_low) { | |
1711 | *memorystatus_freeze_swap_low = FALSE; | |
1712 | } | |
1713 | ||
1714 | can_freeze = TRUE; | |
1715 | } | |
1716 | } else { | |
1717 | /* | |
1718 | * Freezing WITH on-disk swap support. | |
1719 | * | |
1720 | * In-core compressor fronts the swap. | |
1721 | */ | |
1722 | if (vm_swap_low_on_space()) { | |
1723 | if (*memorystatus_freeze_swap_low) { | |
1724 | *memorystatus_freeze_swap_low = TRUE; | |
1725 | } | |
1726 | ||
1727 | can_freeze = FALSE; | |
1728 | } | |
1729 | } | |
1730 | ||
1731 | return can_freeze; | |
1732 | } | |
1733 | ||
1734 | /* | |
1735 | * This function evaluates if the currently frozen processes deserve | |
1736 | * to stay in the higher jetsam band. There are 2 modes: | |
 * - 'force_one == TRUE': (urgent mode)
1738 | * We are out of budget and can't refreeze a process. The process's | |
1739 | * state, if it was resumed, will stay in compressed memory. If we let it | |
1740 | * remain up in the higher frozen jetsam band, it'll put a lot of pressure on | |
1741 | * the lower bands. So we force-demote the least-recently-used-and-thawed | |
1742 | * process. | |
1743 | * | |
1744 | * - 'force_one == FALSE': (normal mode) | |
1745 | * If the # of thaws of a process is below our threshold, then we | |
1746 | * will demote that process into the IDLE band. | |
1747 | * We don't immediately kill the process here because it already has | |
1748 | * state on disk and so it might be worth giving it another shot at | |
1749 | * getting thawed/resumed and used. | |
1750 | */ | |
static void
memorystatus_demote_frozen_processes(boolean_t force_one)
{
	/* Scan starts at the elevated (frozen) jetsam band. */
	unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band;
	unsigned int demoted_proc_count = 0;
	proc_t p = PROC_NULL, next_p = PROC_NULL;
	/* We demote to IDLE unless someone has asserted a higher priority on this process. */
	int maxpriority = JETSAM_PRIORITY_IDLE;

	proc_list_lock();

	if (memorystatus_freeze_enabled == FALSE) {
		/*
		 * Freeze has been disabled likely to
		 * reclaim swap space. So don't change
		 * any state on the frozen processes.
		 */
		proc_list_unlock();
		return;
	}

	/* Walk every process currently in the frozen jetsam band. */
	next_p = memorystatus_get_first_proc_locked(&band, FALSE);
	while (next_p) {
		p = next_p;
		next_p = memorystatus_get_next_proc_locked(&band, p, FALSE);

		/* Only processes marked frozen are demotion candidates. */
		if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) {
			continue;
		}

		/* Skip processes the freezer is actively operating on. */
		if (p->p_memstat_state & P_MEMSTAT_LOCKED) {
			continue;
		}

		if (force_one == TRUE) {
			if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) {
				/*
				 * This process hasn't been thawed recently and so most of
				 * its state sits on NAND and so we skip it -- jetsamming it
				 * won't help with memory pressure.
				 */
				continue;
			}
		} else {
			if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) {
				/*
				 * This process has met / exceeded our thaw count demotion threshold
				 * and so we let it live in the higher bands.
				 */
				continue;
			}
		}

		/* Strip the elevated-band privilege and recompute its aging state. */
		p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND;
		memorystatus_invalidate_idle_demotion_locked(p, TRUE);

		/*
		 * NOTE(review): maxpriority accumulates across loop iterations, so a
		 * later process is demoted no lower than the highest assertion
		 * priority seen on an earlier process in this scan -- confirm the
		 * cross-process carry-over is intended rather than per-process.
		 */
		maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority);
		memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE);
#if DEVELOPMENT || DEBUG
		os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_demote_frozen_process(%s) pid %d [%s]",
		    (force_one ? "urgent" : "normal"), (p ? p->p_pid : -1), ((p && *p->p_name) ? p->p_name : "unknown"));
#endif /* DEVELOPMENT || DEBUG */

		/*
		 * The freezer thread will consider this a normal app to be frozen
		 * because it is in the IDLE band. So we don't need the
		 * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed
		 * we'll correctly count it as eligible for re-freeze again.
		 *
		 * We don't drop the frozen count because this process still has
		 * state on disk. So there's a chance it gets resumed and then it
		 * should land in the higher jetsam band. For that it needs to
		 * remain marked frozen.
		 */
		if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) {
			p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE;
			memorystatus_refreeze_eligible_count--;
		}

		demoted_proc_count++;

		/* Urgent mode demotes exactly one process; normal mode honors the daily cap. */
		if ((force_one == TRUE) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) {
			break;
		}
	}

	if (force_one == FALSE) {
		/*
		 * We use this counter to track daily thaws.
		 * So we only reset it to 0 under the normal
		 * mode.
		 */
		memorystatus_thaw_count = 0;
	}

	proc_list_unlock();
}
1848 | ||
1849 | ||
1850 | /* | |
1851 | * This function will do 4 things: | |
1852 | * | |
1853 | * 1) check to see if we are currently in a degraded freezer mode, and if so: | |
1854 | * - check to see if our window has expired and we should exit this mode, OR, | |
1855 | * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts. | |
1856 | * | |
1857 | * 2) check to see if we are in a NEW normal window and update the normal throttle window's params. | |
1858 | * | |
1859 | * 3) check what the current normal window allows for a budget. | |
1860 | * | |
1861 | * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below | |
1862 | * what we would normally expect, then we are running low on our daily budget and need to enter | |
1863 | * degraded perf. mode. | |
1864 | */ | |
1865 | ||
1866 | static void | |
1867 | memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed) | |
1868 | { | |
1869 | clock_sec_t sec; | |
1870 | clock_nsec_t nsec; | |
1871 | mach_timespec_t ts; | |
1872 | ||
1873 | unsigned int freeze_daily_pageouts_max = 0; | |
1874 | ||
1875 | #if DEVELOPMENT || DEBUG | |
1876 | if (!memorystatus_freeze_throttle_enabled) { | |
1877 | /* | |
1878 | * No throttling...we can use the full budget everytime. | |
1879 | */ | |
1880 | *budget_pages_allowed = UINT64_MAX; | |
1881 | return; | |
1882 | } | |
1883 | #endif | |
1884 | ||
1885 | clock_get_system_nanotime(&sec, &nsec); | |
1886 | ts.tv_sec = sec; | |
1887 | ts.tv_nsec = nsec; | |
1888 | ||
1889 | struct throttle_interval_t *interval = NULL; | |
1890 | ||
1891 | if (memorystatus_freeze_degradation == TRUE) { | |
1892 | interval = degraded_throttle_window; | |
1893 | ||
1894 | if (CMP_MACH_TIMESPEC(&ts, &interval->ts) >= 0) { | |
1895 | memorystatus_freeze_degradation = FALSE; | |
1896 | interval->pageouts = 0; | |
1897 | interval->max_pageouts = 0; | |
1898 | } else { | |
1899 | *budget_pages_allowed = interval->max_pageouts - interval->pageouts; | |
1900 | } | |
1901 | } | |
1902 | ||
1903 | interval = normal_throttle_window; | |
1904 | ||
1905 | if (CMP_MACH_TIMESPEC(&ts, &interval->ts) >= 0) { | |
1906 | /* | |
1907 | * New throttle window. | |
1908 | * Rollover any unused budget. | |
1909 | * Also ask the storage layer what the new budget needs to be. | |
1910 | */ | |
1911 | uint64_t freeze_daily_budget = 0; | |
1912 | unsigned int daily_budget_pageouts = 0; | |
1913 | ||
1914 | if (vm_swap_max_budget(&freeze_daily_budget)) { | |
1915 | memorystatus_freeze_daily_mb_max = (freeze_daily_budget / (1024 * 1024)); | |
1916 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max); | |
1917 | } | |
1918 | ||
1919 | freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE); | |
1920 | ||
1921 | daily_budget_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / NORMAL_WINDOW_MINS)); | |
1922 | interval->max_pageouts = (interval->max_pageouts - interval->pageouts) + daily_budget_pageouts; | |
1923 | ||
1924 | interval->ts.tv_sec = interval->mins * 60; | |
1925 | interval->ts.tv_nsec = 0; | |
1926 | ADD_MACH_TIMESPEC(&interval->ts, &ts); | |
1927 | /* Since we update the throttle stats pre-freeze, adjust for overshoot here */ | |
1928 | if (interval->pageouts > interval->max_pageouts) { | |
1929 | interval->pageouts -= interval->max_pageouts; | |
1930 | } else { | |
1931 | interval->pageouts = 0; | |
1932 | } | |
1933 | *budget_pages_allowed = interval->max_pageouts; | |
1934 | ||
1935 | memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */ | |
1936 | } else { | |
1937 | /* | |
1938 | * Current throttle window. | |
1939 | * Deny freezing if we have no budget left. | |
1940 | * Try graceful degradation if we are within 25% of: | |
1941 | * - the daily budget, and | |
1942 | * - the current budget left is below our normal budget expectations. | |
1943 | */ | |
1944 | ||
1945 | #if DEVELOPMENT || DEBUG | |
1946 | /* | |
1947 | * This can only happen in the INTERNAL configs because we allow modifying the daily budget for testing. | |
1948 | */ | |
1949 | ||
1950 | if (freeze_daily_pageouts_max > interval->max_pageouts) { | |
1951 | /* | |
1952 | * We just bumped the daily budget. Re-evaluate our normal window params. | |
1953 | */ | |
1954 | interval->max_pageouts = (interval->burst_multiple * (((uint64_t)interval->mins * freeze_daily_pageouts_max) / NORMAL_WINDOW_MINS)); | |
1955 | memorystatus_freeze_degradation = FALSE; //we'll re-evaluate this below... | |
1956 | } | |
1957 | #endif /* DEVELOPMENT || DEBUG */ | |
1958 | ||
1959 | if (memorystatus_freeze_degradation == FALSE) { | |
1960 | if (interval->pageouts >= interval->max_pageouts) { | |
1961 | *budget_pages_allowed = 0; | |
1962 | } else { | |
1963 | int budget_left = interval->max_pageouts - interval->pageouts; | |
1964 | int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100; | |
1965 | ||
1966 | mach_timespec_t time_left = {0, 0}; | |
1967 | ||
1968 | time_left.tv_sec = interval->ts.tv_sec; | |
1969 | time_left.tv_nsec = 0; | |
1970 | ||
1971 | SUB_MACH_TIMESPEC(&time_left, &ts); | |
1972 | ||
1973 | if (budget_left <= budget_threshold) { | |
1974 | /* | |
1975 | * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration. | |
1976 | * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full | |
1977 | * daily pageout budget. | |
1978 | */ | |
1979 | ||
1980 | unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS; | |
1981 | unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS; | |
1982 | ||
1983 | /* | |
1984 | * The current rate of pageouts is below what we would expect for | |
1985 | * the normal rate i.e. we have below normal budget left and so... | |
1986 | */ | |
1987 | ||
1988 | if (current_budget_rate_allowed < normal_budget_rate_allowed) { | |
1989 | memorystatus_freeze_degradation = TRUE; | |
1990 | degraded_throttle_window->max_pageouts = current_budget_rate_allowed; | |
1991 | degraded_throttle_window->pageouts = 0; | |
1992 | ||
1993 | /* | |
1994 | * Switch over to the degraded throttle window so the budget | |
1995 | * doled out is based on that window. | |
1996 | */ | |
1997 | interval = degraded_throttle_window; | |
1998 | } | |
1999 | } | |
2000 | ||
2001 | *budget_pages_allowed = interval->max_pageouts - interval->pageouts; | |
2002 | } | |
2003 | } | |
2004 | } | |
2005 | ||
2006 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n", | |
2007 | interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - ts->tv_sec) / 60, | |
2008 | interval->throttle ? "on" : "off"); | |
2009 | } | |
2010 | ||
2011 | static void | |
2012 | memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) | |
2013 | { | |
2014 | static boolean_t memorystatus_freeze_swap_low = FALSE; | |
2015 | ||
2016 | lck_mtx_lock(&freezer_mutex); | |
2017 | ||
2018 | if (memorystatus_freeze_enabled) { | |
2019 | if ((memorystatus_frozen_count < memorystatus_frozen_processes_max) || | |
2020 | (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD)) { | |
2021 | if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { | |
2022 | /* Only freeze if we've not exceeded our pageout budgets.*/ | |
2023 | memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining); | |
2024 | ||
2025 | if (memorystatus_freeze_budget_pages_remaining) { | |
2026 | memorystatus_freeze_top_process(); | |
2027 | } else { | |
2028 | memorystatus_demote_frozen_processes(TRUE); /* urgent mode..force one demotion */ | |
2029 | } | |
2030 | } | |
2031 | } | |
2032 | } | |
2033 | ||
2034 | /* | |
2035 | * We use memorystatus_apps_idle_delay_time because if/when we adopt aging for applications, | |
2036 | * it'll tie neatly into running the freezer once we age an application. | |
2037 | * | |
2038 | * Till then, it serves as a good interval that can be tuned via a sysctl too. | |
2039 | */ | |
2040 | memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time; | |
2041 | ||
2042 | assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT); | |
2043 | lck_mtx_unlock(&freezer_mutex); | |
2044 | ||
2045 | thread_block((thread_continue_t) memorystatus_freeze_thread); | |
2046 | } | |
2047 | ||
2048 | boolean_t | |
2049 | memorystatus_freeze_thread_should_run(void) | |
2050 | { | |
2051 | /* | |
2052 | * No freezer_mutex held here...see why near call-site | |
2053 | * within memorystatus_pages_update(). | |
2054 | */ | |
2055 | ||
2056 | boolean_t should_run = FALSE; | |
2057 | ||
2058 | if (memorystatus_freeze_enabled == FALSE) { | |
2059 | goto out; | |
2060 | } | |
2061 | ||
2062 | if (memorystatus_available_pages > memorystatus_freeze_threshold) { | |
2063 | goto out; | |
2064 | } | |
2065 | ||
2066 | if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max) && | |
2067 | (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD)) { | |
2068 | goto out; | |
2069 | } | |
2070 | ||
2071 | if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) { | |
2072 | goto out; | |
2073 | } | |
2074 | ||
2075 | uint64_t curr_time = mach_absolute_time(); | |
2076 | ||
2077 | if (curr_time < memorystatus_freezer_thread_next_run_ts) { | |
2078 | goto out; | |
2079 | } | |
2080 | ||
2081 | should_run = TRUE; | |
2082 | ||
2083 | out: | |
2084 | return should_run; | |
2085 | } | |
2086 | ||
2087 | int | |
2088 | memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable) | |
2089 | { | |
2090 | proc_t p = PROC_NULL; | |
2091 | ||
2092 | if (pid == 0) { | |
2093 | return EINVAL; | |
2094 | } | |
2095 | ||
2096 | p = proc_find(pid); | |
2097 | if (!p) { | |
2098 | return ESRCH; | |
2099 | } | |
2100 | ||
2101 | /* | |
2102 | * Only allow this on the current proc for now. | |
2103 | * We can check for privileges and allow targeting another process in the future. | |
2104 | */ | |
2105 | if (p != current_proc()) { | |
2106 | proc_rele(p); | |
2107 | return EPERM; | |
2108 | } | |
2109 | ||
2110 | proc_list_lock(); | |
2111 | *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1); | |
2112 | proc_rele_locked(p); | |
2113 | proc_list_unlock(); | |
2114 | ||
2115 | return 0; | |
2116 | } | |
2117 | ||
2118 | int | |
2119 | memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable) | |
2120 | { | |
2121 | proc_t p = PROC_NULL; | |
2122 | ||
2123 | if (pid == 0) { | |
2124 | return EINVAL; | |
2125 | } | |
2126 | ||
2127 | /* | |
2128 | * To enable freezable status, you need to be root or an entitlement. | |
2129 | */ | |
2130 | if (is_freezable && | |
2131 | !kauth_cred_issuser(kauth_cred_get()) && | |
2132 | !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) { | |
2133 | return EPERM; | |
2134 | } | |
2135 | ||
2136 | p = proc_find(pid); | |
2137 | if (!p) { | |
2138 | return ESRCH; | |
2139 | } | |
2140 | ||
2141 | /* | |
2142 | * A process can change its own status. A coalition leader can | |
2143 | * change the status of coalition members. | |
2144 | */ | |
2145 | if (p != current_proc()) { | |
2146 | coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM); | |
2147 | if (!coalition_is_leader(proc_task(current_proc()), coal)) { | |
2148 | proc_rele(p); | |
2149 | return EPERM; | |
2150 | } | |
2151 | } | |
2152 | ||
2153 | proc_list_lock(); | |
2154 | if (is_freezable == FALSE) { | |
2155 | /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */ | |
2156 | p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED; | |
2157 | printf("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n", | |
2158 | p->p_pid, (*p->p_name ? p->p_name : "unknown")); | |
2159 | } else { | |
2160 | p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED; | |
2161 | printf("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n", | |
2162 | p->p_pid, (*p->p_name ? p->p_name : "unknown")); | |
2163 | } | |
2164 | proc_rele_locked(p); | |
2165 | proc_list_unlock(); | |
2166 | ||
2167 | return 0; | |
2168 | } | |
2169 | ||
2170 | static int | |
2171 | sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS | |
2172 | { | |
2173 | #pragma unused(oidp, arg1, arg2) | |
2174 | ||
2175 | if (!req->newptr) { | |
2176 | return EINVAL; | |
2177 | } | |
2178 | ||
2179 | /* Need to be root or have entitlement */ | |
2180 | if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) { | |
2181 | return EPERM; | |
2182 | } | |
2183 | ||
2184 | if (memorystatus_freeze_enabled == FALSE) { | |
2185 | return ENOTSUP; | |
2186 | } | |
2187 | ||
2188 | do_fastwake_warmup_all(); | |
2189 | ||
2190 | return 0; | |
2191 | } | |
2192 | ||
/*
 * Register kern.memorystatus_do_fastwake_warmup_all: write-only (CTLFLAG_WR),
 * hidden from sysctl listings (CTLFLAG_MASKED); privilege checks live in the
 * handler itself.
 */
SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", "");

#endif /* CONFIG_FREEZE */