]>
Commit | Line | Data |
---|---|---|
cb323159 A |
1 | /* |
2 | * Copyright (c) 2006-2018 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | * | |
28 | */ | |
29 | ||
30 | #include <kern/sched_prim.h> | |
31 | #include <kern/kalloc.h> | |
32 | #include <kern/assert.h> | |
33 | #include <kern/debug.h> | |
34 | #include <kern/locks.h> | |
35 | #include <kern/task.h> | |
36 | #include <kern/thread.h> | |
37 | #include <kern/host.h> | |
38 | #include <kern/policy_internal.h> | |
39 | #include <kern/thread_group.h> | |
40 | ||
cb323159 A |
41 | #include <libkern/libkern.h> |
42 | #include <mach/coalition.h> | |
43 | #include <mach/mach_time.h> | |
44 | #include <mach/task.h> | |
45 | #include <mach/host_priv.h> | |
46 | #include <mach/mach_host.h> | |
47 | #include <os/log.h> | |
48 | #include <pexpert/pexpert.h> | |
49 | #include <sys/coalition.h> | |
50 | #include <sys/kern_event.h> | |
51 | #include <sys/proc.h> | |
52 | #include <sys/proc_info.h> | |
53 | #include <sys/reason.h> | |
54 | #include <sys/signal.h> | |
55 | #include <sys/signalvar.h> | |
56 | #include <sys/sysctl.h> | |
57 | #include <sys/sysproto.h> | |
58 | #include <sys/wait.h> | |
59 | #include <sys/tree.h> | |
60 | #include <sys/priv.h> | |
61 | #include <vm/vm_pageout.h> | |
62 | #include <vm/vm_protos.h> | |
63 | #include <mach/machine/sdt.h> | |
64 | #include <libkern/section_keywords.h> | |
65 | #include <stdatomic.h> | |
66 | ||
f427ee49 A |
67 | #include <IOKit/IOBSD.h> |
68 | ||
cb323159 A |
69 | #if CONFIG_FREEZE |
70 | #include <vm/vm_map.h> | |
71 | #endif /* CONFIG_FREEZE */ | |
72 | ||
73 | #include <sys/kern_memorystatus.h> | |
74 | #include <sys/kern_memorystatus_freeze.h> | |
75 | #include <sys/kern_memorystatus_notify.h> | |
76 | ||
77 | #if CONFIG_JETSAM | |
78 | ||
79 | extern unsigned int memorystatus_available_pages; | |
80 | extern unsigned int memorystatus_available_pages_pressure; | |
81 | extern unsigned int memorystatus_available_pages_critical; | |
82 | extern unsigned int memorystatus_available_pages_critical_base; | |
83 | extern unsigned int memorystatus_available_pages_critical_idle_offset; | |
84 | ||
85 | #else /* CONFIG_JETSAM */ | |
86 | ||
87 | extern uint64_t memorystatus_available_pages; | |
88 | extern uint64_t memorystatus_available_pages_pressure; | |
89 | extern uint64_t memorystatus_available_pages_critical; | |
90 | ||
91 | #endif /* CONFIG_JETSAM */ | |
92 | ||
93 | unsigned int memorystatus_frozen_count = 0; | |
94 | unsigned int memorystatus_suspended_count = 0; | |
95 | unsigned long freeze_threshold_percentage = 50; | |
96 | ||
97 | #if CONFIG_FREEZE | |
98 | ||
c3c9b80d A |
99 | static LCK_GRP_DECLARE(freezer_lck_grp, "freezer"); |
100 | static LCK_MTX_DECLARE(freezer_mutex, &freezer_lck_grp); | |
cb323159 A |
101 | |
102 | /* Thresholds */ | |
103 | unsigned int memorystatus_freeze_threshold = 0; | |
104 | unsigned int memorystatus_freeze_pages_min = 0; | |
105 | unsigned int memorystatus_freeze_pages_max = 0; | |
106 | unsigned int memorystatus_freeze_suspended_threshold = FREEZE_SUSPENDED_THRESHOLD_DEFAULT; | |
107 | unsigned int memorystatus_freeze_daily_mb_max = FREEZE_DAILY_MB_MAX_DEFAULT; | |
2a1bd2d3 A |
108 | uint64_t memorystatus_freeze_budget_pages_remaining = 0; /* Remaining # of pages that can be frozen to disk */ |
109 | boolean_t memorystatus_freeze_degradation = FALSE; /* Protected by the freezer mutex. Signals we are in a degraded freeze mode. */ | |
cb323159 A |
110 | |
111 | unsigned int memorystatus_max_frozen_demotions_daily = 0; | |
112 | unsigned int memorystatus_thaw_count_demotion_threshold = 0; | |
113 | ||
114 | boolean_t memorystatus_freeze_enabled = FALSE; | |
115 | int memorystatus_freeze_wakeup = 0; | |
116 | int memorystatus_freeze_jetsam_band = 0; /* the jetsam band which will contain P_MEMSTAT_FROZEN processes */ | |
117 | ||
118 | #define MAX_XPC_SERVICE_PIDS 10 /* Max. # of XPC services per coalition we'll consider freezing. */ | |
119 | ||
120 | #ifdef XNU_KERNEL_PRIVATE | |
121 | ||
122 | unsigned int memorystatus_frozen_processes_max = 0; | |
123 | unsigned int memorystatus_frozen_shared_mb = 0; | |
124 | unsigned int memorystatus_frozen_shared_mb_max = 0; | |
125 | unsigned int memorystatus_freeze_shared_mb_per_process_max = 0; /* Max. MB allowed per process to be freezer-eligible. */ | |
126 | unsigned int memorystatus_freeze_private_shared_pages_ratio = 2; /* Ratio of private:shared pages for a process to be freezer-eligible. */ | |
f427ee49 A |
127 | unsigned int memorystatus_thaw_count = 0; /* # of thaws in the current freezer interval */ |
128 | uint64_t memorystatus_thaw_count_since_boot = 0; /* The number of thaws since boot */ | |
cb323159 A |
129 | unsigned int memorystatus_refreeze_eligible_count = 0; /* # of processes currently thawed i.e. have state on disk & in-memory */ |
130 | ||
c3c9b80d | 131 | struct memorystatus_freezer_stats_t memorystatus_freezer_stats = {0}; |
ea3f0419 | 132 | |
cb323159 A |
133 | #endif /* XNU_KERNEL_PRIVATE */ |
134 | ||
135 | static inline boolean_t memorystatus_can_freeze_processes(void); | |
136 | static boolean_t memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low); | |
137 | static boolean_t memorystatus_is_process_eligible_for_freeze(proc_t p); | |
138 | static void memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused); | |
f427ee49 | 139 | static void memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts); |
cb323159 A |
140 | |
141 | void memorystatus_disable_freeze(void); | |
142 | ||
143 | /* Stats */ | |
144 | static uint64_t memorystatus_freeze_pageouts = 0; | |
145 | ||
146 | /* Throttling */ | |
147 | #define DEGRADED_WINDOW_MINS (30) | |
148 | #define NORMAL_WINDOW_MINS (24 * 60) | |
149 | ||
ea3f0419 | 150 | /* Protected by the freezer_mutex */ |
cb323159 A |
151 | static throttle_interval_t throttle_intervals[] = { |
152 | { DEGRADED_WINDOW_MINS, 1, 0, 0, { 0, 0 }}, | |
153 | { NORMAL_WINDOW_MINS, 1, 0, 0, { 0, 0 }}, | |
154 | }; | |
155 | throttle_interval_t *degraded_throttle_window = &throttle_intervals[0]; | |
156 | throttle_interval_t *normal_throttle_window = &throttle_intervals[1]; | |
c3c9b80d | 157 | uint32_t memorystatus_freeze_current_interval = 0; |
cb323159 A |
158 | |
159 | extern uint64_t vm_swap_get_free_space(void); | |
160 | extern boolean_t vm_swap_max_budget(uint64_t *); | |
161 | extern int i_coal_jetsam_get_taskrole(coalition_t coal, task_t task); | |
162 | ||
163 | static void memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed); | |
164 | static void memorystatus_demote_frozen_processes(boolean_t force_one); | |
165 | ||
2a1bd2d3 A |
166 | static void memorystatus_freeze_handle_error(proc_t p, const int freezer_error_code, bool was_refreeze, pid_t pid, const coalition_t coalition, const char* log_prefix); |
167 | static void memorystatus_freeze_out_of_slots(void); | |
cb323159 A |
168 | static uint64_t memorystatus_freezer_thread_next_run_ts = 0; |
169 | ||
170 | /* Sysctls needed for aggd stats */ | |
171 | ||
172 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_count, 0, ""); | |
173 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_thaw_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count, 0, ""); | |
f427ee49 | 174 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_thaw_count_since_boot, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_thaw_count_since_boot, ""); |
cb323159 | 175 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_pageouts, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_pageouts, ""); |
c3c9b80d | 176 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_interval, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_current_interval, 0, ""); |
f427ee49 A |
177 | #if DEVELOPMENT || DEBUG |
178 | static int sysctl_memorystatus_freeze_budget_pages_remaining SYSCTL_HANDLER_ARGS | |
179 | { | |
180 | #pragma unused(arg1, arg2, oidp) | |
181 | int error, changed; | |
182 | uint64_t new_budget = memorystatus_freeze_budget_pages_remaining; | |
183 | mach_timespec_t now_ts; | |
184 | clock_sec_t sec; | |
185 | clock_nsec_t nsec; | |
186 | ||
187 | lck_mtx_lock(&freezer_mutex); | |
188 | ||
189 | error = sysctl_io_number(req, memorystatus_freeze_budget_pages_remaining, sizeof(uint64_t), &new_budget, &changed); | |
190 | if (changed) { | |
191 | /* Start a new interval with this budget. */ | |
192 | clock_get_system_nanotime(&sec, &nsec); | |
193 | now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX)); | |
194 | now_ts.tv_nsec = nsec; | |
195 | memorystatus_freeze_start_normal_throttle_interval((uint32_t) MIN(new_budget, UINT32_MAX), now_ts); | |
196 | /* Don't carry over any excess pageouts since we're forcing a new budget */ | |
197 | normal_throttle_window->pageouts = 0; | |
198 | memorystatus_freeze_budget_pages_remaining = normal_throttle_window->max_pageouts; | |
199 | } | |
200 | ||
201 | lck_mtx_unlock(&freezer_mutex); | |
202 | return error; | |
203 | } | |
204 | ||
205 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freeze_budget_pages_remaining, "Q", ""); | |
206 | #else /* DEVELOPMENT || DEBUG */ | |
cb323159 | 207 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freeze_budget_pages_remaining, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_budget_pages_remaining, ""); |
f427ee49 | 208 | #endif /* DEVELOPMENT || DEBUG */ |
ea3f0419 A |
209 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_excess_shared_memory_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_excess_shared_memory_count, ""); |
210 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_private_shared_ratio_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count, ""); | |
211 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_compressor_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_compressor_space_count, ""); | |
212 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_no_swap_space_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_no_swap_space_count, ""); | |
213 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_below_min_pages_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_below_min_pages_count, ""); | |
214 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_low_probability_of_use_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_low_probability_of_use_count, ""); | |
215 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_error_other_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_error_other_count, ""); | |
216 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_process_considered_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_process_considered_count, ""); | |
217 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_below_threshold_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_below_threshold_count, ""); | |
218 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_full_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_full_count, ""); | |
219 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_skipped_shared_mb_high_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count, ""); | |
220 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_shared_pages_skipped, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_shared_pages_skipped, ""); | |
f427ee49 A |
221 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_bytes_refrozen, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_bytes_refrozen, ""); |
222 | SYSCTL_QUAD(_kern, OID_AUTO, memorystatus_freezer_refreeze_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freezer_stats.mfs_refreeze_count, ""); | |
223 | ||
2a1bd2d3 A |
224 | static_assert(_kMemorystatusFreezeSkipReasonMax <= UINT8_MAX); |
225 | ||
ea3f0419 A |
226 | |
227 | /* | |
228 | * Calculates the hit rate for the freezer. | |
229 | * The hit rate is defined as the percentage of procs that are currently in the | |
230 | * freezer which we have thawed. | |
231 | * A low hit rate means we're freezing bad candidates since they're not re-used. | |
232 | */ | |
233 | static int sysctl_memorystatus_freezer_thaw_percentage SYSCTL_HANDLER_ARGS | |
234 | { | |
235 | #pragma unused(arg1, arg2) | |
c3c9b80d | 236 | uint64_t thaw_count = 0, frozen_count = 0; |
ea3f0419 | 237 | int thaw_percentage = 100; |
c3c9b80d A |
238 | frozen_count = os_atomic_load(&(memorystatus_freezer_stats.mfs_processes_frozen), relaxed); |
239 | thaw_count = os_atomic_load(&(memorystatus_freezer_stats.mfs_processes_thawed), relaxed); | |
ea3f0419 | 240 | |
ea3f0419 | 241 | if (frozen_count > 0) { |
c3c9b80d A |
242 | if (thaw_count > frozen_count) { |
243 | /* | |
244 | * Both counts are using relaxed atomics & could be out of sync | |
245 | * causing us to see thaw_percentage > 100. | |
246 | */ | |
247 | thaw_percentage = 100; | |
248 | } else { | |
249 | thaw_percentage = (int)(100 * thaw_count / frozen_count); | |
250 | } | |
ea3f0419 A |
251 | } |
252 | return sysctl_handle_int(oidp, &thaw_percentage, 0, req); | |
253 | } | |
254 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freezer_thaw_percentage, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, &sysctl_memorystatus_freezer_thaw_percentage, "I", ""); | |
cb323159 | 255 | |
f427ee49 | 256 | #define FREEZER_ERROR_STRING_LENGTH 128 |
cb323159 | 257 | |
c3c9b80d A |
258 | EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_min, &memorystatus_freeze_pages_min, 0, UINT32_MAX, ""); |
259 | EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_pages_max, &memorystatus_freeze_pages_max, 0, UINT32_MAX, ""); | |
260 | EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_processes_max, &memorystatus_frozen_processes_max, 0, UINT32_MAX, ""); | |
261 | EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_jetsam_band, &memorystatus_freeze_jetsam_band, JETSAM_PRIORITY_IDLE, JETSAM_PRIORITY_MAX - 1, ""); | |
262 | EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_private_shared_pages_ratio, &memorystatus_freeze_private_shared_pages_ratio, 0, UINT32_MAX, ""); | |
263 | EXPERIMENT_FACTOR_UINT(_kern, memorystatus_freeze_min_processes, &memorystatus_freeze_suspended_threshold, 0, UINT32_MAX, ""); | |
264 | /* | |
265 | * max. # of frozen process demotions we will allow in our daily cycle. | |
266 | */ | |
267 | EXPERIMENT_FACTOR_UINT(_kern, memorystatus_max_freeze_demotions_daily, &memorystatus_max_frozen_demotions_daily, 0, UINT32_MAX, ""); | |
268 | ||
269 | /* | |
270 | * min # of thaws needed by a process to protect it from getting demoted into the IDLE band. | |
271 | */ | |
272 | EXPERIMENT_FACTOR_UINT(_kern, memorystatus_thaw_count_demotion_threshold, &memorystatus_thaw_count_demotion_threshold, 0, UINT32_MAX, ""); | |
273 | ||
cb323159 A |
274 | #if DEVELOPMENT || DEBUG |
275 | ||
cb323159 A |
276 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_daily_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_daily_mb_max, 0, ""); |
277 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_degraded_mode, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_freeze_degradation, 0, ""); | |
278 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_threshold, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_threshold, 0, ""); | |
cb323159 | 279 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_refreeze_eligible_count, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_refreeze_eligible_count, 0, ""); |
cb323159 A |
280 | |
281 | /* | |
282 | * Max. shared-anonymous memory in MB that can be held by frozen processes in the high jetsam band. | |
283 | * "0" means no limit. | |
284 | * Default is 10% of system-wide task limit. | |
285 | */ | |
286 | ||
287 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb_max, 0, ""); | |
288 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb, CTLFLAG_RD | CTLFLAG_LOCKED, &memorystatus_frozen_shared_mb, 0, ""); | |
289 | ||
290 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_shared_mb_per_process_max, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_shared_mb_per_process_max, 0, ""); | |
cb323159 A |
291 | |
292 | boolean_t memorystatus_freeze_throttle_enabled = TRUE; | |
293 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_throttle_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_throttle_enabled, 0, ""); | |
294 | ||
295 | /* | |
296 | * When set to true, this keeps frozen processes in the compressor pool in memory, instead of swapping them out to disk. | |
297 | * Exposed via the sysctl kern.memorystatus_freeze_to_memory. | |
298 | */ | |
299 | boolean_t memorystatus_freeze_to_memory = FALSE; | |
300 | SYSCTL_UINT(_kern, OID_AUTO, memorystatus_freeze_to_memory, CTLFLAG_RW | CTLFLAG_LOCKED, &memorystatus_freeze_to_memory, 0, ""); | |
301 | ||
302 | #define VM_PAGES_FOR_ALL_PROCS (2) | |
f427ee49 | 303 | |
cb323159 A |
304 | /* |
305 | * Manual trigger of freeze and thaw for dev / debug kernels only. | |
306 | */ | |
307 | static int | |
308 | sysctl_memorystatus_freeze SYSCTL_HANDLER_ARGS | |
309 | { | |
310 | #pragma unused(arg1, arg2) | |
311 | int error, pid = 0; | |
312 | proc_t p; | |
313 | int freezer_error_code = 0; | |
314 | pid_t pid_list[MAX_XPC_SERVICE_PIDS]; | |
315 | int ntasks = 0; | |
316 | coalition_t coal = COALITION_NULL; | |
317 | ||
318 | if (memorystatus_freeze_enabled == FALSE) { | |
319 | printf("sysctl_freeze: Freeze is DISABLED\n"); | |
320 | return ENOTSUP; | |
321 | } | |
322 | ||
323 | error = sysctl_handle_int(oidp, &pid, 0, req); | |
324 | if (error || !req->newptr) { | |
325 | return error; | |
326 | } | |
327 | ||
328 | if (pid == VM_PAGES_FOR_ALL_PROCS) { | |
329 | vm_pageout_anonymous_pages(); | |
330 | ||
331 | return 0; | |
332 | } | |
333 | ||
334 | lck_mtx_lock(&freezer_mutex); | |
335 | ||
336 | again: | |
337 | p = proc_find(pid); | |
338 | if (p != NULL) { | |
ea3f0419 | 339 | memorystatus_freezer_stats.mfs_process_considered_count++; |
cb323159 A |
340 | uint32_t purgeable, wired, clean, dirty, shared; |
341 | uint32_t max_pages = 0, state = 0; | |
342 | ||
343 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
344 | /* | |
345 | * Freezer backed by the compressor and swap file(s) | |
346 | * will hold compressed data. | |
347 | * | |
348 | * Set the sysctl kern.memorystatus_freeze_to_memory to true to keep compressed data from | |
349 | * being swapped out to disk. Note that this disables freezer swap support globally, | |
350 | * not just for the process being frozen. | |
351 | * | |
352 | * | |
353 | * We don't care about the global freezer budget or the process's (min/max) budget here. | |
354 | * The freeze sysctl is meant to force-freeze a process. | |
355 | * | |
356 | * We also don't update any global or process stats on this path, so that the jetsam/ freeze | |
357 | * logic remains unaffected. The tasks we're performing here are: freeze the process, set the | |
358 | * P_MEMSTAT_FROZEN bit, and elevate the process to a higher band (if the freezer is active). | |
359 | */ | |
360 | max_pages = memorystatus_freeze_pages_max; | |
361 | } else { | |
362 | /* | |
363 | * We only have the compressor without any swap. | |
364 | */ | |
365 | max_pages = UINT32_MAX - 1; | |
366 | } | |
367 | ||
368 | proc_list_lock(); | |
369 | state = p->p_memstat_state; | |
370 | proc_list_unlock(); | |
371 | ||
372 | /* | |
373 | * The jetsam path also verifies that the process is a suspended App. We don't care about that here. | |
374 | * We simply ensure that jetsam is not already working on the process and that the process has not | |
375 | * explicitly disabled freezing. | |
376 | */ | |
377 | if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED)) { | |
378 | printf("sysctl_freeze: p_memstat_state check failed, process is%s%s%s\n", | |
379 | (state & P_MEMSTAT_TERMINATED) ? " terminated" : "", | |
380 | (state & P_MEMSTAT_LOCKED) ? " locked" : "", | |
381 | (state & P_MEMSTAT_FREEZE_DISABLED) ? " unfreezable" : ""); | |
382 | ||
383 | proc_rele(p); | |
384 | lck_mtx_unlock(&freezer_mutex); | |
385 | return EPERM; | |
386 | } | |
387 | ||
388 | error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, FALSE /* eval only */); | |
ea3f0419 A |
389 | if (!error || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) { |
390 | memorystatus_freezer_stats.mfs_shared_pages_skipped += shared; | |
391 | } | |
cb323159 A |
392 | |
393 | if (error) { | |
2a1bd2d3 | 394 | memorystatus_freeze_handle_error(p, freezer_error_code, state & P_MEMSTAT_FROZEN, pid, coal, "sysctl_freeze"); |
cb323159 A |
395 | if (error == KERN_NO_SPACE) { |
396 | /* Make it easy to distinguish between failures due to low compressor/ swap space and other failures. */ | |
397 | error = ENOSPC; | |
398 | } else { | |
399 | error = EIO; | |
400 | } | |
401 | } else { | |
402 | proc_list_lock(); | |
403 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) { | |
404 | p->p_memstat_state |= P_MEMSTAT_FROZEN; | |
2a1bd2d3 | 405 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone; |
cb323159 | 406 | memorystatus_frozen_count++; |
c3c9b80d | 407 | os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed); |
2a1bd2d3 A |
408 | if (memorystatus_frozen_count == memorystatus_frozen_processes_max) { |
409 | memorystatus_freeze_out_of_slots(); | |
410 | } | |
f427ee49 A |
411 | } else { |
412 | // This was a re-freeze | |
413 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
414 | memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE; | |
415 | memorystatus_freezer_stats.mfs_refreeze_count++; | |
416 | } | |
cb323159 A |
417 | } |
418 | p->p_memstat_frozen_count++; | |
419 | ||
420 | ||
421 | proc_list_unlock(); | |
422 | ||
423 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
424 | /* | |
425 | * We elevate only if we are going to swap out the data. | |
426 | */ | |
427 | error = memorystatus_update_inactive_jetsam_priority_band(pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, | |
428 | memorystatus_freeze_jetsam_band, TRUE); | |
429 | ||
430 | if (error) { | |
431 | printf("sysctl_freeze: Elevating frozen process to higher jetsam band failed with %d\n", error); | |
432 | } | |
433 | } | |
434 | } | |
435 | ||
436 | if ((error == 0) && (coal == NULL)) { | |
437 | /* | |
438 | * We froze a process and so we check to see if it was | |
439 | * a coalition leader and if it has XPC services that | |
440 | * might need freezing. | |
441 | * Only one leader can be frozen at a time and so we shouldn't | |
442 | * enter this block more than once per call. Hence the | |
443 | * check that 'coal' has to be NULL. We should make this an | |
444 | * assert() or panic() once we have a much more concrete way | |
445 | * to detect an app vs a daemon. | |
446 | */ | |
447 | ||
448 | task_t curr_task = NULL; | |
449 | ||
450 | curr_task = proc_task(p); | |
451 | coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM); | |
452 | if (coalition_is_leader(curr_task, coal)) { | |
453 | ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC, | |
454 | COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS); | |
455 | ||
456 | if (ntasks > MAX_XPC_SERVICE_PIDS) { | |
457 | ntasks = MAX_XPC_SERVICE_PIDS; | |
458 | } | |
459 | } | |
460 | } | |
461 | ||
462 | proc_rele(p); | |
463 | ||
464 | while (ntasks) { | |
465 | pid = pid_list[--ntasks]; | |
466 | goto again; | |
467 | } | |
468 | ||
469 | lck_mtx_unlock(&freezer_mutex); | |
470 | return error; | |
471 | } else { | |
472 | printf("sysctl_freeze: Invalid process\n"); | |
473 | } | |
474 | ||
475 | ||
476 | lck_mtx_unlock(&freezer_mutex); | |
477 | return EINVAL; | |
478 | } | |
479 | ||
480 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_freeze, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, | |
481 | 0, 0, &sysctl_memorystatus_freeze, "I", ""); | |
482 | ||
483 | /* | |
484 | * Manual trigger of agressive frozen demotion for dev / debug kernels only. | |
485 | */ | |
486 | static int | |
487 | sysctl_memorystatus_demote_frozen_process SYSCTL_HANDLER_ARGS | |
488 | { | |
489 | #pragma unused(arg1, arg2, oidp, req) | |
ea3f0419 A |
490 | int error, val; |
491 | /* | |
492 | * Only demote on write to prevent demoting during `sysctl -a`. | |
493 | * The actual value written doesn't matter. | |
494 | */ | |
495 | error = sysctl_handle_int(oidp, &val, 0, req); | |
496 | if (error || !req->newptr) { | |
497 | return error; | |
498 | } | |
cb323159 A |
499 | memorystatus_demote_frozen_processes(false); |
500 | return 0; | |
501 | } | |
502 | ||
ea3f0419 | 503 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_demote_frozen_processes, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, 0, 0, &sysctl_memorystatus_demote_frozen_process, "I", ""); |
cb323159 A |
504 | |
505 | static int | |
506 | sysctl_memorystatus_available_pages_thaw SYSCTL_HANDLER_ARGS | |
507 | { | |
508 | #pragma unused(arg1, arg2) | |
509 | ||
510 | int error, pid = 0; | |
511 | proc_t p; | |
512 | ||
513 | if (memorystatus_freeze_enabled == FALSE) { | |
514 | return ENOTSUP; | |
515 | } | |
516 | ||
517 | error = sysctl_handle_int(oidp, &pid, 0, req); | |
518 | if (error || !req->newptr) { | |
519 | return error; | |
520 | } | |
521 | ||
522 | if (pid == VM_PAGES_FOR_ALL_PROCS) { | |
523 | do_fastwake_warmup_all(); | |
524 | return 0; | |
525 | } else { | |
526 | p = proc_find(pid); | |
527 | if (p != NULL) { | |
528 | error = task_thaw(p->task); | |
529 | ||
530 | if (error) { | |
531 | error = EIO; | |
532 | } else { | |
533 | /* | |
534 | * task_thaw() succeeded. | |
535 | * | |
536 | * We increment memorystatus_frozen_count on the sysctl freeze path. | |
537 | * And so we need the P_MEMSTAT_FROZEN to decrement the frozen count | |
538 | * when this process exits. | |
539 | * | |
540 | * proc_list_lock(); | |
541 | * p->p_memstat_state &= ~P_MEMSTAT_FROZEN; | |
542 | * proc_list_unlock(); | |
543 | */ | |
544 | } | |
545 | proc_rele(p); | |
546 | return error; | |
547 | } | |
548 | } | |
549 | ||
550 | return EINVAL; | |
551 | } | |
552 | ||
553 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_thaw, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, | |
554 | 0, 0, &sysctl_memorystatus_available_pages_thaw, "I", ""); | |
555 | ||
556 | ||
557 | typedef struct _global_freezable_status { | |
558 | boolean_t freeze_pages_threshold_crossed; | |
559 | boolean_t freeze_eligible_procs_available; | |
560 | boolean_t freeze_scheduled_in_future; | |
561 | }global_freezable_status_t; | |
562 | ||
563 | typedef struct _proc_freezable_status { | |
564 | boolean_t freeze_has_memstat_state; | |
565 | boolean_t freeze_has_pages_min; | |
566 | int freeze_has_probability; | |
567 | int freeze_leader_eligible; | |
568 | boolean_t freeze_attempted; | |
569 | uint32_t p_memstat_state; | |
570 | uint32_t p_pages; | |
571 | int p_freeze_error_code; | |
572 | int p_pid; | |
573 | int p_leader_pid; | |
574 | char p_name[MAXCOMLEN + 1]; | |
575 | }proc_freezable_status_t; | |
576 | ||
577 | #define MAX_FREEZABLE_PROCESSES 200 /* Total # of processes in band 0 that we evaluate for freezability */ | |
578 | ||
579 | /* | |
580 | * For coalition based freezing evaluations, we proceed as follows: | |
581 | * - detect that the process is a coalition member and a XPC service | |
582 | * - mark its 'freeze_leader_eligible' field with FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN | |
583 | * - continue its freezability evaluation assuming its leader will be freezable too | |
584 | * | |
585 | * Once we are done evaluating all processes, we do a quick run thru all | |
586 | * processes and for a coalition member XPC service we look up the 'freezable' | |
587 | * status of its leader and iff: | |
588 | * - the xpc service is freezable i.e. its individual freeze evaluation worked | |
589 | * - and, its leader is also marked freezable | |
590 | * we update its 'freeze_leader_eligible' to FREEZE_PROC_LEADER_FREEZABLE_SUCCESS. | |
591 | */ | |
592 | ||
593 | #define FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN (-1) | |
594 | #define FREEZE_PROC_LEADER_FREEZABLE_SUCCESS (1) | |
595 | #define FREEZE_PROC_LEADER_FREEZABLE_FAILURE (2) | |
596 | ||
597 | static int | |
598 | memorystatus_freezer_get_status(user_addr_t buffer, size_t buffer_size, int32_t *retval) | |
599 | { | |
600 | uint32_t proc_count = 0, freeze_eligible_proc_considered = 0, band = 0, xpc_index = 0, leader_index = 0; | |
601 | global_freezable_status_t *list_head; | |
602 | proc_freezable_status_t *list_entry, *list_entry_start; | |
f427ee49 | 603 | size_t list_size = 0, entry_count = 0; |
cb323159 A |
604 | proc_t p, leader_proc; |
605 | memstat_bucket_t *bucket; | |
f427ee49 | 606 | uint32_t state = 0, pages = 0; |
cb323159 A |
607 | boolean_t try_freeze = TRUE, xpc_skip_size_probability_check = FALSE; |
608 | int error = 0, probability_of_use = 0; | |
609 | pid_t leader_pid = 0; | |
610 | ||
611 | ||
612 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) { | |
613 | return ENOTSUP; | |
614 | } | |
615 | ||
616 | list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES); | |
617 | ||
618 | if (buffer_size < list_size) { | |
619 | return EINVAL; | |
620 | } | |
621 | ||
f427ee49 | 622 | list_head = kheap_alloc(KHEAP_TEMP, list_size, Z_WAITOK | Z_ZERO); |
cb323159 A |
623 | if (list_head == NULL) { |
624 | return ENOMEM; | |
625 | } | |
626 | ||
cb323159 A |
627 | list_size = sizeof(global_freezable_status_t); |
628 | ||
629 | proc_list_lock(); | |
630 | ||
631 | uint64_t curr_time = mach_absolute_time(); | |
632 | ||
633 | list_head->freeze_pages_threshold_crossed = (memorystatus_available_pages < memorystatus_freeze_threshold); | |
634 | list_head->freeze_eligible_procs_available = ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold); | |
635 | list_head->freeze_scheduled_in_future = (curr_time < memorystatus_freezer_thread_next_run_ts); | |
636 | ||
637 | list_entry_start = (proc_freezable_status_t*) ((uintptr_t)list_head + sizeof(global_freezable_status_t)); | |
638 | list_entry = list_entry_start; | |
639 | ||
640 | bucket = &memstat_bucket[JETSAM_PRIORITY_IDLE]; | |
641 | ||
642 | entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t)); | |
643 | ||
644 | p = memorystatus_get_first_proc_locked(&band, FALSE); | |
645 | proc_count++; | |
646 | ||
647 | while ((proc_count <= MAX_FREEZABLE_PROCESSES) && | |
648 | (p) && | |
649 | (list_size < buffer_size)) { | |
650 | if (isSysProc(p)) { | |
651 | /* | |
652 | * Daemon:- We will consider freezing it iff: | |
653 | * - it belongs to a coalition and the leader is freeze-eligible (delayed evaluation) | |
654 | * - its role in the coalition is XPC service. | |
655 | * | |
656 | * We skip memory size requirements in this case. | |
657 | */ | |
658 | ||
659 | coalition_t coal = COALITION_NULL; | |
660 | task_t leader_task = NULL, curr_task = NULL; | |
661 | int task_role_in_coalition = 0; | |
662 | ||
663 | curr_task = proc_task(p); | |
664 | coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM); | |
665 | ||
666 | if (coal == COALITION_NULL || coalition_is_leader(curr_task, coal)) { | |
667 | /* | |
668 | * By default, XPC services without an app | |
669 | * will be the leader of their own single-member | |
670 | * coalition. | |
671 | */ | |
672 | goto skip_ineligible_xpc; | |
673 | } | |
674 | ||
675 | leader_task = coalition_get_leader(coal); | |
676 | if (leader_task == TASK_NULL) { | |
677 | /* | |
678 | * This jetsam coalition is currently leader-less. | |
679 | * This could happen if the app died, but XPC services | |
680 | * have not yet exited. | |
681 | */ | |
682 | goto skip_ineligible_xpc; | |
683 | } | |
684 | ||
685 | leader_proc = (proc_t)get_bsdtask_info(leader_task); | |
686 | task_deallocate(leader_task); | |
687 | ||
688 | if (leader_proc == PROC_NULL) { | |
689 | /* leader task is exiting */ | |
690 | goto skip_ineligible_xpc; | |
691 | } | |
692 | ||
693 | task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task); | |
694 | ||
695 | if (task_role_in_coalition == COALITION_TASKROLE_XPC) { | |
696 | xpc_skip_size_probability_check = TRUE; | |
697 | leader_pid = leader_proc->p_pid; | |
698 | goto continue_eval; | |
699 | } | |
700 | ||
701 | skip_ineligible_xpc: | |
702 | p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
703 | proc_count++; | |
704 | continue; | |
705 | } | |
706 | ||
707 | continue_eval: | |
708 | strlcpy(list_entry->p_name, p->p_name, MAXCOMLEN + 1); | |
709 | ||
710 | list_entry->p_pid = p->p_pid; | |
711 | ||
712 | state = p->p_memstat_state; | |
713 | ||
714 | if ((state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) || | |
715 | !(state & P_MEMSTAT_SUSPENDED)) { | |
716 | try_freeze = list_entry->freeze_has_memstat_state = FALSE; | |
717 | } else { | |
718 | try_freeze = list_entry->freeze_has_memstat_state = TRUE; | |
719 | } | |
720 | ||
721 | list_entry->p_memstat_state = state; | |
722 | ||
723 | if (xpc_skip_size_probability_check == TRUE) { | |
724 | /* | |
725 | * Assuming the coalition leader is freezable | |
726 | * we don't care re. minimum pages and probability | |
727 | * as long as the process isn't marked P_MEMSTAT_FREEZE_DISABLED. | |
728 | * XPC services have to be explicitly opted-out of the disabled | |
729 | * state. And we checked that state above. | |
730 | */ | |
731 | list_entry->freeze_has_pages_min = TRUE; | |
732 | list_entry->p_pages = -1; | |
733 | list_entry->freeze_has_probability = -1; | |
734 | ||
735 | list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN; | |
736 | list_entry->p_leader_pid = leader_pid; | |
737 | ||
738 | xpc_skip_size_probability_check = FALSE; | |
739 | } else { | |
740 | list_entry->freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; /* Apps are freeze eligible and their own leaders. */ | |
741 | list_entry->p_leader_pid = 0; /* Setting this to 0 signifies this isn't a coalition driven freeze. */ | |
742 | ||
743 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL); | |
744 | if (pages < memorystatus_freeze_pages_min) { | |
745 | try_freeze = list_entry->freeze_has_pages_min = FALSE; | |
746 | } else { | |
747 | list_entry->freeze_has_pages_min = TRUE; | |
748 | } | |
749 | ||
750 | list_entry->p_pages = pages; | |
751 | ||
752 | if (entry_count) { | |
753 | uint32_t j = 0; | |
754 | for (j = 0; j < entry_count; j++) { | |
755 | if (strncmp(memorystatus_global_probabilities_table[j].proc_name, | |
756 | p->p_name, | |
c3c9b80d | 757 | MAXCOMLEN) == 0) { |
cb323159 A |
758 | probability_of_use = memorystatus_global_probabilities_table[j].use_probability; |
759 | break; | |
760 | } | |
761 | } | |
762 | ||
763 | list_entry->freeze_has_probability = probability_of_use; | |
764 | ||
765 | try_freeze = ((probability_of_use > 0) && try_freeze); | |
766 | } else { | |
767 | list_entry->freeze_has_probability = -1; | |
768 | } | |
769 | } | |
770 | ||
771 | if (try_freeze) { | |
772 | uint32_t purgeable, wired, clean, dirty, shared; | |
773 | uint32_t max_pages = 0; | |
774 | int freezer_error_code = 0; | |
775 | ||
776 | error = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, max_pages, &shared, &freezer_error_code, TRUE /* eval only */); | |
777 | ||
778 | if (error) { | |
779 | list_entry->p_freeze_error_code = freezer_error_code; | |
780 | } | |
781 | ||
782 | list_entry->freeze_attempted = TRUE; | |
783 | } | |
784 | ||
785 | list_entry++; | |
786 | freeze_eligible_proc_considered++; | |
787 | ||
788 | list_size += sizeof(proc_freezable_status_t); | |
789 | ||
790 | p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
791 | proc_count++; | |
792 | } | |
793 | ||
794 | proc_list_unlock(); | |
795 | ||
796 | list_entry = list_entry_start; | |
797 | ||
798 | for (xpc_index = 0; xpc_index < freeze_eligible_proc_considered; xpc_index++) { | |
799 | if (list_entry[xpc_index].freeze_leader_eligible == FREEZE_PROC_LEADER_FREEZABLE_UNKNOWN) { | |
800 | leader_pid = list_entry[xpc_index].p_leader_pid; | |
801 | ||
802 | leader_proc = proc_find(leader_pid); | |
803 | ||
804 | if (leader_proc) { | |
805 | if (leader_proc->p_memstat_state & P_MEMSTAT_FROZEN) { | |
806 | /* | |
807 | * Leader has already been frozen. | |
808 | */ | |
809 | list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; | |
810 | proc_rele(leader_proc); | |
811 | continue; | |
812 | } | |
813 | proc_rele(leader_proc); | |
814 | } | |
815 | ||
816 | for (leader_index = 0; leader_index < freeze_eligible_proc_considered; leader_index++) { | |
817 | if (list_entry[leader_index].p_pid == leader_pid) { | |
818 | if (list_entry[leader_index].freeze_attempted && list_entry[leader_index].p_freeze_error_code == 0) { | |
819 | list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_SUCCESS; | |
820 | } else { | |
821 | list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE; | |
822 | list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC; | |
823 | } | |
824 | break; | |
825 | } | |
826 | } | |
827 | ||
828 | /* | |
829 | * Didn't find the leader entry. This might be likely because | |
830 | * the leader never made it down to band 0. | |
831 | */ | |
832 | if (leader_index == freeze_eligible_proc_considered) { | |
833 | list_entry[xpc_index].freeze_leader_eligible = FREEZE_PROC_LEADER_FREEZABLE_FAILURE; | |
834 | list_entry[xpc_index].p_freeze_error_code = FREEZER_ERROR_GENERIC; | |
835 | } | |
836 | } | |
837 | } | |
838 | ||
f427ee49 | 839 | buffer_size = MIN(list_size, INT32_MAX); |
cb323159 A |
840 | |
841 | error = copyout(list_head, buffer, buffer_size); | |
842 | if (error == 0) { | |
f427ee49 | 843 | *retval = (int32_t) buffer_size; |
cb323159 A |
844 | } else { |
845 | *retval = 0; | |
846 | } | |
847 | ||
848 | list_size = sizeof(global_freezable_status_t) + (sizeof(proc_freezable_status_t) * MAX_FREEZABLE_PROCESSES); | |
f427ee49 | 849 | kheap_free(KHEAP_TEMP, list_head, list_size); |
cb323159 A |
850 | |
851 | MEMORYSTATUS_DEBUG(1, "memorystatus_freezer_get_status: returning %d (%lu - size)\n", error, (unsigned long)*list_size); | |
852 | ||
853 | return error; | |
854 | } | |
855 | ||
f427ee49 A |
856 | #endif /* DEVELOPMENT || DEBUG */ |
857 | ||
858 | /* | |
859 | * Get a list of all processes in the freezer band which are currently frozen. | |
860 | * Used by powerlog to collect analytics on frozen process. | |
861 | */ | |
862 | static int | |
863 | memorystatus_freezer_get_procs(user_addr_t buffer, size_t buffer_size, int32_t *retval) | |
864 | { | |
865 | global_frozen_procs_t *frozen_procs = NULL; | |
866 | uint32_t band = memorystatus_freeze_jetsam_band; | |
867 | proc_t p; | |
868 | uint32_t state; | |
869 | int error; | |
870 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) { | |
871 | return ENOTSUP; | |
872 | } | |
873 | if (buffer_size < sizeof(global_frozen_procs_t)) { | |
874 | return EINVAL; | |
875 | } | |
876 | frozen_procs = kheap_alloc(KHEAP_TEMP, sizeof(global_frozen_procs_t), | |
877 | Z_WAITOK | Z_ZERO); | |
878 | if (frozen_procs == NULL) { | |
879 | return ENOMEM; | |
880 | } | |
881 | ||
882 | proc_list_lock(); | |
883 | p = memorystatus_get_first_proc_locked(&band, FALSE); | |
884 | while (p && frozen_procs->gfp_num_frozen < FREEZER_CONTROL_GET_PROCS_MAX_COUNT) { | |
885 | state = p->p_memstat_state; | |
886 | if (state & P_MEMSTAT_FROZEN) { | |
887 | frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_pid = p->p_pid; | |
888 | strlcpy(frozen_procs->gfp_procs[frozen_procs->gfp_num_frozen].fp_name, | |
889 | p->p_name, sizeof(proc_name_t)); | |
890 | frozen_procs->gfp_num_frozen++; | |
891 | } | |
892 | p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
893 | } | |
894 | proc_list_unlock(); | |
895 | ||
896 | buffer_size = MIN(buffer_size, sizeof(global_frozen_procs_t)); | |
897 | error = copyout(frozen_procs, buffer, buffer_size); | |
898 | if (error == 0) { | |
899 | *retval = (int32_t) buffer_size; | |
900 | } else { | |
901 | *retval = 0; | |
902 | } | |
903 | kheap_free(KHEAP_TEMP, frozen_procs, sizeof(global_frozen_procs_t)); | |
904 | ||
905 | return error; | |
906 | } | |
907 | ||
cb323159 A |
908 | int |
909 | memorystatus_freezer_control(int32_t flags, user_addr_t buffer, size_t buffer_size, int32_t *retval) | |
910 | { | |
911 | int err = ENOTSUP; | |
912 | ||
f427ee49 | 913 | #if DEVELOPMENT || DEBUG |
cb323159 A |
914 | if (flags == FREEZER_CONTROL_GET_STATUS) { |
915 | err = memorystatus_freezer_get_status(buffer, buffer_size, retval); | |
916 | } | |
f427ee49 A |
917 | #endif /* DEVELOPMENT || DEBUG */ |
918 | if (flags == FREEZER_CONTROL_GET_PROCS) { | |
919 | err = memorystatus_freezer_get_procs(buffer, buffer_size, retval); | |
920 | } | |
cb323159 A |
921 | |
922 | return err; | |
923 | } | |
924 | ||
cb323159 A |
925 | extern void vm_swap_consider_defragmenting(int); |
926 | extern boolean_t memorystatus_kill_elevated_process(uint32_t, os_reason_t, unsigned int, int, uint32_t *, uint64_t *); | |
927 | ||
928 | /* | |
929 | * This routine will _jetsam_ all frozen processes | |
930 | * and reclaim the swap space immediately. | |
931 | * | |
932 | * So freeze has to be DISABLED when we call this routine. | |
933 | */ | |
934 | ||
935 | void | |
936 | memorystatus_disable_freeze(void) | |
937 | { | |
938 | memstat_bucket_t *bucket; | |
939 | int bucket_count = 0, retries = 0; | |
940 | boolean_t retval = FALSE, killed = FALSE; | |
941 | uint32_t errors = 0, errors_over_prev_iteration = 0; | |
942 | os_reason_t jetsam_reason = 0; | |
943 | unsigned int band = 0; | |
944 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
945 | uint64_t memory_reclaimed = 0, footprint = 0; | |
946 | ||
947 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_START, | |
948 | memorystatus_available_pages, 0, 0, 0, 0); | |
949 | ||
950 | assert(memorystatus_freeze_enabled == FALSE); | |
951 | ||
952 | jetsam_reason = os_reason_create(OS_REASON_JETSAM, JETSAM_REASON_MEMORY_DISK_SPACE_SHORTAGE); | |
953 | if (jetsam_reason == OS_REASON_NULL) { | |
954 | printf("memorystatus_disable_freeze: failed to allocate jetsam reason\n"); | |
955 | } | |
956 | ||
957 | /* | |
958 | * Let's relocate all frozen processes into band 8. Demoted frozen processes | |
959 | * are sitting in band 0 currently and it's possible to have a frozen process | |
960 | * in the FG band being actively used. We don't reset its frozen state when | |
961 | * it is resumed because it has state on disk. | |
962 | * | |
963 | * We choose to do this relocation rather than implement a new 'kill frozen' | |
964 | * process function for these reasons: | |
965 | * - duplication of code: too many kill functions exist and we need to rework them better. | |
966 | * - disk-space-shortage kills are rare | |
967 | * - not having the 'real' jetsam band at time of the this frozen kill won't preclude us | |
968 | * from answering any imp. questions re. jetsam policy/effectiveness. | |
969 | * | |
970 | * This is essentially what memorystatus_update_inactive_jetsam_priority_band() does while | |
971 | * avoiding the application of memory limits. | |
972 | */ | |
973 | ||
974 | again: | |
975 | proc_list_lock(); | |
976 | ||
977 | band = JETSAM_PRIORITY_IDLE; | |
978 | p = PROC_NULL; | |
979 | next_p = PROC_NULL; | |
980 | ||
981 | next_p = memorystatus_get_first_proc_locked(&band, TRUE); | |
982 | while (next_p) { | |
983 | p = next_p; | |
984 | next_p = memorystatus_get_next_proc_locked(&band, p, TRUE); | |
985 | ||
986 | if (p->p_memstat_effectivepriority > JETSAM_PRIORITY_FOREGROUND) { | |
987 | break; | |
988 | } | |
989 | ||
990 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) { | |
991 | continue; | |
992 | } | |
993 | ||
994 | if (p->p_memstat_state & P_MEMSTAT_ERROR) { | |
995 | p->p_memstat_state &= ~P_MEMSTAT_ERROR; | |
996 | } | |
997 | ||
998 | if (p->p_memstat_effectivepriority == memorystatus_freeze_jetsam_band) { | |
999 | continue; | |
1000 | } | |
1001 | ||
1002 | /* | |
1003 | * We explicitly add this flag here so the process looks like a normal | |
1004 | * frozen process i.e. P_MEMSTAT_FROZEN and P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND. | |
1005 | * We don't bother with assigning the 'active' memory | |
1006 | * limits at this point because we are going to be killing it soon below. | |
1007 | */ | |
1008 | p->p_memstat_state |= P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND; | |
1009 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1010 | ||
1011 | memorystatus_update_priority_locked(p, memorystatus_freeze_jetsam_band, FALSE, TRUE); | |
1012 | } | |
1013 | ||
1014 | bucket = &memstat_bucket[memorystatus_freeze_jetsam_band]; | |
1015 | bucket_count = bucket->count; | |
1016 | proc_list_unlock(); | |
1017 | ||
1018 | /* | |
1019 | * Bucket count is already stale at this point. But, we don't expect | |
1020 | * freezing to continue since we have already disabled the freeze functionality. | |
1021 | * However, an existing freeze might be in progress. So we might miss that process | |
1022 | * in the first go-around. We hope to catch it in the next. | |
1023 | */ | |
1024 | ||
1025 | errors_over_prev_iteration = 0; | |
1026 | while (bucket_count) { | |
1027 | bucket_count--; | |
1028 | ||
1029 | /* | |
1030 | * memorystatus_kill_elevated_process() drops a reference, | |
1031 | * so take another one so we can continue to use this exit reason | |
1032 | * even after it returns. | |
1033 | */ | |
1034 | ||
1035 | os_reason_ref(jetsam_reason); | |
1036 | retval = memorystatus_kill_elevated_process( | |
1037 | kMemorystatusKilledDiskSpaceShortage, | |
1038 | jetsam_reason, | |
1039 | memorystatus_freeze_jetsam_band, | |
1040 | 0, /* the iteration of aggressive jetsam..ignored here */ | |
1041 | &errors, | |
1042 | &footprint); | |
1043 | ||
1044 | if (errors > 0) { | |
1045 | printf("memorystatus_disable_freeze: memorystatus_kill_elevated_process returned %d error(s)\n", errors); | |
1046 | errors_over_prev_iteration += errors; | |
1047 | errors = 0; | |
1048 | } | |
1049 | ||
1050 | if (retval == 0) { | |
1051 | /* | |
1052 | * No frozen processes left to kill. | |
1053 | */ | |
1054 | break; | |
1055 | } | |
1056 | ||
1057 | killed = TRUE; | |
1058 | memory_reclaimed += footprint; | |
1059 | } | |
1060 | ||
1061 | proc_list_lock(); | |
1062 | ||
1063 | if (memorystatus_frozen_count) { | |
1064 | /* | |
1065 | * A frozen process snuck in and so | |
1066 | * go back around to kill it. That | |
1067 | * process may have been resumed and | |
1068 | * put into the FG band too. So we | |
1069 | * have to do the relocation again. | |
1070 | */ | |
1071 | assert(memorystatus_freeze_enabled == FALSE); | |
1072 | ||
1073 | retries++; | |
1074 | if (retries < 3) { | |
1075 | proc_list_unlock(); | |
1076 | goto again; | |
1077 | } | |
1078 | #if DEVELOPMENT || DEBUG | |
1079 | panic("memorystatus_disable_freeze: Failed to kill all frozen processes, memorystatus_frozen_count = %d, errors = %d", | |
1080 | memorystatus_frozen_count, errors_over_prev_iteration); | |
1081 | #endif /* DEVELOPMENT || DEBUG */ | |
1082 | } | |
1083 | proc_list_unlock(); | |
1084 | ||
1085 | os_reason_free(jetsam_reason); | |
1086 | ||
1087 | if (killed) { | |
1088 | vm_swap_consider_defragmenting(VM_SWAP_FLAGS_FORCE_DEFRAG | VM_SWAP_FLAGS_FORCE_RECLAIM); | |
1089 | ||
1090 | proc_list_lock(); | |
1091 | size_t snapshot_size = sizeof(memorystatus_jetsam_snapshot_t) + | |
1092 | sizeof(memorystatus_jetsam_snapshot_entry_t) * (memorystatus_jetsam_snapshot_count); | |
1093 | uint64_t timestamp_now = mach_absolute_time(); | |
1094 | memorystatus_jetsam_snapshot->notification_time = timestamp_now; | |
1095 | memorystatus_jetsam_snapshot->js_gencount++; | |
1096 | if (memorystatus_jetsam_snapshot_count > 0 && (memorystatus_jetsam_snapshot_last_timestamp == 0 || | |
1097 | timestamp_now > memorystatus_jetsam_snapshot_last_timestamp + memorystatus_jetsam_snapshot_timeout)) { | |
1098 | proc_list_unlock(); | |
1099 | int ret = memorystatus_send_note(kMemorystatusSnapshotNote, &snapshot_size, sizeof(snapshot_size)); | |
1100 | if (!ret) { | |
1101 | proc_list_lock(); | |
1102 | memorystatus_jetsam_snapshot_last_timestamp = timestamp_now; | |
1103 | proc_list_unlock(); | |
1104 | } | |
1105 | } else { | |
1106 | proc_list_unlock(); | |
1107 | } | |
1108 | } | |
1109 | ||
1110 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_DISABLE) | DBG_FUNC_END, | |
1111 | memorystatus_available_pages, memory_reclaimed, 0, 0, 0); | |
1112 | ||
1113 | return; | |
1114 | } | |
1115 | ||
1116 | __private_extern__ void | |
1117 | memorystatus_freeze_init(void) | |
1118 | { | |
1119 | kern_return_t result; | |
1120 | thread_t thread; | |
1121 | ||
cb323159 A |
1122 | /* |
1123 | * This is just the default value if the underlying | |
1124 | * storage device doesn't have any specific budget. | |
1125 | * We check with the storage layer in memorystatus_freeze_update_throttle() | |
1126 | * before we start our freezing the first time. | |
1127 | */ | |
1128 | memorystatus_freeze_budget_pages_remaining = (memorystatus_freeze_daily_mb_max * 1024 * 1024) / PAGE_SIZE; | |
1129 | ||
1130 | result = kernel_thread_start(memorystatus_freeze_thread, NULL, &thread); | |
1131 | if (result == KERN_SUCCESS) { | |
1132 | proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_IO, THROTTLE_LEVEL_COMPRESSOR_TIER2); | |
1133 | proc_set_thread_policy(thread, TASK_POLICY_INTERNAL, TASK_POLICY_PASSIVE_IO, TASK_POLICY_ENABLE); | |
1134 | thread_set_thread_name(thread, "VM_freezer"); | |
1135 | ||
1136 | thread_deallocate(thread); | |
1137 | } else { | |
1138 | panic("Could not create memorystatus_freeze_thread"); | |
1139 | } | |
1140 | } | |
1141 | ||
1142 | static boolean_t | |
1143 | memorystatus_is_process_eligible_for_freeze(proc_t p) | |
1144 | { | |
1145 | /* | |
1146 | * Called with proc_list_lock held. | |
1147 | */ | |
1148 | ||
c3c9b80d | 1149 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
cb323159 A |
1150 | |
1151 | boolean_t should_freeze = FALSE; | |
f427ee49 | 1152 | uint32_t state = 0, pages = 0; |
cb323159 | 1153 | int probability_of_use = 0; |
f427ee49 A |
1154 | size_t entry_count = 0, i = 0; |
1155 | bool first_consideration = true; | |
cb323159 A |
1156 | |
1157 | state = p->p_memstat_state; | |
1158 | ||
1159 | if (state & (P_MEMSTAT_TERMINATED | P_MEMSTAT_LOCKED | P_MEMSTAT_FREEZE_DISABLED | P_MEMSTAT_FREEZE_IGNORE)) { | |
2a1bd2d3 A |
1160 | if (state & P_MEMSTAT_FREEZE_DISABLED) { |
1161 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonDisabled; | |
1162 | } | |
cb323159 A |
1163 | goto out; |
1164 | } | |
1165 | ||
1166 | if (isSysProc(p)) { | |
1167 | /* | |
1168 | * Daemon:- We consider freezing it if: | |
1169 | * - it belongs to a coalition and the leader is frozen, and, | |
1170 | * - its role in the coalition is XPC service. | |
1171 | * | |
1172 | * We skip memory size requirements in this case. | |
1173 | */ | |
1174 | ||
1175 | coalition_t coal = COALITION_NULL; | |
1176 | task_t leader_task = NULL, curr_task = NULL; | |
1177 | proc_t leader_proc = NULL; | |
1178 | int task_role_in_coalition = 0; | |
1179 | ||
1180 | curr_task = proc_task(p); | |
1181 | coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM); | |
1182 | ||
1183 | if (coal == NULL || coalition_is_leader(curr_task, coal)) { | |
1184 | /* | |
1185 | * By default, XPC services without an app | |
1186 | * will be the leader of their own single-member | |
1187 | * coalition. | |
1188 | */ | |
1189 | goto out; | |
1190 | } | |
1191 | ||
1192 | leader_task = coalition_get_leader(coal); | |
1193 | if (leader_task == TASK_NULL) { | |
1194 | /* | |
1195 | * This jetsam coalition is currently leader-less. | |
1196 | * This could happen if the app died, but XPC services | |
1197 | * have not yet exited. | |
1198 | */ | |
1199 | goto out; | |
1200 | } | |
1201 | ||
1202 | leader_proc = (proc_t)get_bsdtask_info(leader_task); | |
1203 | task_deallocate(leader_task); | |
1204 | ||
1205 | if (leader_proc == PROC_NULL) { | |
1206 | /* leader task is exiting */ | |
1207 | goto out; | |
1208 | } | |
1209 | ||
1210 | if (!(leader_proc->p_memstat_state & P_MEMSTAT_FROZEN)) { | |
1211 | goto out; | |
1212 | } | |
1213 | ||
1214 | task_role_in_coalition = i_coal_jetsam_get_taskrole(coal, curr_task); | |
1215 | ||
1216 | if (task_role_in_coalition == COALITION_TASKROLE_XPC) { | |
1217 | should_freeze = TRUE; | |
1218 | } | |
1219 | ||
1220 | goto out; | |
1221 | } else { | |
1222 | /* | |
1223 | * Application. In addition to the above states we need to make | |
1224 | * sure we only consider suspended applications for freezing. | |
1225 | */ | |
1226 | if (!(state & P_MEMSTAT_SUSPENDED)) { | |
1227 | goto out; | |
1228 | } | |
1229 | } | |
1230 | ||
ea3f0419 A |
1231 | /* |
1232 | * This proc is a suspended application. | |
1233 | * We're interested in tracking what percentage of these | |
1234 | * actually get frozen. | |
f427ee49 A |
1235 | * To avoid skewing the metrics towards processes which |
1236 | * are considered more frequently, we only track failures once | |
1237 | * per process. | |
ea3f0419 | 1238 | */ |
f427ee49 A |
1239 | first_consideration = !(state & P_MEMSTAT_FREEZE_CONSIDERED); |
1240 | ||
1241 | if (first_consideration) { | |
1242 | memorystatus_freezer_stats.mfs_process_considered_count++; | |
1243 | p->p_memstat_state |= P_MEMSTAT_FREEZE_CONSIDERED; | |
1244 | } | |
cb323159 A |
1245 | |
1246 | /* Only freeze applications meeting our minimum resident page criteria */ | |
1247 | memorystatus_get_task_page_counts(p->task, &pages, NULL, NULL); | |
1248 | if (pages < memorystatus_freeze_pages_min) { | |
f427ee49 A |
1249 | if (first_consideration) { |
1250 | memorystatus_freezer_stats.mfs_error_below_min_pages_count++; | |
1251 | } | |
2a1bd2d3 | 1252 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonBelowMinPages; |
cb323159 A |
1253 | goto out; |
1254 | } | |
1255 | ||
1256 | /* Don't freeze processes that are already exiting on core. It may have started exiting | |
1257 | * after we chose it for freeze, but before we obtained the proc_list_lock. | |
1258 | * NB: This is only possible if we're coming in from memorystatus_freeze_process_sync. | |
1259 | * memorystatus_freeze_top_process holds the proc_list_lock while it traverses the bands. | |
1260 | */ | |
1261 | if ((p->p_listflag & P_LIST_EXITED) != 0) { | |
f427ee49 A |
1262 | if (first_consideration) { |
1263 | memorystatus_freezer_stats.mfs_error_other_count++; | |
1264 | } | |
2a1bd2d3 | 1265 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOther; |
cb323159 A |
1266 | goto out; |
1267 | } | |
1268 | ||
1269 | entry_count = (memorystatus_global_probabilities_size / sizeof(memorystatus_internal_probabilities_t)); | |
1270 | ||
1271 | if (entry_count) { | |
1272 | for (i = 0; i < entry_count; i++) { | |
c3c9b80d A |
1273 | /* |
1274 | * NB: memorystatus_internal_probabilities.proc_name is MAXCOMLEN + 1 bytes | |
1275 | * proc_t.p_name is 2*MAXCOMLEN + 1 bytes. So we only compare the first | |
1276 | * MAXCOMLEN bytes here since the name in the probabilities table could | |
1277 | * be truncated from the proc_t's p_name. | |
1278 | */ | |
cb323159 A |
1279 | if (strncmp(memorystatus_global_probabilities_table[i].proc_name, |
1280 | p->p_name, | |
c3c9b80d | 1281 | MAXCOMLEN) == 0) { |
cb323159 A |
1282 | probability_of_use = memorystatus_global_probabilities_table[i].use_probability; |
1283 | break; | |
1284 | } | |
1285 | } | |
1286 | ||
1287 | if (probability_of_use == 0) { | |
f427ee49 A |
1288 | if (first_consideration) { |
1289 | memorystatus_freezer_stats.mfs_error_low_probability_of_use_count++; | |
1290 | } | |
2a1bd2d3 | 1291 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonLowProbOfUse; |
cb323159 A |
1292 | goto out; |
1293 | } | |
1294 | } | |
1295 | ||
1296 | should_freeze = TRUE; | |
1297 | out: | |
2a1bd2d3 | 1298 | if (should_freeze && !(state & P_MEMSTAT_FROZEN)) { |
f427ee49 | 1299 | /* |
2a1bd2d3 A |
1300 | * Reset the skip reason. If it's killed before we manage to actually freeze it |
1301 | * we failed to consider it early enough. | |
f427ee49 | 1302 | */ |
2a1bd2d3 A |
1303 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone; |
1304 | if (!first_consideration) { | |
1305 | /* | |
1306 | * We're freezing this for the first time and we previously considered it ineligible. | |
1307 | * Bump the considered count so that we track this as 1 failure | |
1308 | * and 1 success. | |
1309 | */ | |
1310 | memorystatus_freezer_stats.mfs_process_considered_count++; | |
1311 | } | |
f427ee49 | 1312 | } |
cb323159 A |
1313 | return should_freeze; |
1314 | } | |
1315 | ||
1316 | /* | |
1317 | * Synchronously freeze the passed proc. Called with a reference to the proc held. | |
1318 | * | |
1319 | * Doesn't deal with: | |
1320 | * - re-freezing because this is called on a specific process and | |
1321 | * not by the freezer thread. If that changes, we'll have to teach it about | |
1322 | * refreezing a frozen process. | |
1323 | * | |
1324 | * - grouped/coalition freezing because we are hoping to deprecate this | |
1325 | * interface as it was used by user-space to freeze particular processes. But | |
1326 | * we have moved away from that approach to having the kernel choose the optimal | |
1327 | * candidates to be frozen. | |
1328 | * | |
1329 | * Returns EINVAL or the value returned by task_freeze(). | |
1330 | */ | |
1331 | int | |
1332 | memorystatus_freeze_process_sync(proc_t p) | |
1333 | { | |
1334 | int ret = EINVAL; | |
1335 | pid_t aPid = 0; | |
1336 | boolean_t memorystatus_freeze_swap_low = FALSE; | |
1337 | int freezer_error_code = 0; | |
1338 | ||
1339 | lck_mtx_lock(&freezer_mutex); | |
1340 | ||
1341 | if (p == NULL) { | |
1342 | printf("memorystatus_freeze_process_sync: Invalid process\n"); | |
1343 | goto exit; | |
1344 | } | |
1345 | ||
1346 | if (memorystatus_freeze_enabled == FALSE) { | |
1347 | printf("memorystatus_freeze_process_sync: Freezing is DISABLED\n"); | |
1348 | goto exit; | |
1349 | } | |
1350 | ||
1351 | if (!memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { | |
1352 | printf("memorystatus_freeze_process_sync: Low compressor and/or low swap space...skipping freeze\n"); | |
1353 | goto exit; | |
1354 | } | |
1355 | ||
1356 | memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining); | |
1357 | if (!memorystatus_freeze_budget_pages_remaining) { | |
1358 | printf("memorystatus_freeze_process_sync: exit with NO available budget\n"); | |
1359 | goto exit; | |
1360 | } | |
1361 | ||
1362 | proc_list_lock(); | |
1363 | ||
1364 | if (p != NULL) { | |
1365 | uint32_t purgeable, wired, clean, dirty, shared; | |
f427ee49 A |
1366 | uint32_t i; |
1367 | uint64_t max_pages; | |
cb323159 A |
1368 | |
1369 | aPid = p->p_pid; | |
1370 | ||
1371 | /* Ensure the process is eligible for freezing */ | |
1372 | if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) { | |
1373 | proc_list_unlock(); | |
1374 | goto exit; | |
1375 | } | |
1376 | ||
1377 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1378 | max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining); | |
1379 | } else { | |
1380 | /* | |
1381 | * We only have the compressor without any swap. | |
1382 | */ | |
1383 | max_pages = UINT32_MAX - 1; | |
1384 | } | |
1385 | ||
1386 | /* Mark as locked temporarily to avoid kill */ | |
1387 | p->p_memstat_state |= P_MEMSTAT_LOCKED; | |
1388 | proc_list_unlock(); | |
1389 | ||
1390 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, | |
1391 | memorystatus_available_pages, 0, 0, 0, 0); | |
1392 | ||
f427ee49 A |
1393 | max_pages = MIN(max_pages, UINT32_MAX); |
1394 | ret = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */); | |
ea3f0419 A |
1395 | if (ret == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) { |
1396 | memorystatus_freezer_stats.mfs_shared_pages_skipped += shared; | |
1397 | } | |
cb323159 A |
1398 | |
1399 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, | |
1400 | memorystatus_available_pages, aPid, 0, 0, 0); | |
1401 | ||
1402 | DTRACE_MEMORYSTATUS6(memorystatus_freeze, proc_t, p, unsigned int, memorystatus_available_pages, boolean_t, purgeable, unsigned int, wired, uint32_t, clean, uint32_t, dirty); | |
1403 | ||
1404 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_process_sync: task_freeze %s for pid %d [%s] - " | |
1405 | "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n", | |
1406 | (ret == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"), | |
1407 | memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared); | |
1408 | ||
1409 | proc_list_lock(); | |
1410 | ||
1411 | if (ret == KERN_SUCCESS) { | |
1412 | memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; | |
1413 | ||
1414 | p->p_memstat_freeze_sharedanon_pages += shared; | |
1415 | ||
1416 | memorystatus_frozen_shared_mb += shared; | |
1417 | ||
1418 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) { | |
1419 | p->p_memstat_state |= P_MEMSTAT_FROZEN; | |
2a1bd2d3 | 1420 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone; |
cb323159 | 1421 | memorystatus_frozen_count++; |
c3c9b80d | 1422 | os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed); |
2a1bd2d3 A |
1423 | if (memorystatus_frozen_count == memorystatus_frozen_processes_max) { |
1424 | memorystatus_freeze_out_of_slots(); | |
1425 | } | |
f427ee49 A |
1426 | } else { |
1427 | // This was a re-freeze | |
1428 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1429 | memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE; | |
1430 | memorystatus_freezer_stats.mfs_refreeze_count++; | |
1431 | } | |
cb323159 A |
1432 | } |
1433 | ||
1434 | p->p_memstat_frozen_count++; | |
1435 | ||
1436 | /* | |
1437 | * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process | |
1438 | * to its higher jetsam band. | |
1439 | */ | |
1440 | proc_list_unlock(); | |
1441 | ||
1442 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); | |
1443 | ||
1444 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1445 | ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, | |
1446 | memorystatus_freeze_jetsam_band, TRUE); | |
1447 | ||
1448 | if (ret) { | |
1449 | printf("Elevating the frozen process failed with %d\n", ret); | |
1450 | /* not fatal */ | |
1451 | ret = 0; | |
1452 | } | |
1453 | ||
cb323159 A |
1454 | |
1455 | /* Update stats */ | |
1456 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
1457 | throttle_intervals[i].pageouts += dirty; | |
1458 | } | |
cb323159 | 1459 | } |
ea3f0419 A |
1460 | memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining); |
1461 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: freezing (specific) pid %d [%s] done memorystatus_freeze_budget_pages_remaining %llu froze %u pages", | |
1462 | aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, dirty); | |
1463 | ||
1464 | proc_list_lock(); | |
cb323159 A |
1465 | |
1466 | memorystatus_freeze_pageouts += dirty; | |
1467 | ||
1468 | if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) { | |
1469 | /* | |
1470 | * Add some eviction logic here? At some point should we | |
1471 | * jetsam a process to get back its swap space so that we | |
1472 | * can freeze a more eligible process at this moment in time? | |
1473 | */ | |
1474 | } | |
cb323159 | 1475 | } else { |
2a1bd2d3 | 1476 | memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, NULL, "memorystatus_freeze_process_sync"); |
cb323159 A |
1477 | p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE; |
1478 | } | |
1479 | ||
1480 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
1481 | wakeup(&p->p_memstat_state); | |
1482 | proc_list_unlock(); | |
1483 | } | |
1484 | ||
1485 | exit: | |
1486 | lck_mtx_unlock(&freezer_mutex); | |
1487 | ||
1488 | return ret; | |
1489 | } | |
1490 | ||
ea3f0419 A |
1491 | /* |
1492 | * Caller must hold the freezer_mutex and it will be locked on return. | |
1493 | */ | |
cb323159 A |
1494 | static int |
1495 | memorystatus_freeze_top_process(void) | |
1496 | { | |
1497 | pid_t aPid = 0, coal_xpc_pid = 0; | |
1498 | int ret = -1; | |
1499 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
1500 | unsigned int i = 0; | |
1501 | unsigned int band = JETSAM_PRIORITY_IDLE; | |
2a1bd2d3 | 1502 | bool refreeze_processes = false; |
cb323159 A |
1503 | task_t curr_task = NULL; |
1504 | coalition_t coal = COALITION_NULL; | |
1505 | pid_t pid_list[MAX_XPC_SERVICE_PIDS]; | |
1506 | unsigned int ntasks = 0; | |
ea3f0419 | 1507 | LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED); |
cb323159 A |
1508 | |
1509 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_START, memorystatus_available_pages, 0, 0, 0, 0); | |
1510 | ||
1511 | proc_list_lock(); | |
1512 | ||
1513 | if (memorystatus_frozen_count >= memorystatus_frozen_processes_max) { | |
1514 | /* | |
1515 | * Freezer is already full but we are here and so let's | |
1516 | * try to refreeze any processes we might have thawed | |
1517 | * in the past and push out their compressed state out. | |
1518 | */ | |
2a1bd2d3 | 1519 | refreeze_processes = true; |
cb323159 A |
1520 | band = (unsigned int) memorystatus_freeze_jetsam_band; |
1521 | } | |
1522 | ||
1523 | freeze_process: | |
1524 | ||
1525 | next_p = memorystatus_get_first_proc_locked(&band, FALSE); | |
1526 | while (next_p) { | |
1527 | kern_return_t kr; | |
1528 | uint32_t purgeable, wired, clean, dirty, shared; | |
f427ee49 | 1529 | uint64_t max_pages = 0; |
cb323159 | 1530 | int freezer_error_code = 0; |
2a1bd2d3 | 1531 | bool was_refreeze = false; |
cb323159 A |
1532 | |
1533 | p = next_p; | |
1534 | ||
1535 | if (coal == NULL) { | |
1536 | next_p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
1537 | } else { | |
1538 | /* | |
1539 | * We have frozen a coalition leader and now are | |
1540 | * dealing with its XPC services. We get our | |
1541 | * next_p for each XPC service from the pid_list | |
1542 | * acquired after a successful task_freeze call | |
1543 | * on the coalition leader. | |
1544 | */ | |
1545 | ||
1546 | if (ntasks > 0) { | |
1547 | coal_xpc_pid = pid_list[--ntasks]; | |
1548 | next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */); | |
1549 | /* | |
1550 | * We grab a reference when we are about to freeze the process. So, drop | |
1551 | * the reference that proc_findinternal() grabbed for us. | |
1552 | * We also have the proc_list_lock and so this process is stable. | |
1553 | */ | |
1554 | if (next_p) { | |
1555 | proc_rele_locked(next_p); | |
1556 | } | |
1557 | } else { | |
1558 | next_p = NULL; | |
1559 | } | |
1560 | } | |
1561 | ||
1562 | aPid = p->p_pid; | |
1563 | ||
1564 | if (p->p_memstat_effectivepriority != (int32_t) band) { | |
1565 | /* | |
1566 | * We shouldn't be freezing processes outside the | |
1567 | * prescribed band. | |
1568 | */ | |
1569 | break; | |
1570 | } | |
1571 | ||
1572 | /* Ensure the process is eligible for (re-)freezing */ | |
1573 | if (refreeze_processes) { | |
1574 | /* | |
1575 | * Has to have been frozen once before. | |
1576 | */ | |
1577 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) { | |
1578 | continue; | |
1579 | } | |
1580 | ||
1581 | /* | |
1582 | * Has to have been resumed once before. | |
1583 | */ | |
1584 | if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == FALSE) { | |
1585 | continue; | |
1586 | } | |
1587 | ||
1588 | /* | |
1589 | * Not currently being looked at for something. | |
1590 | */ | |
1591 | if (p->p_memstat_state & P_MEMSTAT_LOCKED) { | |
1592 | continue; | |
1593 | } | |
1594 | ||
1595 | /* | |
1596 | * We are going to try and refreeze and so re-evaluate | |
1597 | * the process. We don't want to double count the shared | |
1598 | * memory. So deduct the old snapshot here. | |
1599 | */ | |
1600 | memorystatus_frozen_shared_mb -= p->p_memstat_freeze_sharedanon_pages; | |
1601 | p->p_memstat_freeze_sharedanon_pages = 0; | |
1602 | ||
1603 | p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE; | |
1604 | memorystatus_refreeze_eligible_count--; | |
1605 | } else { | |
1606 | if (memorystatus_is_process_eligible_for_freeze(p) == FALSE) { | |
1607 | continue; // with lock held | |
1608 | } | |
1609 | } | |
1610 | ||
1611 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1612 | /* | |
1613 | * Freezer backed by the compressor and swap file(s) | |
1614 | * will hold compressed data. | |
1615 | */ | |
1616 | ||
1617 | max_pages = MIN(memorystatus_freeze_pages_max, memorystatus_freeze_budget_pages_remaining); | |
1618 | } else { | |
1619 | /* | |
1620 | * We only have the compressor pool. | |
1621 | */ | |
1622 | max_pages = UINT32_MAX - 1; | |
1623 | } | |
1624 | ||
1625 | /* Mark as locked temporarily to avoid kill */ | |
1626 | p->p_memstat_state |= P_MEMSTAT_LOCKED; | |
1627 | ||
1628 | p = proc_ref_locked(p); | |
1629 | if (!p) { | |
ea3f0419 | 1630 | memorystatus_freezer_stats.mfs_error_other_count++; |
cb323159 A |
1631 | break; |
1632 | } | |
1633 | ||
1634 | proc_list_unlock(); | |
1635 | ||
1636 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_START, | |
1637 | memorystatus_available_pages, 0, 0, 0, 0); | |
1638 | ||
f427ee49 A |
1639 | max_pages = MIN(max_pages, UINT32_MAX); |
1640 | kr = task_freeze(p->task, &purgeable, &wired, &clean, &dirty, (uint32_t) max_pages, &shared, &freezer_error_code, FALSE /* eval only */); | |
ea3f0419 A |
1641 | if (kr == KERN_SUCCESS || freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO) { |
1642 | memorystatus_freezer_stats.mfs_shared_pages_skipped += shared; | |
1643 | } | |
cb323159 A |
1644 | |
1645 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE) | DBG_FUNC_END, | |
1646 | memorystatus_available_pages, aPid, 0, 0, 0); | |
1647 | ||
1648 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_top_process: task_freeze %s for pid %d [%s] - " | |
1649 | "memorystatus_pages: %d, purgeable: %d, wired: %d, clean: %d, dirty: %d, max_pages %d, shared %d\n", | |
1650 | (kr == KERN_SUCCESS) ? "SUCCEEDED" : "FAILED", aPid, (*p->p_name ? p->p_name : "(unknown)"), | |
1651 | memorystatus_available_pages, purgeable, wired, clean, dirty, max_pages, shared); | |
1652 | ||
1653 | proc_list_lock(); | |
1654 | ||
1655 | /* Success? */ | |
1656 | if (KERN_SUCCESS == kr) { | |
1657 | memorystatus_freeze_entry_t data = { aPid, TRUE, dirty }; | |
1658 | ||
1659 | p->p_memstat_freeze_sharedanon_pages += shared; | |
1660 | ||
1661 | memorystatus_frozen_shared_mb += shared; | |
1662 | ||
1663 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == 0) { | |
1664 | p->p_memstat_state |= P_MEMSTAT_FROZEN; | |
2a1bd2d3 | 1665 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone; |
cb323159 | 1666 | memorystatus_frozen_count++; |
c3c9b80d | 1667 | os_atomic_inc(&memorystatus_freezer_stats.mfs_processes_frozen, relaxed); |
2a1bd2d3 A |
1668 | if (memorystatus_frozen_count == memorystatus_frozen_processes_max) { |
1669 | memorystatus_freeze_out_of_slots(); | |
1670 | } | |
f427ee49 A |
1671 | } else { |
1672 | // This was a re-freeze | |
1673 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1674 | memorystatus_freezer_stats.mfs_bytes_refrozen += dirty * PAGE_SIZE; | |
1675 | memorystatus_freezer_stats.mfs_refreeze_count++; | |
1676 | } | |
2a1bd2d3 | 1677 | was_refreeze = true; |
cb323159 A |
1678 | } |
1679 | ||
1680 | p->p_memstat_frozen_count++; | |
1681 | ||
1682 | /* | |
1683 | * Still keeping the P_MEMSTAT_LOCKED bit till we are actually done elevating this frozen process | |
1684 | * to its higher jetsam band. | |
1685 | */ | |
1686 | proc_list_unlock(); | |
1687 | ||
1688 | memorystatus_send_note(kMemorystatusFreezeNote, &data, sizeof(data)); | |
1689 | ||
1690 | if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1691 | ret = memorystatus_update_inactive_jetsam_priority_band(p->p_pid, MEMORYSTATUS_CMD_ELEVATED_INACTIVEJETSAMPRIORITY_ENABLE, memorystatus_freeze_jetsam_band, TRUE); | |
1692 | ||
1693 | if (ret) { | |
1694 | printf("Elevating the frozen process failed with %d\n", ret); | |
1695 | /* not fatal */ | |
1696 | ret = 0; | |
1697 | } | |
1698 | ||
cb323159 A |
1699 | /* Update stats */ |
1700 | for (i = 0; i < sizeof(throttle_intervals) / sizeof(struct throttle_interval_t); i++) { | |
1701 | throttle_intervals[i].pageouts += dirty; | |
1702 | } | |
cb323159 | 1703 | } |
ea3f0419 A |
1704 | memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining); |
1705 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: %sfreezing (%s) pid %d [%s] done, memorystatus_freeze_budget_pages_remaining %llu %sfroze %u pages\n", | |
2a1bd2d3 | 1706 | was_refreeze ? "re" : "", (coal == NULL ? "general" : "coalition-driven"), aPid, ((p && *p->p_name) ? p->p_name : "unknown"), memorystatus_freeze_budget_pages_remaining, was_refreeze ? "Re" : "", dirty); |
ea3f0419 A |
1707 | |
1708 | proc_list_lock(); | |
cb323159 A |
1709 | |
1710 | memorystatus_freeze_pageouts += dirty; | |
1711 | ||
1712 | if (memorystatus_frozen_count == (memorystatus_frozen_processes_max - 1)) { | |
1713 | /* | |
1714 | * Add some eviction logic here? At some point should we | |
1715 | * jetsam a process to get back its swap space so that we | |
1716 | * can freeze a more eligible process at this moment in time? | |
1717 | */ | |
1718 | } | |
1719 | ||
cb323159 A |
1720 | /* Return KERN_SUCCESS */ |
1721 | ret = kr; | |
1722 | ||
1723 | /* | |
1724 | * We froze a process successfully. We can stop now | |
1725 | * and see if that helped if this process isn't part | |
1726 | * of a coalition. | |
1727 | * | |
1728 | * Else: | |
1729 | * - if it is a leader, get the list of XPC services | |
1730 | * that need to be frozen. | |
1731 | * - if it is a XPC service whose leader was frozen | |
1732 | * here, continue on to the next XPC service in the list. | |
1733 | */ | |
1734 | ||
1735 | if (coal == NULL) { | |
1736 | curr_task = proc_task(p); | |
1737 | coal = task_get_coalition(curr_task, COALITION_TYPE_JETSAM); | |
1738 | if (coalition_is_leader(curr_task, coal)) { | |
1739 | ntasks = coalition_get_pid_list(coal, COALITION_ROLEMASK_XPC, | |
1740 | COALITION_SORT_DEFAULT, pid_list, MAX_XPC_SERVICE_PIDS); | |
1741 | ||
1742 | if (ntasks > MAX_XPC_SERVICE_PIDS) { | |
1743 | ntasks = MAX_XPC_SERVICE_PIDS; | |
1744 | } | |
1745 | } | |
1746 | ||
1747 | next_p = NULL; | |
1748 | ||
1749 | if (ntasks > 0) { | |
1750 | /* | |
1751 | * Start off with our first next_p in this list. | |
1752 | */ | |
1753 | coal_xpc_pid = pid_list[--ntasks]; | |
1754 | next_p = proc_findinternal(coal_xpc_pid, 1 /* proc_list_lock held */); | |
1755 | ||
1756 | /* | |
1757 | * We grab a reference when we are about to freeze the process. So drop | |
1758 | * the reference that proc_findinternal() grabbed for us. | |
1759 | * We also have the proc_list_lock and so this process is stable. | |
1760 | */ | |
1761 | if (next_p) { | |
1762 | proc_rele_locked(next_p); | |
1763 | } | |
1764 | } | |
1765 | } | |
1766 | ||
1767 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
1768 | wakeup(&p->p_memstat_state); | |
1769 | proc_rele_locked(p); | |
1770 | ||
1771 | if (coal && next_p) { | |
1772 | continue; | |
1773 | } | |
1774 | ||
1775 | /* | |
1776 | * No coalition leader was frozen. So we don't | |
1777 | * need to evaluate any XPC services. | |
1778 | * | |
1779 | * OR | |
1780 | * | |
1781 | * We have frozen all eligible XPC services for | |
1782 | * the current coalition leader. | |
1783 | * | |
1784 | * Either way, we can break here and see if freezing | |
1785 | * helped. | |
1786 | */ | |
1787 | ||
1788 | break; | |
1789 | } else { | |
1790 | p->p_memstat_state &= ~P_MEMSTAT_LOCKED; | |
1791 | wakeup(&p->p_memstat_state); | |
1792 | ||
2a1bd2d3 | 1793 | if (refreeze_processes) { |
cb323159 A |
1794 | if ((freezer_error_code == FREEZER_ERROR_EXCESS_SHARED_MEMORY) || |
1795 | (freezer_error_code == FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO)) { | |
1796 | /* | |
1797 | * Keeping this prior-frozen process in this high band when | |
1798 | * we failed to re-freeze it due to bad shared memory usage | |
1799 | * could cause excessive pressure on the lower bands. | |
1800 | * We need to demote it for now. It'll get re-evaluated next | |
1801 | * time because we don't set the P_MEMSTAT_FREEZE_IGNORE | |
1802 | * bit. | |
1803 | */ | |
1804 | ||
1805 | p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND; | |
1806 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
1807 | memorystatus_update_priority_locked(p, JETSAM_PRIORITY_IDLE, TRUE, TRUE); | |
1808 | } | |
1809 | } else { | |
1810 | p->p_memstat_state |= P_MEMSTAT_FREEZE_IGNORE; | |
1811 | } | |
c3c9b80d | 1812 | memorystatus_freeze_handle_error(p, freezer_error_code, p->p_memstat_state & P_MEMSTAT_FROZEN, aPid, coal, "memorystatus_freeze_top_process"); |
cb323159 A |
1813 | |
1814 | proc_rele_locked(p); | |
1815 | ||
1816 | if (vm_compressor_low_on_space() || vm_swap_low_on_space()) { | |
1817 | break; | |
1818 | } | |
1819 | } | |
1820 | } | |
1821 | ||
1822 | if ((ret == -1) && | |
1823 | (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD) && | |
2a1bd2d3 | 1824 | (!refreeze_processes)) { |
cb323159 A |
1825 | /* |
1826 | * We failed to freeze a process from the IDLE | |
1827 | * band AND we have some thawed processes | |
1828 | * AND haven't tried refreezing as yet. | |
1829 | * Let's try and re-freeze processes in the | |
1830 | * frozen band that have been resumed in the past | |
1831 | * and so have brought in state from disk. | |
1832 | */ | |
1833 | ||
1834 | band = (unsigned int) memorystatus_freeze_jetsam_band; | |
1835 | ||
2a1bd2d3 | 1836 | refreeze_processes = true; |
cb323159 A |
1837 | |
1838 | goto freeze_process; | |
1839 | } | |
1840 | ||
1841 | proc_list_unlock(); | |
1842 | ||
1843 | KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_MEMSTAT, BSD_MEMSTAT_FREEZE_SCAN) | DBG_FUNC_END, memorystatus_available_pages, aPid, 0, 0, 0); | |
1844 | ||
1845 | return ret; | |
1846 | } | |
1847 | ||
c3c9b80d A |
#if DEVELOPMENT || DEBUG
/*
 * Test hook for memorystatus_freeze_top_process().
 *
 * Writing any integer to vm.memorystatus_freeze_top_process runs one freeze
 * pass with the freezer_mutex held. The written value itself is ignored.
 * Reads do nothing, so that `sysctl -a` cannot trigger a freeze.
 *
 * Returns the sysctl_handle_int() error if there is one, ESRCH when
 * memorystatus_freeze_top_process() returns -1, and otherwise that
 * function's return value.
 */
static int
sysctl_memorystatus_freeze_top_process SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error;
	/*
	 * &val is handed to sysctl_handle_int() for reads as well as writes, and
	 * the handler may copy the current value out to the caller. Start from a
	 * defined value so no uninitialized stack contents can be exposed.
	 */
	int val = 0;
	int ret;

	/*
	 * Only freeze on write to prevent freezing during `sysctl -a`.
	 * The actual value written doesn't matter.
	 */
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr) {
		return error;
	}

	/* memorystatus_freeze_top_process() requires the freezer_mutex. */
	lck_mtx_lock(&freezer_mutex);
	ret = memorystatus_freeze_top_process();
	lck_mtx_unlock(&freezer_mutex);

	/* -1 means nothing was frozen; report that as "no such process". */
	if (ret == -1) {
		ret = ESRCH;
	}
	return ret;
}
SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_top_process, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED,
    0, 0, &sysctl_memorystatus_freeze_top_process, "I", "");
#endif /* DEVELOPMENT || DEBUG */
1874 | ||
cb323159 A |
1875 | static inline boolean_t |
1876 | memorystatus_can_freeze_processes(void) | |
1877 | { | |
1878 | boolean_t ret; | |
1879 | ||
1880 | proc_list_lock(); | |
1881 | ||
1882 | if (memorystatus_suspended_count) { | |
1883 | memorystatus_freeze_suspended_threshold = MIN(memorystatus_freeze_suspended_threshold, FREEZE_SUSPENDED_THRESHOLD_DEFAULT); | |
1884 | ||
1885 | if ((memorystatus_suspended_count - memorystatus_frozen_count) > memorystatus_freeze_suspended_threshold) { | |
1886 | ret = TRUE; | |
1887 | } else { | |
1888 | ret = FALSE; | |
1889 | } | |
1890 | } else { | |
1891 | ret = FALSE; | |
1892 | } | |
1893 | ||
1894 | proc_list_unlock(); | |
1895 | ||
1896 | return ret; | |
1897 | } | |
1898 | ||
1899 | static boolean_t | |
1900 | memorystatus_can_freeze(boolean_t *memorystatus_freeze_swap_low) | |
1901 | { | |
1902 | boolean_t can_freeze = TRUE; | |
1903 | ||
1904 | /* Only freeze if we're sufficiently low on memory; this holds off freeze right | |
1905 | * after boot, and is generally is a no-op once we've reached steady state. */ | |
1906 | if (memorystatus_available_pages > memorystatus_freeze_threshold) { | |
1907 | return FALSE; | |
1908 | } | |
1909 | ||
1910 | /* Check minimum suspended process threshold. */ | |
1911 | if (!memorystatus_can_freeze_processes()) { | |
1912 | return FALSE; | |
1913 | } | |
1914 | assert(VM_CONFIG_COMPRESSOR_IS_PRESENT); | |
1915 | ||
1916 | if (!VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) { | |
1917 | /* | |
1918 | * In-core compressor used for freezing WITHOUT on-disk swap support. | |
1919 | */ | |
1920 | if (vm_compressor_low_on_space()) { | |
1921 | if (*memorystatus_freeze_swap_low) { | |
1922 | *memorystatus_freeze_swap_low = TRUE; | |
1923 | } | |
1924 | ||
1925 | can_freeze = FALSE; | |
1926 | } else { | |
1927 | if (*memorystatus_freeze_swap_low) { | |
1928 | *memorystatus_freeze_swap_low = FALSE; | |
1929 | } | |
1930 | ||
1931 | can_freeze = TRUE; | |
1932 | } | |
1933 | } else { | |
1934 | /* | |
1935 | * Freezing WITH on-disk swap support. | |
1936 | * | |
1937 | * In-core compressor fronts the swap. | |
1938 | */ | |
1939 | if (vm_swap_low_on_space()) { | |
1940 | if (*memorystatus_freeze_swap_low) { | |
1941 | *memorystatus_freeze_swap_low = TRUE; | |
1942 | } | |
1943 | ||
1944 | can_freeze = FALSE; | |
1945 | } | |
1946 | } | |
1947 | ||
1948 | return can_freeze; | |
1949 | } | |
1950 | ||
1951 | /* | |
1952 | * This function evaluates if the currently frozen processes deserve | |
1953 | * to stay in the higher jetsam band. There are 2 modes: | |
1954 | * - 'force one == TRUE': (urgent mode) | |
1955 | * We are out of budget and can't refreeze a process. The process's | |
1956 | * state, if it was resumed, will stay in compressed memory. If we let it | |
1957 | * remain up in the higher frozen jetsam band, it'll put a lot of pressure on | |
1958 | * the lower bands. So we force-demote the least-recently-used-and-thawed | |
1959 | * process. | |
1960 | * | |
1961 | * - 'force_one == FALSE': (normal mode) | |
1962 | * If the # of thaws of a process is below our threshold, then we | |
1963 | * will demote that process into the IDLE band. | |
1964 | * We don't immediately kill the process here because it already has | |
1965 | * state on disk and so it might be worth giving it another shot at | |
1966 | * getting thawed/resumed and used. | |
1967 | */ | |
1968 | static void | |
1969 | memorystatus_demote_frozen_processes(boolean_t force_one) | |
1970 | { | |
1971 | unsigned int band = (unsigned int) memorystatus_freeze_jetsam_band; | |
1972 | unsigned int demoted_proc_count = 0; | |
1973 | proc_t p = PROC_NULL, next_p = PROC_NULL; | |
1974 | /* We demote to IDLE unless someone has asserted a higher priority on this process. */ | |
1975 | int maxpriority = JETSAM_PRIORITY_IDLE; | |
1976 | ||
1977 | proc_list_lock(); | |
1978 | ||
1979 | if (memorystatus_freeze_enabled == FALSE) { | |
1980 | /* | |
1981 | * Freeze has been disabled likely to | |
1982 | * reclaim swap space. So don't change | |
1983 | * any state on the frozen processes. | |
1984 | */ | |
1985 | proc_list_unlock(); | |
1986 | return; | |
1987 | } | |
1988 | ||
1989 | next_p = memorystatus_get_first_proc_locked(&band, FALSE); | |
1990 | while (next_p) { | |
1991 | p = next_p; | |
1992 | next_p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
1993 | ||
1994 | if ((p->p_memstat_state & P_MEMSTAT_FROZEN) == FALSE) { | |
1995 | continue; | |
1996 | } | |
1997 | ||
1998 | if (p->p_memstat_state & P_MEMSTAT_LOCKED) { | |
1999 | continue; | |
2000 | } | |
2001 | ||
2002 | if (force_one == TRUE) { | |
2003 | if ((p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) == 0) { | |
2004 | /* | |
2005 | * This process hasn't been thawed recently and so most of | |
2006 | * its state sits on NAND and so we skip it -- jetsamming it | |
2007 | * won't help with memory pressure. | |
2008 | */ | |
2009 | continue; | |
2010 | } | |
2011 | } else { | |
2012 | if (p->p_memstat_thaw_count >= memorystatus_thaw_count_demotion_threshold) { | |
2013 | /* | |
2014 | * This process has met / exceeded our thaw count demotion threshold | |
2015 | * and so we let it live in the higher bands. | |
2016 | */ | |
2017 | continue; | |
2018 | } | |
2019 | } | |
2020 | ||
2021 | p->p_memstat_state &= ~P_MEMSTAT_USE_ELEVATED_INACTIVE_BAND; | |
2022 | memorystatus_invalidate_idle_demotion_locked(p, TRUE); | |
2023 | ||
2024 | maxpriority = MAX(p->p_memstat_assertionpriority, maxpriority); | |
2025 | memorystatus_update_priority_locked(p, maxpriority, FALSE, FALSE); | |
2026 | #if DEVELOPMENT || DEBUG | |
2027 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus_demote_frozen_process(%s) pid %d [%s]", | |
2028 | (force_one ? "urgent" : "normal"), (p ? p->p_pid : -1), ((p && *p->p_name) ? p->p_name : "unknown")); | |
2029 | #endif /* DEVELOPMENT || DEBUG */ | |
2030 | ||
2031 | /* | |
2032 | * The freezer thread will consider this a normal app to be frozen | |
2033 | * because it is in the IDLE band. So we don't need the | |
2034 | * P_MEMSTAT_REFREEZE_ELIGIBLE state here. Also, if it gets resumed | |
2035 | * we'll correctly count it as eligible for re-freeze again. | |
2036 | * | |
2037 | * We don't drop the frozen count because this process still has | |
2038 | * state on disk. So there's a chance it gets resumed and then it | |
2039 | * should land in the higher jetsam band. For that it needs to | |
2040 | * remain marked frozen. | |
2041 | */ | |
2042 | if (p->p_memstat_state & P_MEMSTAT_REFREEZE_ELIGIBLE) { | |
2043 | p->p_memstat_state &= ~P_MEMSTAT_REFREEZE_ELIGIBLE; | |
2044 | memorystatus_refreeze_eligible_count--; | |
2045 | } | |
2046 | ||
2047 | demoted_proc_count++; | |
2048 | ||
2049 | if ((force_one == TRUE) || (demoted_proc_count == memorystatus_max_frozen_demotions_daily)) { | |
2050 | break; | |
2051 | } | |
2052 | } | |
2053 | ||
2054 | if (force_one == FALSE) { | |
2055 | /* | |
ea3f0419 A |
2056 | * We use these counters to track daily hit rates. |
2057 | * So we only reset them to 0 under the normal | |
cb323159 A |
2058 | * mode. |
2059 | */ | |
2060 | memorystatus_thaw_count = 0; | |
2061 | } | |
2062 | ||
2063 | proc_list_unlock(); | |
2064 | } | |
2065 | ||
ea3f0419 A |
2066 | /* |
2067 | * Calculate a new freezer budget. | |
2068 | * @param time_since_last_interval_expired_sec How long has it been (in seconds) since the previous interval expired. | |
2069 | * @param burst_multiple The burst_multiple for the new period | |
2070 | * @param interval_duration_min How many minutes will the new interval be? | |
2071 | * @param rollover The amount to rollover from the previous budget. | |
2072 | * | |
2073 | * @return A budget for the new interval. | |
2074 | */ | |
2075 | static uint32_t | |
2076 | memorystatus_freeze_calculate_new_budget( | |
2077 | unsigned int time_since_last_interval_expired_sec, | |
2078 | unsigned int burst_multiple, | |
2079 | unsigned int interval_duration_min, | |
2080 | uint32_t rollover) | |
2081 | { | |
f427ee49 | 2082 | uint64_t freeze_daily_budget = 0, freeze_daily_budget_mb = 0, daily_budget_pageouts = 0, budget_missed = 0, freeze_daily_pageouts_max = 0, new_budget = 0; |
ea3f0419 A |
2083 | const static unsigned int kNumSecondsInDay = 60 * 60 * 24; |
2084 | /* Precision factor for days_missed. 2 decimal points. */ | |
2085 | const static unsigned int kFixedPointFactor = 100; | |
f427ee49 | 2086 | unsigned int days_missed; |
ea3f0419 A |
2087 | |
2088 | /* Get the daily budget from the storage layer */ | |
2089 | if (vm_swap_max_budget(&freeze_daily_budget)) { | |
f427ee49 A |
2090 | freeze_daily_budget_mb = freeze_daily_budget / (1024 * 1024); |
2091 | assert(freeze_daily_budget_mb <= UINT32_MAX); | |
2092 | memorystatus_freeze_daily_mb_max = (unsigned int) freeze_daily_budget_mb; | |
ea3f0419 A |
2093 | os_log_with_startup_serial(OS_LOG_DEFAULT, "memorystatus: memorystatus_freeze_daily_mb_max set to %dMB\n", memorystatus_freeze_daily_mb_max); |
2094 | } | |
2095 | /* Calculate the daily pageout budget */ | |
2096 | freeze_daily_pageouts_max = memorystatus_freeze_daily_mb_max * (1024 * 1024 / PAGE_SIZE); | |
2097 | ||
2098 | daily_budget_pageouts = (burst_multiple * (((uint64_t) interval_duration_min * freeze_daily_pageouts_max) / (kNumSecondsInDay / 60))); | |
2099 | ||
2100 | /* | |
2101 | * Add additional budget for time since the interval expired. | |
2102 | * For example, if the interval expired n days ago, we should get an additional n days | |
2103 | * of budget since we didn't use any budget during those n days. | |
2104 | */ | |
2105 | days_missed = time_since_last_interval_expired_sec * kFixedPointFactor / kNumSecondsInDay; | |
2106 | budget_missed = days_missed * freeze_daily_pageouts_max / kFixedPointFactor; | |
f427ee49 A |
2107 | new_budget = rollover + daily_budget_pageouts + budget_missed; |
2108 | return (uint32_t) MIN(new_budget, UINT32_MAX); | |
2109 | } | |
2110 | ||
2a1bd2d3 A |
2111 | /* |
2112 | * Mark all non frozen, freezer-eligible processes as skipped for the given reason. | |
2113 | * Used when we hit some system freeze limit and know that we won't be considering remaining processes. | |
2114 | * If you're using this for a new reason, make sure to add it to memorystatus_freeze_init_proc so that | |
2115 | * it gets set for new processes. | |
2116 | * NB: These processes will retain this skip reason until they are reconsidered by memorystatus_is_process_eligible_for_freeze. | |
2117 | */ | |
f427ee49 | 2118 | static void |
2a1bd2d3 A |
2119 | memorystatus_freeze_mark_eligible_processes_with_skip_reason(memorystatus_freeze_skip_reason_t reason, bool locked) |
2120 | { | |
2121 | LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED); | |
c3c9b80d | 2122 | LCK_MTX_ASSERT(&proc_list_mlock, locked ? LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED); |
2a1bd2d3 A |
2123 | unsigned int band = JETSAM_PRIORITY_IDLE; |
2124 | proc_t p; | |
2125 | ||
2126 | if (!locked) { | |
2127 | proc_list_lock(); | |
2128 | } | |
2129 | p = memorystatus_get_first_proc_locked(&band, FALSE); | |
2130 | while (p) { | |
2131 | assert(p->p_memstat_effectivepriority == (int32_t) band); | |
2132 | if (!(p->p_memstat_state & P_MEMSTAT_FROZEN) && memorystatus_is_process_eligible_for_freeze(p)) { | |
2133 | assert(p->p_memstat_freeze_skip_reason == kMemorystatusFreezeSkipReasonNone); | |
2134 | p->p_memstat_freeze_skip_reason = (uint8_t) reason; | |
2135 | } | |
2136 | p = memorystatus_get_next_proc_locked(&band, p, FALSE); | |
2137 | } | |
2138 | if (!locked) { | |
2139 | proc_list_unlock(); | |
2140 | } | |
2141 | } | |
2142 | ||
2143 | /* | |
2144 | * Called after we fail to freeze a process. | |
2145 | * Logs the failure, marks the process with the failure reason, and updates freezer stats. | |
2146 | */ | |
2147 | static void | |
2148 | memorystatus_freeze_handle_error( | |
2149 | proc_t p, | |
f427ee49 | 2150 | const int freezer_error_code, |
2a1bd2d3 A |
2151 | bool was_refreeze, |
2152 | pid_t pid, | |
2153 | const coalition_t coalition, | |
2154 | const char* log_prefix) | |
f427ee49 | 2155 | { |
2a1bd2d3 A |
2156 | const char *reason; |
2157 | memorystatus_freeze_skip_reason_t skip_reason; | |
2158 | ||
2159 | switch (freezer_error_code) { | |
2160 | case FREEZER_ERROR_EXCESS_SHARED_MEMORY: | |
f427ee49 | 2161 | memorystatus_freezer_stats.mfs_error_excess_shared_memory_count++; |
2a1bd2d3 A |
2162 | reason = "too much shared memory"; |
2163 | skip_reason = kMemorystatusFreezeSkipReasonExcessSharedMemory; | |
2164 | break; | |
2165 | case FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO: | |
f427ee49 | 2166 | memorystatus_freezer_stats.mfs_error_low_private_shared_ratio_count++; |
2a1bd2d3 A |
2167 | reason = "private-shared pages ratio"; |
2168 | skip_reason = kMemorystatusFreezeSkipReasonLowPrivateSharedRatio; | |
2169 | break; | |
2170 | case FREEZER_ERROR_NO_COMPRESSOR_SPACE: | |
f427ee49 | 2171 | memorystatus_freezer_stats.mfs_error_no_compressor_space_count++; |
2a1bd2d3 A |
2172 | reason = "no compressor space"; |
2173 | skip_reason = kMemorystatusFreezeSkipReasonNoCompressorSpace; | |
2174 | break; | |
2175 | case FREEZER_ERROR_NO_SWAP_SPACE: | |
f427ee49 | 2176 | memorystatus_freezer_stats.mfs_error_no_swap_space_count++; |
2a1bd2d3 A |
2177 | reason = "no swap space"; |
2178 | skip_reason = kMemorystatusFreezeSkipReasonNoSwapSpace; | |
2179 | break; | |
2180 | default: | |
2181 | reason = "unknown error"; | |
2182 | skip_reason = kMemorystatusFreezeSkipReasonOther; | |
f427ee49 | 2183 | } |
2a1bd2d3 A |
2184 | |
2185 | p->p_memstat_freeze_skip_reason = (uint8_t) skip_reason; | |
2186 | ||
2187 | os_log_with_startup_serial(OS_LOG_DEFAULT, "%s: %sfreezing (%s) pid %d [%s]...skipped (%s)\n", | |
2188 | log_prefix, was_refreeze ? "re" : "", | |
2189 | (coalition == NULL ? "general" : "coalition-driven"), pid, | |
2190 | ((p && *p->p_name) ? p->p_name : "unknown"), reason); | |
f427ee49 A |
2191 | } |
2192 | ||
2193 | /* | |
2194 | * Start a new normal throttle interval with the given budget. | |
2195 | * Caller must hold the freezer mutex | |
2196 | */ | |
2197 | static void | |
2198 | memorystatus_freeze_start_normal_throttle_interval(uint32_t new_budget, mach_timespec_t start_ts) | |
2199 | { | |
2200 | LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED); | |
c3c9b80d | 2201 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED); |
f427ee49 A |
2202 | |
2203 | normal_throttle_window->max_pageouts = new_budget; | |
2204 | normal_throttle_window->ts.tv_sec = normal_throttle_window->mins * 60; | |
2205 | normal_throttle_window->ts.tv_nsec = 0; | |
2206 | ADD_MACH_TIMESPEC(&normal_throttle_window->ts, &start_ts); | |
2207 | /* Since we update the throttle stats pre-freeze, adjust for overshoot here */ | |
2208 | if (normal_throttle_window->pageouts > normal_throttle_window->max_pageouts) { | |
2209 | normal_throttle_window->pageouts -= normal_throttle_window->max_pageouts; | |
2210 | } else { | |
2211 | normal_throttle_window->pageouts = 0; | |
2212 | } | |
2213 | /* Ensure the normal window is now active. */ | |
2214 | memorystatus_freeze_degradation = FALSE; | |
c3c9b80d A |
2215 | memorystatus_freezer_stats.mfs_shared_pages_skipped = 0; |
2216 | /* | |
2217 | * Reset the thawed percentage to 0 so we re-evaluate in the new interval. | |
2218 | */ | |
2219 | os_atomic_store(&memorystatus_freezer_stats.mfs_processes_thawed, 0, release); | |
2220 | os_atomic_store(&memorystatus_freezer_stats.mfs_processes_frozen, memorystatus_frozen_count, release); | |
2221 | os_atomic_inc(&memorystatus_freeze_current_interval, release); | |
ea3f0419 A |
2222 | } |
2223 | ||
2224 | #if DEVELOPMENT || DEBUG | |
2225 | ||
2226 | static int | |
2227 | sysctl_memorystatus_freeze_calculate_new_budget SYSCTL_HANDLER_ARGS | |
2228 | { | |
2229 | #pragma unused(arg1, arg2) | |
2230 | int error = 0; | |
2231 | unsigned int time_since_last_interval_expired_sec = 0; | |
2232 | unsigned int new_budget; | |
2233 | ||
2234 | error = sysctl_handle_int(oidp, &time_since_last_interval_expired_sec, 0, req); | |
2235 | if (error || !req->newptr) { | |
2236 | return error; | |
2237 | } | |
2238 | new_budget = memorystatus_freeze_calculate_new_budget(time_since_last_interval_expired_sec, 1, NORMAL_WINDOW_MINS, 0); | |
2239 | return copyout(&new_budget, req->oldptr, MIN(sizeof(req->oldlen), sizeof(new_budget))); | |
2240 | } | |
2241 | ||
2242 | SYSCTL_PROC(_vm, OID_AUTO, memorystatus_freeze_calculate_new_budget, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MASKED, | |
2243 | 0, 0, &sysctl_memorystatus_freeze_calculate_new_budget, "I", ""); | |
2244 | ||
2245 | #endif /* DEVELOPMENT || DEBUG */ | |
cb323159 | 2246 | |
2a1bd2d3 A |
2247 | /* |
2248 | * Called when we first run out of budget in an interval. | |
2249 | * Marks idle processes as not frozen due to lack of budget. | |
2250 | * NB: It might be worth having a CA event here. | |
2251 | */ | |
2252 | static void | |
2253 | memorystatus_freeze_out_of_budget(const struct throttle_interval_t *interval) | |
2254 | { | |
2255 | LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED); | |
c3c9b80d | 2256 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED); |
2a1bd2d3 A |
2257 | |
2258 | mach_timespec_t time_left = {0, 0}; | |
2259 | mach_timespec_t now_ts; | |
2260 | clock_sec_t sec; | |
2261 | clock_nsec_t nsec; | |
2262 | ||
2263 | time_left.tv_sec = interval->ts.tv_sec; | |
2264 | time_left.tv_nsec = 0; | |
2265 | clock_get_system_nanotime(&sec, &nsec); | |
2266 | now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX)); | |
2267 | now_ts.tv_nsec = nsec; | |
2268 | ||
2269 | SUB_MACH_TIMESPEC(&time_left, &now_ts); | |
2270 | os_log(OS_LOG_DEFAULT, | |
2271 | "memorystatus_freeze: Out of NAND write budget with %u minutes left in the current freezer interval. %u procs are frozen.\n", | |
2272 | time_left.tv_sec / 60, memorystatus_frozen_count); | |
2273 | ||
2274 | memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfBudget, false); | |
2275 | } | |
2276 | ||
2277 | /* | |
2278 | * Called when we cross over the threshold of maximum frozen processes allowed. | |
2279 | * Marks remaining idle processes as not frozen due to lack of slots. | |
2280 | */ | |
2281 | static void | |
2282 | memorystatus_freeze_out_of_slots(void) | |
2283 | { | |
2284 | LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED); | |
c3c9b80d | 2285 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_OWNED); |
2a1bd2d3 A |
2286 | assert(memorystatus_frozen_count == memorystatus_frozen_processes_max); |
2287 | ||
2288 | os_log(OS_LOG_DEFAULT, | |
2289 | "memorystatus_freeze: Out of slots in the freezer. %u procs are frozen.\n", | |
2290 | memorystatus_frozen_count); | |
2291 | ||
2292 | memorystatus_freeze_mark_eligible_processes_with_skip_reason(kMemorystatusFreezeSkipReasonOutOfSlots, true); | |
2293 | } | |
2294 | ||
cb323159 A |
2295 | /* |
2296 | * This function will do 4 things: | |
2297 | * | |
2298 | * 1) check to see if we are currently in a degraded freezer mode, and if so: | |
2299 | * - check to see if our window has expired and we should exit this mode, OR, | |
2300 | * - return a budget based on the degraded throttle window's max. pageouts vs current pageouts. | |
2301 | * | |
2302 | * 2) check to see if we are in a NEW normal window and update the normal throttle window's params. | |
2303 | * | |
2304 | * 3) check what the current normal window allows for a budget. | |
2305 | * | |
2306 | * 4) calculate the current rate of pageouts for DEGRADED_WINDOW_MINS duration. If that rate is below | |
2307 | * what we would normally expect, then we are running low on our daily budget and need to enter | |
2308 | * degraded perf. mode. | |
ea3f0419 A |
2309 | * |
2310 | * Caller must hold the freezer mutex | |
2311 | * Caller must not hold the proc_list lock | |
cb323159 A |
2312 | */ |
2313 | ||
2314 | static void | |
2315 | memorystatus_freeze_update_throttle(uint64_t *budget_pages_allowed) | |
2316 | { | |
2317 | clock_sec_t sec; | |
2318 | clock_nsec_t nsec; | |
ea3f0419 A |
2319 | mach_timespec_t now_ts; |
2320 | LCK_MTX_ASSERT(&freezer_mutex, LCK_MTX_ASSERT_OWNED); | |
c3c9b80d | 2321 | LCK_MTX_ASSERT(&proc_list_mlock, LCK_MTX_ASSERT_NOTOWNED); |
cb323159 A |
2322 | |
2323 | unsigned int freeze_daily_pageouts_max = 0; | |
f427ee49 | 2324 | uint32_t budget_rollover = 0; |
2a1bd2d3 | 2325 | bool started_with_budget = (*budget_pages_allowed > 0); |
cb323159 A |
2326 | |
2327 | #if DEVELOPMENT || DEBUG | |
2328 | if (!memorystatus_freeze_throttle_enabled) { | |
2329 | /* | |
2330 | * No throttling...we can use the full budget everytime. | |
2331 | */ | |
2332 | *budget_pages_allowed = UINT64_MAX; | |
2333 | return; | |
2334 | } | |
2335 | #endif | |
2336 | ||
2337 | clock_get_system_nanotime(&sec, &nsec); | |
f427ee49 | 2338 | now_ts.tv_sec = (unsigned int)(MIN(sec, UINT32_MAX)); |
ea3f0419 | 2339 | now_ts.tv_nsec = nsec; |
cb323159 A |
2340 | |
2341 | struct throttle_interval_t *interval = NULL; | |
2342 | ||
2343 | if (memorystatus_freeze_degradation == TRUE) { | |
2344 | interval = degraded_throttle_window; | |
2345 | ||
ea3f0419 | 2346 | if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) { |
cb323159 A |
2347 | interval->pageouts = 0; |
2348 | interval->max_pageouts = 0; | |
2349 | } else { | |
2350 | *budget_pages_allowed = interval->max_pageouts - interval->pageouts; | |
2351 | } | |
2352 | } | |
2353 | ||
2354 | interval = normal_throttle_window; | |
2355 | ||
ea3f0419 A |
2356 | if (CMP_MACH_TIMESPEC(&now_ts, &interval->ts) >= 0) { |
2357 | /* How long has it been since the previous interval expired? */ | |
2358 | mach_timespec_t expiration_period_ts = now_ts; | |
2359 | SUB_MACH_TIMESPEC(&expiration_period_ts, &interval->ts); | |
f427ee49 A |
2360 | /* Get unused budget. Clamp to 0. We'll adjust for overused budget in the next interval. */ |
2361 | budget_rollover = interval->pageouts > interval->max_pageouts ? | |
2362 | 0 : interval->max_pageouts - interval->pageouts; | |
2363 | ||
2364 | memorystatus_freeze_start_normal_throttle_interval(memorystatus_freeze_calculate_new_budget( | |
2365 | expiration_period_ts.tv_sec, interval->burst_multiple, | |
2366 | interval->mins, budget_rollover), | |
2367 | now_ts); | |
cb323159 A |
2368 | *budget_pages_allowed = interval->max_pageouts; |
2369 | ||
2370 | memorystatus_demote_frozen_processes(FALSE); /* normal mode...don't force a demotion */ | |
2371 | } else { | |
2372 | /* | |
2373 | * Current throttle window. | |
2374 | * Deny freezing if we have no budget left. | |
2375 | * Try graceful degradation if we are within 25% of: | |
2376 | * - the daily budget, and | |
2377 | * - the current budget left is below our normal budget expectations. | |
2378 | */ | |
2379 | ||
cb323159 A |
2380 | if (memorystatus_freeze_degradation == FALSE) { |
2381 | if (interval->pageouts >= interval->max_pageouts) { | |
2382 | *budget_pages_allowed = 0; | |
2a1bd2d3 A |
2383 | if (started_with_budget) { |
2384 | memorystatus_freeze_out_of_budget(interval); | |
2385 | } | |
cb323159 A |
2386 | } else { |
2387 | int budget_left = interval->max_pageouts - interval->pageouts; | |
2388 | int budget_threshold = (freeze_daily_pageouts_max * FREEZE_DEGRADATION_BUDGET_THRESHOLD) / 100; | |
2389 | ||
2390 | mach_timespec_t time_left = {0, 0}; | |
2391 | ||
2392 | time_left.tv_sec = interval->ts.tv_sec; | |
2393 | time_left.tv_nsec = 0; | |
2394 | ||
ea3f0419 | 2395 | SUB_MACH_TIMESPEC(&time_left, &now_ts); |
cb323159 A |
2396 | |
2397 | if (budget_left <= budget_threshold) { | |
2398 | /* | |
2399 | * For the current normal window, calculate how much we would pageout in a DEGRADED_WINDOW_MINS duration. | |
2400 | * And also calculate what we would pageout for the same DEGRADED_WINDOW_MINS duration if we had the full | |
2401 | * daily pageout budget. | |
2402 | */ | |
2403 | ||
2404 | unsigned int current_budget_rate_allowed = ((budget_left / time_left.tv_sec) / 60) * DEGRADED_WINDOW_MINS; | |
2405 | unsigned int normal_budget_rate_allowed = (freeze_daily_pageouts_max / NORMAL_WINDOW_MINS) * DEGRADED_WINDOW_MINS; | |
2406 | ||
2407 | /* | |
2408 | * The current rate of pageouts is below what we would expect for | |
2409 | * the normal rate i.e. we have below normal budget left and so... | |
2410 | */ | |
2411 | ||
2412 | if (current_budget_rate_allowed < normal_budget_rate_allowed) { | |
2413 | memorystatus_freeze_degradation = TRUE; | |
2414 | degraded_throttle_window->max_pageouts = current_budget_rate_allowed; | |
2415 | degraded_throttle_window->pageouts = 0; | |
2416 | ||
2417 | /* | |
2418 | * Switch over to the degraded throttle window so the budget | |
2419 | * doled out is based on that window. | |
2420 | */ | |
2421 | interval = degraded_throttle_window; | |
2422 | } | |
2423 | } | |
2424 | ||
2425 | *budget_pages_allowed = interval->max_pageouts - interval->pageouts; | |
2426 | } | |
2427 | } | |
2428 | } | |
2429 | ||
2430 | MEMORYSTATUS_DEBUG(1, "memorystatus_freeze_update_throttle_interval: throttle updated - %d frozen (%d max) within %dm; %dm remaining; throttle %s\n", | |
ea3f0419 | 2431 | interval->pageouts, interval->max_pageouts, interval->mins, (interval->ts.tv_sec - now_ts->tv_sec) / 60, |
cb323159 A |
2432 | interval->throttle ? "on" : "off"); |
2433 | } | |
2434 | ||
2435 | static void | |
2436 | memorystatus_freeze_thread(void *param __unused, wait_result_t wr __unused) | |
2437 | { | |
2438 | static boolean_t memorystatus_freeze_swap_low = FALSE; | |
2439 | ||
2440 | lck_mtx_lock(&freezer_mutex); | |
2441 | ||
2442 | if (memorystatus_freeze_enabled) { | |
2443 | if ((memorystatus_frozen_count < memorystatus_frozen_processes_max) || | |
2444 | (memorystatus_refreeze_eligible_count >= MIN_THAW_REFREEZE_THRESHOLD)) { | |
2445 | if (memorystatus_can_freeze(&memorystatus_freeze_swap_low)) { | |
2446 | /* Only freeze if we've not exceeded our pageout budgets.*/ | |
2447 | memorystatus_freeze_update_throttle(&memorystatus_freeze_budget_pages_remaining); | |
2448 | ||
2449 | if (memorystatus_freeze_budget_pages_remaining) { | |
2450 | memorystatus_freeze_top_process(); | |
2451 | } else { | |
2452 | memorystatus_demote_frozen_processes(TRUE); /* urgent mode..force one demotion */ | |
2453 | } | |
2454 | } | |
2455 | } | |
2456 | } | |
2457 | ||
2458 | /* | |
2a1bd2d3 A |
2459 | * Give applications currently in the aging band a chance to age out into the idle band before |
2460 | * running the freezer again. | |
cb323159 A |
2461 | */ |
2462 | memorystatus_freezer_thread_next_run_ts = mach_absolute_time() + memorystatus_apps_idle_delay_time; | |
2463 | ||
2464 | assert_wait((event_t) &memorystatus_freeze_wakeup, THREAD_UNINT); | |
2465 | lck_mtx_unlock(&freezer_mutex); | |
2466 | ||
2467 | thread_block((thread_continue_t) memorystatus_freeze_thread); | |
2468 | } | |
2469 | ||
2470 | boolean_t | |
2471 | memorystatus_freeze_thread_should_run(void) | |
2472 | { | |
2473 | /* | |
2474 | * No freezer_mutex held here...see why near call-site | |
2475 | * within memorystatus_pages_update(). | |
2476 | */ | |
2477 | ||
2478 | boolean_t should_run = FALSE; | |
2479 | ||
2480 | if (memorystatus_freeze_enabled == FALSE) { | |
2481 | goto out; | |
2482 | } | |
2483 | ||
2484 | if (memorystatus_available_pages > memorystatus_freeze_threshold) { | |
2485 | goto out; | |
2486 | } | |
2487 | ||
ea3f0419 A |
2488 | memorystatus_freezer_stats.mfs_below_threshold_count++; |
2489 | ||
2490 | if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) { | |
2491 | /* | |
2492 | * Consider this as a skip even if we wake up to refreeze because | |
2493 | * we won't freeze any new procs. | |
2494 | */ | |
2495 | memorystatus_freezer_stats.mfs_skipped_full_count++; | |
2496 | if (memorystatus_refreeze_eligible_count < MIN_THAW_REFREEZE_THRESHOLD) { | |
2497 | goto out; | |
2498 | } | |
cb323159 A |
2499 | } |
2500 | ||
2501 | if (memorystatus_frozen_shared_mb_max && (memorystatus_frozen_shared_mb >= memorystatus_frozen_shared_mb_max)) { | |
ea3f0419 | 2502 | memorystatus_freezer_stats.mfs_skipped_shared_mb_high_count++; |
cb323159 A |
2503 | goto out; |
2504 | } | |
2505 | ||
2506 | uint64_t curr_time = mach_absolute_time(); | |
2507 | ||
2508 | if (curr_time < memorystatus_freezer_thread_next_run_ts) { | |
2509 | goto out; | |
2510 | } | |
2511 | ||
2512 | should_run = TRUE; | |
2513 | ||
2514 | out: | |
2515 | return should_run; | |
2516 | } | |
2517 | ||
2518 | int | |
2519 | memorystatus_get_process_is_freezable(pid_t pid, int *is_freezable) | |
2520 | { | |
2521 | proc_t p = PROC_NULL; | |
2522 | ||
2523 | if (pid == 0) { | |
2524 | return EINVAL; | |
2525 | } | |
2526 | ||
2527 | p = proc_find(pid); | |
2528 | if (!p) { | |
2529 | return ESRCH; | |
2530 | } | |
2531 | ||
2532 | /* | |
2533 | * Only allow this on the current proc for now. | |
2534 | * We can check for privileges and allow targeting another process in the future. | |
2535 | */ | |
2536 | if (p != current_proc()) { | |
2537 | proc_rele(p); | |
2538 | return EPERM; | |
2539 | } | |
2540 | ||
2541 | proc_list_lock(); | |
2542 | *is_freezable = ((p->p_memstat_state & P_MEMSTAT_FREEZE_DISABLED) ? 0 : 1); | |
2543 | proc_rele_locked(p); | |
2544 | proc_list_unlock(); | |
2545 | ||
2546 | return 0; | |
2547 | } | |
2548 | ||
2a1bd2d3 A |
2549 | errno_t |
2550 | memorystatus_get_process_is_frozen(pid_t pid, int *is_frozen) | |
2551 | { | |
2552 | proc_t p = PROC_NULL; | |
2553 | ||
2554 | if (pid == 0) { | |
2555 | return EINVAL; | |
2556 | } | |
2557 | ||
2558 | /* | |
2559 | * Only allow this on the current proc for now. | |
2560 | * We can check for privileges and allow targeting another process in the future. | |
2561 | */ | |
2562 | p = current_proc(); | |
2563 | if (p->p_pid != pid) { | |
2564 | return EPERM; | |
2565 | } | |
2566 | ||
2567 | proc_list_lock(); | |
2568 | *is_frozen = (p->p_memstat_state & P_MEMSTAT_FROZEN) != 0; | |
2569 | proc_list_unlock(); | |
2570 | ||
2571 | return 0; | |
2572 | } | |
2573 | ||
cb323159 A |
2574 | int |
2575 | memorystatus_set_process_is_freezable(pid_t pid, boolean_t is_freezable) | |
2576 | { | |
2577 | proc_t p = PROC_NULL; | |
2578 | ||
2579 | if (pid == 0) { | |
2580 | return EINVAL; | |
2581 | } | |
2582 | ||
2583 | /* | |
2584 | * To enable freezable status, you need to be root or an entitlement. | |
2585 | */ | |
2586 | if (is_freezable && | |
2587 | !kauth_cred_issuser(kauth_cred_get()) && | |
2588 | !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) { | |
2589 | return EPERM; | |
2590 | } | |
2591 | ||
2592 | p = proc_find(pid); | |
2593 | if (!p) { | |
2594 | return ESRCH; | |
2595 | } | |
2596 | ||
2597 | /* | |
2598 | * A process can change its own status. A coalition leader can | |
2599 | * change the status of coalition members. | |
2600 | */ | |
2601 | if (p != current_proc()) { | |
2602 | coalition_t coal = task_get_coalition(proc_task(p), COALITION_TYPE_JETSAM); | |
2603 | if (!coalition_is_leader(proc_task(current_proc()), coal)) { | |
2604 | proc_rele(p); | |
2605 | return EPERM; | |
2606 | } | |
2607 | } | |
2608 | ||
2609 | proc_list_lock(); | |
2610 | if (is_freezable == FALSE) { | |
2611 | /* Freeze preference set to FALSE. Set the P_MEMSTAT_FREEZE_DISABLED bit. */ | |
2612 | p->p_memstat_state |= P_MEMSTAT_FREEZE_DISABLED; | |
2613 | printf("memorystatus_set_process_is_freezable: disabling freeze for pid %d [%s]\n", | |
2614 | p->p_pid, (*p->p_name ? p->p_name : "unknown")); | |
2615 | } else { | |
2616 | p->p_memstat_state &= ~P_MEMSTAT_FREEZE_DISABLED; | |
2617 | printf("memorystatus_set_process_is_freezable: enabling freeze for pid %d [%s]\n", | |
2618 | p->p_pid, (*p->p_name ? p->p_name : "unknown")); | |
2619 | } | |
2620 | proc_rele_locked(p); | |
2621 | proc_list_unlock(); | |
2622 | ||
2623 | return 0; | |
2624 | } | |
2625 | ||
2a1bd2d3 A |
2626 | /* |
2627 | * Called when process is created before it is added to a memorystatus bucket. | |
2628 | */ | |
2629 | void | |
2630 | memorystatus_freeze_init_proc(proc_t p) | |
2631 | { | |
2632 | /* NB: Process is not on the memorystatus lists yet so it's safe to modify the skip reason without the freezer mutex. */ | |
2633 | if (memorystatus_freeze_budget_pages_remaining == 0) { | |
2634 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfBudget; | |
2635 | } else if ((memorystatus_frozen_count >= memorystatus_frozen_processes_max)) { | |
2636 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonOutOfSlots; | |
2637 | } else { | |
2638 | p->p_memstat_freeze_skip_reason = kMemorystatusFreezeSkipReasonNone; | |
2639 | } | |
2640 | } | |
2641 | ||
2642 | ||
cb323159 A |
2643 | static int |
2644 | sysctl_memorystatus_do_fastwake_warmup_all SYSCTL_HANDLER_ARGS | |
2645 | { | |
2646 | #pragma unused(oidp, arg1, arg2) | |
2647 | ||
2648 | if (!req->newptr) { | |
2649 | return EINVAL; | |
2650 | } | |
2651 | ||
2652 | /* Need to be root or have entitlement */ | |
2653 | if (!kauth_cred_issuser(kauth_cred_get()) && !IOTaskHasEntitlement(current_task(), MEMORYSTATUS_ENTITLEMENT)) { | |
2654 | return EPERM; | |
2655 | } | |
2656 | ||
2657 | if (memorystatus_freeze_enabled == FALSE) { | |
2658 | return ENOTSUP; | |
2659 | } | |
2660 | ||
2661 | do_fastwake_warmup_all(); | |
2662 | ||
2663 | return 0; | |
2664 | } | |
2665 | ||
2666 | SYSCTL_PROC(_kern, OID_AUTO, memorystatus_do_fastwake_warmup_all, CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_LOCKED | CTLFLAG_MASKED, | |
2667 | 0, 0, &sysctl_memorystatus_do_fastwake_warmup_all, "I", ""); | |
2668 | ||
2669 | #endif /* CONFIG_FREEZE */ |