]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2016 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <kern/policy_internal.h> | |
30 | #include <mach/task_policy.h> | |
31 | ||
32 | #include <mach/mach_types.h> | |
33 | #include <mach/task_server.h> | |
34 | ||
35 | #include <kern/host.h> /* host_priv_self() */ | |
36 | #include <mach/host_priv.h> /* host_get_special_port() */ | |
37 | #include <mach/host_special_ports.h> /* RESOURCE_NOTIFY_PORT */ | |
38 | #include <kern/sched.h> | |
39 | #include <kern/task.h> | |
40 | #include <mach/thread_policy.h> | |
41 | #include <sys/errno.h> | |
42 | #include <sys/resource.h> | |
43 | #include <machine/limits.h> | |
44 | #include <kern/ledger.h> | |
45 | #include <kern/thread_call.h> | |
46 | #include <kern/sfi.h> | |
47 | #include <kern/coalition.h> | |
48 | #if CONFIG_TELEMETRY | |
49 | #include <kern/telemetry.h> | |
50 | #endif | |
51 | ||
52 | #if IMPORTANCE_INHERITANCE | |
53 | #include <ipc/ipc_importance.h> | |
54 | #if IMPORTANCE_DEBUG | |
55 | #include <mach/machine/sdt.h> | |
56 | #endif /* IMPORTANCE_DEBUG */ | |
57 | #endif /* IMPORTANCE_INHERITANCE */ | |
58 | ||
59 | #include <sys/kdebug.h> | |
60 | ||
61 | /* | |
62 | * Task Policy | |
63 | * | |
64 | * This subsystem manages task and thread IO priority and backgrounding, | |
65 | * as well as importance inheritance, process suppression, task QoS, and apptype. | |
66 | * These properties have a surprising number of complex interactions, so they are | |
67 | * centralized here in one state machine to simplify the implementation of those interactions. | |
68 | * | |
69 | * Architecture: | |
70 | * Threads and tasks have two policy fields: requested, effective. | |
71 | * Requested represents the wishes of each interface that influences task policy. | |
72 | * Effective represents the distillation of that policy into a set of behaviors. | |
73 | * | |
74 | * Each thread making a modification in the policy system passes a 'pending' struct, | |
75 | * which tracks updates that will be applied after dropping the policy engine lock. | |
76 | * | |
77 | * Each interface that has an input into the task policy state machine controls a field in requested. | |
78 | * If the interface has a getter, it returns what is in the field in requested, but that is | |
79 | * not necessarily what is actually in effect. | |
80 | * | |
81 | * All kernel subsystems that behave differently based on task policy call into | |
82 | * the proc_get_effective_(task|thread)_policy functions, which return the decision of the task policy state machine | |
83 | * for that subsystem by querying only the 'effective' field. | |
84 | * | |
85 | * Policy change operations: | |
86 | * Here are the steps to change a policy on a task or thread: | |
87 | * 1) Lock task | |
88 | * 2) Change requested field for the relevant policy | |
89 | * 3) Run a task policy update, which recalculates effective based on requested, | |
90 | * then takes a diff between the old and new versions of requested and calls the relevant | |
91 | * other subsystems to apply these changes, and updates the pending field. | |
92 | * 4) Unlock task | |
93 | * 5) Run task policy update complete, which looks at the pending field to update | |
94 | * subsystems which cannot be touched while holding the task lock. | |
95 | * | |
96 | * To add a new requested policy, add the field in the requested struct, the flavor in task.h, | |
97 | * the setter and getter in proc_(set|get)_task_policy*, | |
98 | * then set up the effects of that behavior in task_policy_update*. If the policy manifests | |
99 | * itself as a distinct effective policy, add it to the effective struct and add it to the | |
100 | * proc_get_effective_task_policy accessor. | |
101 | * | |
102 | * Most policies are set via proc_set_task_policy, but policies that don't fit that interface | |
103 | * roll their own lock/set/update/unlock/complete code inside this file. | |
104 | * | |
105 | * | |
106 | * Suppression policy | |
107 | * | |
108 | * These are a set of behaviors that can be requested for a task. They currently have specific | |
109 | * implied actions when they're enabled, but they may be made customizable in the future. | |
110 | * | |
111 | * When the affected task is boosted, we temporarily disable the suppression behaviors | |
112 | * so that the affected process has a chance to run so it can call the API to permanently | |
113 | * disable the suppression behaviors. | |
114 | * | |
115 | * Locking | |
116 | * | |
117 | * Changing task policy on a task takes the task lock. | |
118 | * Changing task policy on a thread takes the thread mutex. | |
119 | * Task policy changes that affect threads will take each thread's mutex to update it if necessary. | |
120 | * | |
121 | * Querying the effective policy does not take a lock, because callers | |
122 | * may run in interrupt context or other place where locks are not OK. | |
123 | * | |
124 | * This means that any notification of state change needs to be externally synchronized. | |
125 | * We do this by idempotent callouts after the state has changed to ask | |
126 | * other subsystems to update their view of the world. | |
127 | * | |
128 | * TODO: Move all cpu/wakes/io monitor code into a separate file | |
129 | * TODO: Move all importance code over to importance subsystem | |
130 | * TODO: Move all taskwatch code into a separate file | |
131 | * TODO: Move all VM importance code into a separate file | |
132 | */ | |
133 | ||
134 | /* Task policy related helper functions */ | |
135 | static void proc_set_task_policy_locked(task_t task, int category, int flavor, int value, int value2); | |
136 | ||
137 | static void task_policy_update_locked(task_t task, task_pend_token_t pend_token); | |
138 | static void task_policy_update_internal_locked(task_t task, boolean_t in_create, task_pend_token_t pend_token); | |
139 | ||
140 | /* For attributes that have two scalars as input/output */ | |
141 | static void proc_set_task_policy2(task_t task, int category, int flavor, int value1, int value2); | |
142 | static void proc_get_task_policy2(task_t task, int category, int flavor, int *value1, int *value2); | |
143 | ||
144 | #if CONFIG_SCHED_SFI | |
145 | static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role); | |
146 | #endif | |
147 | ||
148 | static uint64_t task_requested_bitfield(task_t task); | |
149 | static uint64_t task_effective_bitfield(task_t task); | |
150 | ||
151 | /* Convenience functions for munging a policy bitfield into a tracepoint */ | |
152 | static uintptr_t trequested_0(task_t task); | |
153 | static uintptr_t trequested_1(task_t task); | |
154 | static uintptr_t teffective_0(task_t task); | |
155 | static uintptr_t teffective_1(task_t task); | |
156 | ||
157 | /* CPU limits helper functions */ | |
158 | static int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled); | |
159 | static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope); | |
160 | static int task_enable_cpumon_locked(task_t task); | |
161 | static int task_disable_cpumon(task_t task); | |
162 | static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled); | |
163 | static int task_apply_resource_actions(task_t task, int type); | |
164 | static void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1); | |
165 | ||
166 | #ifdef MACH_BSD | |
167 | typedef struct proc * proc_t; | |
168 | int proc_pid(void *proc); | |
169 | extern int proc_selfpid(void); | |
170 | extern char * proc_name_address(void *p); | |
171 | extern char * proc_best_name(proc_t proc); | |
172 | ||
173 | extern int proc_pidpathinfo_internal(proc_t p, uint64_t arg, | |
174 | char *buffer, uint32_t buffersize, | |
175 | int32_t *retval); | |
176 | #endif /* MACH_BSD */ | |
177 | ||
178 | ||
179 | ||
180 | /* Importance Inheritance related helper functions */ | |
181 | ||
182 | #if IMPORTANCE_INHERITANCE | |
183 | ||
184 | static void task_importance_mark_live_donor(task_t task, boolean_t donating); | |
185 | static void task_importance_mark_receiver(task_t task, boolean_t receiving); | |
186 | static void task_importance_mark_denap_receiver(task_t task, boolean_t denap); | |
187 | ||
188 | static boolean_t task_is_marked_live_importance_donor(task_t task); | |
189 | static boolean_t task_is_importance_receiver(task_t task); | |
190 | static boolean_t task_is_importance_denap_receiver(task_t task); | |
191 | ||
192 | static int task_importance_hold_internal_assertion(task_t target_task, uint32_t count); | |
193 | ||
194 | static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp); | |
195 | static void task_importance_update_live_donor(task_t target_task); | |
196 | ||
197 | static void task_set_boost_locked(task_t task, boolean_t boost_active); | |
198 | ||
199 | #endif /* IMPORTANCE_INHERITANCE */ | |
200 | ||
201 | #if IMPORTANCE_DEBUG | |
202 | #define __impdebug_only | |
203 | #else | |
204 | #define __impdebug_only __unused | |
205 | #endif | |
206 | ||
207 | #if IMPORTANCE_INHERITANCE | |
208 | #define __imp_only | |
209 | #else | |
210 | #define __imp_only __unused | |
211 | #endif | |
212 | ||
213 | /* | |
214 | * Default parameters for certain policies | |
215 | */ | |
216 | ||
217 | int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1; | |
218 | int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1; | |
219 | int proc_tal_disk_tier = THROTTLE_LEVEL_TIER1; | |
220 | ||
221 | int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & 0xFF); | |
222 | ||
223 | const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2; | |
224 | ||
225 | /* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */ | |
226 | const struct task_requested_policy default_task_requested_policy = { | |
227 | .trp_bg_iotier = proc_default_bg_iotier | |
228 | }; | |
229 | const struct task_effective_policy default_task_effective_policy = {}; | |
230 | ||
231 | /* | |
232 | * Default parameters for CPU usage monitor. | |
233 | * | |
234 | * Default setting is 50% over 3 minutes. | |
235 | */ | |
236 | #define DEFAULT_CPUMON_PERCENTAGE 50 | |
237 | #define DEFAULT_CPUMON_INTERVAL (3 * 60) | |
238 | ||
239 | uint8_t proc_max_cpumon_percentage; | |
240 | uint64_t proc_max_cpumon_interval; | |
241 | ||
242 | ||
243 | kern_return_t | |
244 | qos_latency_policy_validate(task_latency_qos_t ltier) { | |
245 | if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) && | |
246 | ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0))) | |
247 | return KERN_INVALID_ARGUMENT; | |
248 | ||
249 | return KERN_SUCCESS; | |
250 | } | |
251 | ||
252 | kern_return_t | |
253 | qos_throughput_policy_validate(task_throughput_qos_t ttier) { | |
254 | if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) && | |
255 | ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0))) | |
256 | return KERN_INVALID_ARGUMENT; | |
257 | ||
258 | return KERN_SUCCESS; | |
259 | } | |
260 | ||
261 | static kern_return_t | |
262 | task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) { | |
263 | if (count < TASK_QOS_POLICY_COUNT) | |
264 | return KERN_INVALID_ARGUMENT; | |
265 | ||
266 | task_latency_qos_t ltier = qosinfo->task_latency_qos_tier; | |
267 | task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier; | |
268 | ||
269 | kern_return_t kr = qos_latency_policy_validate(ltier); | |
270 | ||
271 | if (kr != KERN_SUCCESS) | |
272 | return kr; | |
273 | ||
274 | kr = qos_throughput_policy_validate(ttier); | |
275 | ||
276 | return kr; | |
277 | } | |
278 | ||
279 | uint32_t | |
280 | qos_extract(uint32_t qv) { | |
281 | return (qv & 0xFF); | |
282 | } | |
283 | ||
284 | uint32_t | |
285 | qos_latency_policy_package(uint32_t qv) { | |
286 | return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv); | |
287 | } | |
288 | ||
289 | uint32_t | |
290 | qos_throughput_policy_package(uint32_t qv) { | |
291 | return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv); | |
292 | } | |
293 | ||
294 | /* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */ | |
295 | static boolean_t task_policy_suppression_disable = FALSE; | |
296 | ||
297 | kern_return_t | |
298 | task_policy_set( | |
299 | task_t task, | |
300 | task_policy_flavor_t flavor, | |
301 | task_policy_t policy_info, | |
302 | mach_msg_type_number_t count) | |
303 | { | |
304 | kern_return_t result = KERN_SUCCESS; | |
305 | ||
306 | if (task == TASK_NULL || task == kernel_task) | |
307 | return (KERN_INVALID_ARGUMENT); | |
308 | ||
309 | switch (flavor) { | |
310 | ||
311 | case TASK_CATEGORY_POLICY: { | |
312 | task_category_policy_t info = (task_category_policy_t)policy_info; | |
313 | ||
314 | if (count < TASK_CATEGORY_POLICY_COUNT) | |
315 | return (KERN_INVALID_ARGUMENT); | |
316 | ||
317 | ||
318 | switch(info->role) { | |
319 | case TASK_FOREGROUND_APPLICATION: | |
320 | case TASK_BACKGROUND_APPLICATION: | |
321 | case TASK_DEFAULT_APPLICATION: | |
322 | proc_set_task_policy(task, | |
323 | TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, | |
324 | info->role); | |
325 | break; | |
326 | ||
327 | case TASK_CONTROL_APPLICATION: | |
328 | if (task != current_task() || task->sec_token.val[0] != 0) | |
329 | result = KERN_INVALID_ARGUMENT; | |
330 | else | |
331 | proc_set_task_policy(task, | |
332 | TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, | |
333 | info->role); | |
334 | break; | |
335 | ||
336 | case TASK_GRAPHICS_SERVER: | |
337 | /* TODO: Restrict this role to FCFS <rdar://problem/12552788> */ | |
338 | if (task != current_task() || task->sec_token.val[0] != 0) | |
339 | result = KERN_INVALID_ARGUMENT; | |
340 | else | |
341 | proc_set_task_policy(task, | |
342 | TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, | |
343 | info->role); | |
344 | break; | |
345 | default: | |
346 | result = KERN_INVALID_ARGUMENT; | |
347 | break; | |
348 | } /* switch (info->role) */ | |
349 | ||
350 | break; | |
351 | } | |
352 | ||
353 | /* Desired energy-efficiency/performance "quality-of-service" */ | |
354 | case TASK_BASE_QOS_POLICY: | |
355 | case TASK_OVERRIDE_QOS_POLICY: | |
356 | { | |
357 | task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; | |
358 | kern_return_t kr = task_qos_policy_validate(qosinfo, count); | |
359 | ||
360 | if (kr != KERN_SUCCESS) | |
361 | return kr; | |
362 | ||
363 | ||
364 | uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier); | |
365 | uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier); | |
366 | ||
367 | proc_set_task_policy2(task, TASK_POLICY_ATTRIBUTE, | |
368 | flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, | |
369 | lqos, tqos); | |
370 | } | |
371 | break; | |
372 | ||
373 | case TASK_BASE_LATENCY_QOS_POLICY: | |
374 | { | |
375 | task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; | |
376 | kern_return_t kr = task_qos_policy_validate(qosinfo, count); | |
377 | ||
378 | if (kr != KERN_SUCCESS) | |
379 | return kr; | |
380 | ||
381 | uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier); | |
382 | ||
383 | proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos); | |
384 | } | |
385 | break; | |
386 | ||
387 | case TASK_BASE_THROUGHPUT_QOS_POLICY: | |
388 | { | |
389 | task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; | |
390 | kern_return_t kr = task_qos_policy_validate(qosinfo, count); | |
391 | ||
392 | if (kr != KERN_SUCCESS) | |
393 | return kr; | |
394 | ||
395 | uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier); | |
396 | ||
397 | proc_set_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos); | |
398 | } | |
399 | break; | |
400 | ||
401 | case TASK_SUPPRESSION_POLICY: | |
402 | { | |
403 | ||
404 | task_suppression_policy_t info = (task_suppression_policy_t)policy_info; | |
405 | ||
406 | if (count < TASK_SUPPRESSION_POLICY_COUNT) | |
407 | return (KERN_INVALID_ARGUMENT); | |
408 | ||
409 | struct task_qos_policy qosinfo; | |
410 | ||
411 | qosinfo.task_latency_qos_tier = info->timer_throttle; | |
412 | qosinfo.task_throughput_qos_tier = info->throughput_qos; | |
413 | ||
414 | kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT); | |
415 | ||
416 | if (kr != KERN_SUCCESS) | |
417 | return kr; | |
418 | ||
419 | /* TEMPORARY disablement of task suppression */ | |
420 | if (task_policy_suppression_disable && info->active) | |
421 | return KERN_SUCCESS; | |
422 | ||
423 | struct task_pend_token pend_token = {}; | |
424 | ||
425 | task_lock(task); | |
426 | ||
427 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
428 | (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START, | |
429 | proc_selfpid(), task_pid(task), trequested_0(task), | |
430 | trequested_1(task), 0); | |
431 | ||
432 | task->requested_policy.trp_sup_active = (info->active) ? 1 : 0; | |
433 | task->requested_policy.trp_sup_lowpri_cpu = (info->lowpri_cpu) ? 1 : 0; | |
434 | task->requested_policy.trp_sup_timer = qos_extract(info->timer_throttle); | |
435 | task->requested_policy.trp_sup_disk = (info->disk_throttle) ? 1 : 0; | |
436 | task->requested_policy.trp_sup_throughput = qos_extract(info->throughput_qos); | |
437 | task->requested_policy.trp_sup_cpu = (info->suppressed_cpu) ? 1 : 0; | |
438 | task->requested_policy.trp_sup_bg_sockets = (info->background_sockets) ? 1 : 0; | |
439 | ||
440 | task_policy_update_locked(task, &pend_token); | |
441 | ||
442 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
443 | (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END, | |
444 | proc_selfpid(), task_pid(task), trequested_0(task), | |
445 | trequested_1(task), 0); | |
446 | ||
447 | task_unlock(task); | |
448 | ||
449 | task_policy_update_complete_unlocked(task, &pend_token); | |
450 | ||
451 | break; | |
452 | ||
453 | } | |
454 | ||
455 | default: | |
456 | result = KERN_INVALID_ARGUMENT; | |
457 | break; | |
458 | } | |
459 | ||
460 | return (result); | |
461 | } | |
462 | ||
463 | /* Sets BSD 'nice' value on the task */ | |
464 | kern_return_t | |
465 | task_importance( | |
466 | task_t task, | |
467 | integer_t importance) | |
468 | { | |
469 | if (task == TASK_NULL || task == kernel_task) | |
470 | return (KERN_INVALID_ARGUMENT); | |
471 | ||
472 | task_lock(task); | |
473 | ||
474 | if (!task->active) { | |
475 | task_unlock(task); | |
476 | ||
477 | return (KERN_TERMINATED); | |
478 | } | |
479 | ||
480 | if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) { | |
481 | task_unlock(task); | |
482 | ||
483 | return (KERN_INVALID_ARGUMENT); | |
484 | } | |
485 | ||
486 | task->importance = importance; | |
487 | ||
488 | struct task_pend_token pend_token = {}; | |
489 | ||
490 | task_policy_update_locked(task, &pend_token); | |
491 | ||
492 | task_unlock(task); | |
493 | ||
494 | task_policy_update_complete_unlocked(task, &pend_token); | |
495 | ||
496 | return (KERN_SUCCESS); | |
497 | } | |
498 | ||
499 | kern_return_t | |
500 | task_policy_get( | |
501 | task_t task, | |
502 | task_policy_flavor_t flavor, | |
503 | task_policy_t policy_info, | |
504 | mach_msg_type_number_t *count, | |
505 | boolean_t *get_default) | |
506 | { | |
507 | if (task == TASK_NULL || task == kernel_task) | |
508 | return (KERN_INVALID_ARGUMENT); | |
509 | ||
510 | switch (flavor) { | |
511 | ||
512 | case TASK_CATEGORY_POLICY: | |
513 | { | |
514 | task_category_policy_t info = (task_category_policy_t)policy_info; | |
515 | ||
516 | if (*count < TASK_CATEGORY_POLICY_COUNT) | |
517 | return (KERN_INVALID_ARGUMENT); | |
518 | ||
519 | if (*get_default) | |
520 | info->role = TASK_UNSPECIFIED; | |
521 | else | |
522 | info->role = proc_get_task_policy(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE); | |
523 | break; | |
524 | } | |
525 | ||
526 | case TASK_BASE_QOS_POLICY: /* FALLTHRU */ | |
527 | case TASK_OVERRIDE_QOS_POLICY: | |
528 | { | |
529 | task_qos_policy_t info = (task_qos_policy_t)policy_info; | |
530 | ||
531 | if (*count < TASK_QOS_POLICY_COUNT) | |
532 | return (KERN_INVALID_ARGUMENT); | |
533 | ||
534 | if (*get_default) { | |
535 | info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED; | |
536 | info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED; | |
537 | } else if (flavor == TASK_BASE_QOS_POLICY) { | |
538 | int value1, value2; | |
539 | ||
540 | proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2); | |
541 | ||
542 | info->task_latency_qos_tier = qos_latency_policy_package(value1); | |
543 | info->task_throughput_qos_tier = qos_throughput_policy_package(value2); | |
544 | ||
545 | } else if (flavor == TASK_OVERRIDE_QOS_POLICY) { | |
546 | int value1, value2; | |
547 | ||
548 | proc_get_task_policy2(task, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2); | |
549 | ||
550 | info->task_latency_qos_tier = qos_latency_policy_package(value1); | |
551 | info->task_throughput_qos_tier = qos_throughput_policy_package(value2); | |
552 | } | |
553 | ||
554 | break; | |
555 | } | |
556 | ||
557 | case TASK_POLICY_STATE: | |
558 | { | |
559 | task_policy_state_t info = (task_policy_state_t)policy_info; | |
560 | ||
561 | if (*count < TASK_POLICY_STATE_COUNT) | |
562 | return (KERN_INVALID_ARGUMENT); | |
563 | ||
564 | /* Only root can get this info */ | |
565 | if (current_task()->sec_token.val[0] != 0) | |
566 | return KERN_PROTECTION_FAILURE; | |
567 | ||
568 | if (*get_default) { | |
569 | info->requested = 0; | |
570 | info->effective = 0; | |
571 | info->pending = 0; | |
572 | info->imp_assertcnt = 0; | |
573 | info->imp_externcnt = 0; | |
574 | info->flags = 0; | |
575 | info->imp_transitions = 0; | |
576 | } else { | |
577 | task_lock(task); | |
578 | ||
579 | info->requested = task_requested_bitfield(task); | |
580 | info->effective = task_effective_bitfield(task); | |
581 | info->pending = 0; | |
582 | ||
583 | info->tps_requested_policy = *(uint64_t*)(&task->requested_policy); | |
584 | info->tps_effective_policy = *(uint64_t*)(&task->effective_policy); | |
585 | ||
586 | info->flags = 0; | |
587 | if (task->task_imp_base != NULL) { | |
588 | info->imp_assertcnt = task->task_imp_base->iit_assertcnt; | |
589 | info->imp_externcnt = IIT_EXTERN(task->task_imp_base); | |
590 | info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0); | |
591 | info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0); | |
592 | info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0); | |
593 | info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0); | |
594 | info->imp_transitions = task->task_imp_base->iit_transitions; | |
595 | } else { | |
596 | info->imp_assertcnt = 0; | |
597 | info->imp_externcnt = 0; | |
598 | info->imp_transitions = 0; | |
599 | } | |
600 | task_unlock(task); | |
601 | } | |
602 | ||
603 | break; | |
604 | } | |
605 | ||
606 | case TASK_SUPPRESSION_POLICY: | |
607 | { | |
608 | task_suppression_policy_t info = (task_suppression_policy_t)policy_info; | |
609 | ||
610 | if (*count < TASK_SUPPRESSION_POLICY_COUNT) | |
611 | return (KERN_INVALID_ARGUMENT); | |
612 | ||
613 | task_lock(task); | |
614 | ||
615 | if (*get_default) { | |
616 | info->active = 0; | |
617 | info->lowpri_cpu = 0; | |
618 | info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED; | |
619 | info->disk_throttle = 0; | |
620 | info->cpu_limit = 0; | |
621 | info->suspend = 0; | |
622 | info->throughput_qos = 0; | |
623 | info->suppressed_cpu = 0; | |
624 | } else { | |
625 | info->active = task->requested_policy.trp_sup_active; | |
626 | info->lowpri_cpu = task->requested_policy.trp_sup_lowpri_cpu; | |
627 | info->timer_throttle = qos_latency_policy_package(task->requested_policy.trp_sup_timer); | |
628 | info->disk_throttle = task->requested_policy.trp_sup_disk; | |
629 | info->cpu_limit = 0; | |
630 | info->suspend = 0; | |
631 | info->throughput_qos = qos_throughput_policy_package(task->requested_policy.trp_sup_throughput); | |
632 | info->suppressed_cpu = task->requested_policy.trp_sup_cpu; | |
633 | info->background_sockets = task->requested_policy.trp_sup_bg_sockets; | |
634 | } | |
635 | ||
636 | task_unlock(task); | |
637 | break; | |
638 | } | |
639 | ||
640 | default: | |
641 | return (KERN_INVALID_ARGUMENT); | |
642 | } | |
643 | ||
644 | return (KERN_SUCCESS); | |
645 | } | |
646 | ||
647 | /* | |
648 | * Called at task creation | |
649 | * We calculate the correct effective but don't apply it to anything yet. | |
650 | * The threads, etc will inherit from the task as they get created. | |
651 | */ | |
652 | void | |
653 | task_policy_create(task_t task, task_t parent_task) | |
654 | { | |
655 | task->requested_policy.trp_apptype = parent_task->requested_policy.trp_apptype; | |
656 | ||
657 | task->requested_policy.trp_int_darwinbg = parent_task->requested_policy.trp_int_darwinbg; | |
658 | task->requested_policy.trp_ext_darwinbg = parent_task->requested_policy.trp_ext_darwinbg; | |
659 | task->requested_policy.trp_int_iotier = parent_task->requested_policy.trp_int_iotier; | |
660 | task->requested_policy.trp_ext_iotier = parent_task->requested_policy.trp_ext_iotier; | |
661 | task->requested_policy.trp_int_iopassive = parent_task->requested_policy.trp_int_iopassive; | |
662 | task->requested_policy.trp_ext_iopassive = parent_task->requested_policy.trp_ext_iopassive; | |
663 | task->requested_policy.trp_bg_iotier = parent_task->requested_policy.trp_bg_iotier; | |
664 | task->requested_policy.trp_terminated = parent_task->requested_policy.trp_terminated; | |
665 | task->requested_policy.trp_qos_clamp = parent_task->requested_policy.trp_qos_clamp; | |
666 | ||
667 | if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) { | |
668 | if (parent_task->requested_policy.trp_boosted) { | |
669 | task->requested_policy.trp_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE; | |
670 | task_importance_mark_donor(task, TRUE); | |
671 | } else { | |
672 | task->requested_policy.trp_apptype = TASK_APPTYPE_DAEMON_BACKGROUND; | |
673 | task_importance_mark_receiver(task, FALSE); | |
674 | } | |
675 | } | |
676 | ||
677 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
678 | (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START, | |
679 | task_pid(task), teffective_0(task), | |
680 | teffective_1(task), task->priority, 0); | |
681 | ||
682 | task_policy_update_internal_locked(task, TRUE, NULL); | |
683 | ||
684 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
685 | (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END, | |
686 | task_pid(task), teffective_0(task), | |
687 | teffective_1(task), task->priority, 0); | |
688 | ||
689 | task_importance_update_live_donor(task); | |
690 | } | |
691 | ||
692 | ||
693 | static void | |
694 | task_policy_update_locked(task_t task, task_pend_token_t pend_token) | |
695 | { | |
696 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
697 | (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK) | DBG_FUNC_START), | |
698 | task_pid(task), teffective_0(task), | |
699 | teffective_1(task), task->priority, 0); | |
700 | ||
701 | task_policy_update_internal_locked(task, FALSE, pend_token); | |
702 | ||
703 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
704 | (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_TASK)) | DBG_FUNC_END, | |
705 | task_pid(task), teffective_0(task), | |
706 | teffective_1(task), task->priority, 0); | |
707 | } | |
708 | ||
709 | /* | |
710 | * One state update function TO RULE THEM ALL | |
711 | * | |
712 | * This function updates the task or thread effective policy fields | |
713 | * and pushes the results to the relevant subsystems. | |
714 | * | |
715 | * Must call update_complete after unlocking the task, | |
716 | * as some subsystems cannot be updated while holding the task lock. | |
717 | * | |
718 | * Called with task locked, not thread | |
719 | */ | |
720 | ||
721 | static void | |
722 | task_policy_update_internal_locked(task_t task, boolean_t in_create, task_pend_token_t pend_token) | |
723 | { | |
724 | /* | |
725 | * Step 1: | |
726 | * Gather requested policy | |
727 | */ | |
728 | ||
729 | struct task_requested_policy requested = task->requested_policy; | |
730 | ||
731 | /* | |
732 | * Step 2: | |
733 | * Calculate new effective policies from requested policy and task state | |
734 | * Rules: | |
735 | * Don't change requested, it won't take effect | |
736 | */ | |
737 | ||
738 | struct task_effective_policy next = {}; | |
739 | ||
740 | /* Update task role */ | |
741 | next.tep_role = requested.trp_role; | |
742 | ||
743 | /* Set task qos clamp and ceiling */ | |
744 | next.tep_qos_clamp = requested.trp_qos_clamp; | |
745 | ||
746 | if (requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT || | |
747 | requested.trp_apptype == TASK_APPTYPE_APP_TAL) { | |
748 | ||
749 | switch (next.tep_role) { | |
750 | case TASK_FOREGROUND_APPLICATION: | |
751 | /* Foreground apps get urgent scheduler priority */ | |
752 | next.tep_qos_ui_is_urgent = 1; | |
753 | next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED; | |
754 | break; | |
755 | ||
756 | case TASK_BACKGROUND_APPLICATION: | |
757 | /* This is really 'non-focal but on-screen' */ | |
758 | next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED; | |
759 | break; | |
760 | ||
761 | case TASK_DEFAULT_APPLICATION: | |
762 | /* This is 'may render UI but we don't know if it's focal/nonfocal' */ | |
763 | next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED; | |
764 | break; | |
765 | ||
766 | case TASK_NONUI_APPLICATION: | |
767 | /* i.e. 'off-screen' */ | |
768 | next.tep_qos_ceiling = THREAD_QOS_LEGACY; | |
769 | break; | |
770 | ||
771 | case TASK_CONTROL_APPLICATION: | |
772 | case TASK_GRAPHICS_SERVER: | |
773 | next.tep_qos_ui_is_urgent = 1; | |
774 | next.tep_qos_ceiling = THREAD_QOS_UNSPECIFIED; | |
775 | break; | |
776 | ||
777 | case TASK_THROTTLE_APPLICATION: | |
778 | /* i.e. 'TAL launch' */ | |
779 | next.tep_qos_ceiling = THREAD_QOS_UTILITY; | |
780 | break; | |
781 | ||
782 | case TASK_UNSPECIFIED: | |
783 | default: | |
784 | /* Apps that don't have an application role get | |
785 | * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */ | |
786 | next.tep_qos_ceiling = THREAD_QOS_LEGACY; | |
787 | break; | |
788 | } | |
789 | } else { | |
790 | /* Daemons get USER_INTERACTIVE squashed to USER_INITIATED */ | |
791 | next.tep_qos_ceiling = THREAD_QOS_USER_INITIATED; | |
792 | } | |
793 | ||
794 | /* Calculate DARWIN_BG */ | |
795 | boolean_t wants_darwinbg = FALSE; | |
796 | boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */ | |
797 | boolean_t wants_watchersbg = FALSE; /* Do I want my pidbound threads to be bg */ | |
798 | ||
799 | /* | |
800 | * If DARWIN_BG has been requested at either level, it's engaged. | |
801 | * Only true DARWIN_BG changes cause watchers to transition. | |
802 | * | |
803 | * Backgrounding due to apptype does not. | |
804 | */ | |
805 | if (requested.trp_int_darwinbg || requested.trp_ext_darwinbg) | |
806 | wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE; | |
807 | ||
808 | /* Background TAL apps are throttled when TAL is enabled */ | |
809 | if (requested.trp_apptype == TASK_APPTYPE_APP_TAL && | |
810 | requested.trp_role == TASK_BACKGROUND_APPLICATION && | |
811 | requested.trp_tal_enabled == 1) { | |
812 | next.tep_tal_engaged = 1; | |
813 | } | |
814 | ||
815 | if ((requested.trp_apptype == TASK_APPTYPE_APP_DEFAULT || | |
816 | requested.trp_apptype == TASK_APPTYPE_APP_TAL) && | |
817 | requested.trp_role == TASK_THROTTLE_APPLICATION) { | |
818 | next.tep_tal_engaged = 1; | |
819 | } | |
820 | ||
821 | /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */ | |
822 | if (requested.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && | |
823 | requested.trp_boosted == 0) | |
824 | wants_darwinbg = TRUE; | |
825 | ||
826 | /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */ | |
827 | if (requested.trp_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) | |
828 | wants_darwinbg = TRUE; | |
829 | ||
830 | if (next.tep_qos_clamp == THREAD_QOS_BACKGROUND || next.tep_qos_clamp == THREAD_QOS_MAINTENANCE) | |
831 | wants_darwinbg = TRUE; | |
832 | ||
833 | /* Calculate side effects of DARWIN_BG */ | |
834 | ||
835 | if (wants_darwinbg) { | |
836 | next.tep_darwinbg = 1; | |
837 | /* darwinbg tasks always create bg sockets, but we don't always loop over all sockets */ | |
838 | next.tep_new_sockets_bg = 1; | |
839 | next.tep_lowpri_cpu = 1; | |
840 | } | |
841 | ||
842 | if (wants_all_sockets_bg) | |
843 | next.tep_all_sockets_bg = 1; | |
844 | ||
845 | if (wants_watchersbg) | |
846 | next.tep_watchers_bg = 1; | |
847 | ||
848 | /* Calculate low CPU priority */ | |
849 | ||
850 | boolean_t wants_lowpri_cpu = FALSE; | |
851 | ||
852 | if (wants_darwinbg) | |
853 | wants_lowpri_cpu = TRUE; | |
854 | ||
855 | if (next.tep_tal_engaged) | |
856 | wants_lowpri_cpu = TRUE; | |
857 | ||
858 | if (requested.trp_sup_lowpri_cpu && requested.trp_boosted == 0) | |
859 | wants_lowpri_cpu = TRUE; | |
860 | ||
861 | if (wants_lowpri_cpu) | |
862 | next.tep_lowpri_cpu = 1; | |
863 | ||
864 | /* Calculate IO policy */ | |
865 | ||
866 | /* Update BG IO policy (so we can see if it has changed) */ | |
867 | next.tep_bg_iotier = requested.trp_bg_iotier; | |
868 | ||
869 | int iopol = THROTTLE_LEVEL_TIER0; | |
870 | ||
871 | if (wants_darwinbg) | |
872 | iopol = MAX(iopol, requested.trp_bg_iotier); | |
873 | ||
874 | if (requested.trp_apptype == TASK_APPTYPE_DAEMON_STANDARD) | |
875 | iopol = MAX(iopol, proc_standard_daemon_tier); | |
876 | ||
877 | if (requested.trp_sup_disk && requested.trp_boosted == 0) | |
878 | iopol = MAX(iopol, proc_suppressed_disk_tier); | |
879 | ||
880 | if (next.tep_tal_engaged) | |
881 | iopol = MAX(iopol, proc_tal_disk_tier); | |
882 | ||
883 | if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) | |
884 | iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.tep_qos_clamp]); | |
885 | ||
886 | iopol = MAX(iopol, requested.trp_int_iotier); | |
887 | iopol = MAX(iopol, requested.trp_ext_iotier); | |
888 | ||
889 | next.tep_io_tier = iopol; | |
890 | ||
891 | /* Calculate Passive IO policy */ | |
892 | ||
893 | if (requested.trp_ext_iopassive || requested.trp_int_iopassive) | |
894 | next.tep_io_passive = 1; | |
895 | ||
896 | /* Calculate suppression-active flag */ | |
897 | if (requested.trp_sup_active && requested.trp_boosted == 0) | |
898 | next.tep_sup_active = 1; | |
899 | ||
900 | /* Calculate timer QOS */ | |
901 | int latency_qos = requested.trp_base_latency_qos; | |
902 | ||
903 | if (requested.trp_sup_timer && requested.trp_boosted == 0) | |
904 | latency_qos = requested.trp_sup_timer; | |
905 | ||
906 | if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) | |
907 | latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.tep_qos_clamp]); | |
908 | ||
909 | if (requested.trp_over_latency_qos != 0) | |
910 | latency_qos = requested.trp_over_latency_qos; | |
911 | ||
912 | /* Treat the windowserver special */ | |
913 | if (requested.trp_role == TASK_GRAPHICS_SERVER) | |
914 | latency_qos = proc_graphics_timer_qos; | |
915 | ||
916 | next.tep_latency_qos = latency_qos; | |
917 | ||
918 | /* Calculate throughput QOS */ | |
919 | int through_qos = requested.trp_base_through_qos; | |
920 | ||
921 | if (requested.trp_sup_throughput && requested.trp_boosted == 0) | |
922 | through_qos = requested.trp_sup_throughput; | |
923 | ||
924 | if (next.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) | |
925 | through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.tep_qos_clamp]); | |
926 | ||
927 | if (requested.trp_over_through_qos != 0) | |
928 | through_qos = requested.trp_over_through_qos; | |
929 | ||
930 | next.tep_through_qos = through_qos; | |
931 | ||
932 | /* Calculate suppressed CPU priority */ | |
933 | if (requested.trp_sup_cpu && requested.trp_boosted == 0) | |
934 | next.tep_suppressed_cpu = 1; | |
935 | ||
936 | /* | |
937 | * Calculate background sockets | |
938 | * Don't take into account boosting to limit transition frequency. | |
939 | */ | |
940 | if (requested.trp_sup_bg_sockets){ | |
941 | next.tep_all_sockets_bg = 1; | |
942 | next.tep_new_sockets_bg = 1; | |
943 | } | |
944 | ||
945 | /* Apply SFI Managed class bit */ | |
946 | next.tep_sfi_managed = requested.trp_sfi_managed; | |
947 | ||
948 | /* Calculate 'live donor' status for live importance */ | |
949 | switch (requested.trp_apptype) { | |
950 | case TASK_APPTYPE_APP_TAL: | |
951 | case TASK_APPTYPE_APP_DEFAULT: | |
952 | if (requested.trp_ext_darwinbg == 0) | |
953 | next.tep_live_donor = 1; | |
954 | else | |
955 | next.tep_live_donor = 0; | |
956 | break; | |
957 | ||
958 | case TASK_APPTYPE_DAEMON_INTERACTIVE: | |
959 | case TASK_APPTYPE_DAEMON_STANDARD: | |
960 | case TASK_APPTYPE_DAEMON_ADAPTIVE: | |
961 | case TASK_APPTYPE_DAEMON_BACKGROUND: | |
962 | default: | |
963 | next.tep_live_donor = 0; | |
964 | break; | |
965 | } | |
966 | ||
967 | if (requested.trp_terminated) { | |
968 | /* | |
969 | * Shoot down the throttles that slow down exit or response to SIGTERM | |
970 | * We don't need to shoot down: | |
971 | * passive (don't want to cause others to throttle) | |
972 | * all_sockets_bg (don't need to iterate FDs on every exit) | |
973 | * new_sockets_bg (doesn't matter for exiting process) | |
974 | * pidsuspend (jetsam-ed BG process shouldn't run again) | |
975 | * watchers_bg (watcher threads don't need to be unthrottled) | |
976 | * latency_qos (affects userspace timers only) | |
977 | */ | |
978 | ||
979 | next.tep_terminated = 1; | |
980 | next.tep_darwinbg = 0; | |
981 | next.tep_lowpri_cpu = 0; | |
982 | next.tep_io_tier = THROTTLE_LEVEL_TIER0; | |
983 | next.tep_tal_engaged = 0; | |
984 | next.tep_role = TASK_UNSPECIFIED; | |
985 | next.tep_suppressed_cpu = 0; | |
986 | } | |
987 | ||
988 | /* | |
989 | * Step 3: | |
990 | * Swap out old policy for new policy | |
991 | */ | |
992 | ||
993 | struct task_effective_policy prev = task->effective_policy; | |
994 | ||
995 | /* This is the point where the new values become visible to other threads */ | |
996 | task->effective_policy = next; | |
997 | ||
998 | /* Don't do anything further to a half-formed task */ | |
999 | if (in_create) | |
1000 | return; | |
1001 | ||
1002 | if (task == kernel_task) | |
1003 | panic("Attempting to set task policy on kernel_task"); | |
1004 | ||
1005 | /* | |
1006 | * Step 4: | |
1007 | * Pend updates that can't be done while holding the task lock | |
1008 | */ | |
1009 | ||
1010 | if (prev.tep_all_sockets_bg != next.tep_all_sockets_bg) | |
1011 | pend_token->tpt_update_sockets = 1; | |
1012 | ||
1013 | /* Only re-scan the timer list if the qos level is getting less strong */ | |
1014 | if (prev.tep_latency_qos > next.tep_latency_qos) | |
1015 | pend_token->tpt_update_timers = 1; | |
1016 | ||
1017 | ||
1018 | if (prev.tep_live_donor != next.tep_live_donor) | |
1019 | pend_token->tpt_update_live_donor = 1; | |
1020 | ||
1021 | /* | |
1022 | * Step 5: | |
1023 | * Update other subsystems as necessary if something has changed | |
1024 | */ | |
1025 | ||
1026 | boolean_t update_threads = FALSE, update_sfi = FALSE; | |
1027 | ||
1028 | /* | |
1029 | * Check for the attributes that thread_policy_update_internal_locked() consults, | |
1030 | * and trigger thread policy re-evaluation. | |
1031 | */ | |
1032 | if (prev.tep_io_tier != next.tep_io_tier || | |
1033 | prev.tep_bg_iotier != next.tep_bg_iotier || | |
1034 | prev.tep_io_passive != next.tep_io_passive || | |
1035 | prev.tep_darwinbg != next.tep_darwinbg || | |
1036 | prev.tep_qos_clamp != next.tep_qos_clamp || | |
1037 | prev.tep_qos_ceiling != next.tep_qos_ceiling || | |
1038 | prev.tep_qos_ui_is_urgent != next.tep_qos_ui_is_urgent || | |
1039 | prev.tep_latency_qos != next.tep_latency_qos || | |
1040 | prev.tep_through_qos != next.tep_through_qos || | |
1041 | prev.tep_lowpri_cpu != next.tep_lowpri_cpu || | |
1042 | prev.tep_new_sockets_bg != next.tep_new_sockets_bg || | |
1043 | prev.tep_terminated != next.tep_terminated ) | |
1044 | update_threads = TRUE; | |
1045 | ||
1046 | /* | |
1047 | * Check for the attributes that sfi_thread_classify() consults, | |
1048 | * and trigger SFI re-evaluation. | |
1049 | */ | |
1050 | if (prev.tep_latency_qos != next.tep_latency_qos || | |
1051 | prev.tep_role != next.tep_role || | |
1052 | prev.tep_sfi_managed != next.tep_sfi_managed ) | |
1053 | update_sfi = TRUE; | |
1054 | ||
1055 | #if CONFIG_SCHED_SFI | |
1056 | /* Reflect task role transitions into the coalition role counters */ | |
1057 | if (prev.tep_role != next.tep_role) { | |
1058 | if (task_policy_update_coalition_focal_tasks(task, prev.tep_role, next.tep_role)) { | |
1059 | update_sfi = TRUE; | |
1060 | pend_token->tpt_update_coal_sfi = 1; | |
1061 | } | |
1062 | } | |
1063 | #endif /* CONFIG_SCHED_SFI */ | |
1064 | ||
1065 | boolean_t update_priority = FALSE; | |
1066 | ||
1067 | int priority = BASEPRI_DEFAULT; | |
1068 | int max_priority = MAXPRI_USER; | |
1069 | ||
1070 | if (next.tep_lowpri_cpu) { | |
1071 | priority = MAXPRI_THROTTLE; | |
1072 | max_priority = MAXPRI_THROTTLE; | |
1073 | } else if (next.tep_suppressed_cpu) { | |
1074 | priority = MAXPRI_SUPPRESSED; | |
1075 | max_priority = MAXPRI_SUPPRESSED; | |
1076 | } else { | |
1077 | switch (next.tep_role) { | |
1078 | case TASK_CONTROL_APPLICATION: | |
1079 | priority = BASEPRI_CONTROL; | |
1080 | break; | |
1081 | case TASK_GRAPHICS_SERVER: | |
1082 | priority = BASEPRI_GRAPHICS; | |
1083 | max_priority = MAXPRI_RESERVED; | |
1084 | break; | |
1085 | default: | |
1086 | break; | |
1087 | } | |
1088 | ||
1089 | /* factor in 'nice' value */ | |
1090 | priority += task->importance; | |
1091 | ||
1092 | if (task->effective_policy.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) { | |
1093 | int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.tep_qos_clamp]; | |
1094 | ||
1095 | priority = MIN(priority, qos_clamp_priority); | |
1096 | max_priority = MIN(max_priority, qos_clamp_priority); | |
1097 | } | |
1098 | ||
1099 | if (priority > max_priority) | |
1100 | priority = max_priority; | |
1101 | else if (priority < MINPRI) | |
1102 | priority = MINPRI; | |
1103 | } | |
1104 | ||
1105 | assert(priority <= max_priority); | |
1106 | ||
1107 | /* avoid extra work if priority isn't changing */ | |
1108 | if (priority != task->priority || | |
1109 | max_priority != task->max_priority ) { | |
1110 | /* update the scheduling priority for the task */ | |
1111 | task->max_priority = max_priority; | |
1112 | task->priority = priority; | |
1113 | update_priority = TRUE; | |
1114 | } | |
1115 | ||
1116 | /* Loop over the threads in the task: | |
1117 | * only once | |
1118 | * only if necessary | |
1119 | * with one thread mutex hold per thread | |
1120 | */ | |
1121 | if (update_threads || update_priority || update_sfi) { | |
1122 | thread_t thread; | |
1123 | ||
1124 | queue_iterate(&task->threads, thread, thread_t, task_threads) { | |
1125 | struct task_pend_token thread_pend_token = {}; | |
1126 | ||
1127 | if (update_sfi) | |
1128 | thread_pend_token.tpt_update_thread_sfi = 1; | |
1129 | ||
1130 | if (update_priority || update_threads) | |
1131 | thread_policy_update_tasklocked(thread, | |
1132 | task->priority, task->max_priority, | |
1133 | &thread_pend_token); | |
1134 | ||
1135 | assert(!thread_pend_token.tpt_update_sockets); | |
1136 | ||
1137 | // Slightly risky, as we still hold the task lock... | |
1138 | thread_policy_update_complete_unlocked(thread, &thread_pend_token); | |
1139 | } | |
1140 | } | |
1141 | } | |
1142 | ||
1143 | ||
1144 | #if CONFIG_SCHED_SFI | |
1145 | /* | |
1146 | * Yet another layering violation. We reach out and bang on the coalition directly. | |
1147 | */ | |
1148 | static boolean_t | |
1149 | task_policy_update_coalition_focal_tasks(task_t task, | |
1150 | int prev_role, | |
1151 | int next_role) | |
1152 | { | |
1153 | boolean_t sfi_transition = FALSE; | |
1154 | ||
1155 | /* task moving into/out-of the foreground */ | |
1156 | if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) { | |
1157 | if (task_coalition_adjust_focal_count(task, 1) == 1) | |
1158 | sfi_transition = TRUE; | |
1159 | } else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) { | |
1160 | if (task_coalition_adjust_focal_count(task, -1) == 0) | |
1161 | sfi_transition = TRUE; | |
1162 | } | |
1163 | ||
1164 | /* task moving into/out-of background */ | |
1165 | if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) { | |
1166 | if (task_coalition_adjust_nonfocal_count(task, 1) == 1) | |
1167 | sfi_transition = TRUE; | |
1168 | } else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) { | |
1169 | if (task_coalition_adjust_nonfocal_count(task, -1) == 0) | |
1170 | sfi_transition = TRUE; | |
1171 | } | |
1172 | ||
1173 | return sfi_transition; | |
1174 | } | |
1175 | ||
1176 | /* coalition object is locked */ | |
1177 | static void | |
1178 | task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task) | |
1179 | { | |
1180 | thread_t thread; | |
1181 | ||
1182 | /* unused for now */ | |
1183 | (void)coal; | |
1184 | ||
1185 | /* skip the task we're re-evaluating on behalf of: it's already updated */ | |
1186 | if (task == (task_t)ctx) | |
1187 | return; | |
1188 | ||
1189 | task_lock(task); | |
1190 | ||
1191 | queue_iterate(&task->threads, thread, thread_t, task_threads) { | |
1192 | sfi_reevaluate(thread); | |
1193 | } | |
1194 | ||
1195 | task_unlock(task); | |
1196 | } | |
1197 | #endif /* CONFIG_SCHED_SFI */ | |
1198 | ||
1199 | /* | |
1200 | * Called with task unlocked to do things that can't be done while holding the task lock | |
1201 | */ | |
1202 | void | |
1203 | task_policy_update_complete_unlocked(task_t task, task_pend_token_t pend_token) | |
1204 | { | |
1205 | #ifdef MACH_BSD | |
1206 | if (pend_token->tpt_update_sockets) | |
1207 | proc_apply_task_networkbg(task->bsd_info, THREAD_NULL); | |
1208 | #endif /* MACH_BSD */ | |
1209 | ||
1210 | /* The timer throttle has been removed or reduced, we need to look for expired timers and fire them */ | |
1211 | if (pend_token->tpt_update_timers) | |
1212 | ml_timer_evaluate(); | |
1213 | ||
1214 | ||
1215 | if (pend_token->tpt_update_live_donor) | |
1216 | task_importance_update_live_donor(task); | |
1217 | ||
1218 | #if CONFIG_SCHED_SFI | |
1219 | /* use the resource coalition for SFI re-evaluation */ | |
1220 | if (pend_token->tpt_update_coal_sfi) | |
1221 | coalition_for_each_task(task->coalition[COALITION_TYPE_RESOURCE], | |
1222 | (void *)task, task_sfi_reevaluate_cb); | |
1223 | #endif /* CONFIG_SCHED_SFI */ | |
1224 | } | |
1225 | ||
1226 | /* | |
1227 | * Initiate a task policy state transition | |
1228 | * | |
1229 | * Everything that modifies requested except functions that need to hold the task lock | |
1230 | * should use this function | |
1231 | * | |
1232 | * Argument validation should be performed before reaching this point. | |
1233 | * | |
1234 | * TODO: Do we need to check task->active? | |
1235 | */ | |
1236 | void | |
1237 | proc_set_task_policy(task_t task, | |
1238 | int category, | |
1239 | int flavor, | |
1240 | int value) | |
1241 | { | |
1242 | struct task_pend_token pend_token = {}; | |
1243 | ||
1244 | task_lock(task); | |
1245 | ||
1246 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1247 | (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_START, | |
1248 | task_pid(task), trequested_0(task), | |
1249 | trequested_1(task), value, 0); | |
1250 | ||
1251 | proc_set_task_policy_locked(task, category, flavor, value, 0); | |
1252 | ||
1253 | task_policy_update_locked(task, &pend_token); | |
1254 | ||
1255 | ||
1256 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1257 | (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_END, | |
1258 | task_pid(task), trequested_0(task), | |
1259 | trequested_1(task), tpending(&pend_token), 0); | |
1260 | ||
1261 | task_unlock(task); | |
1262 | ||
1263 | task_policy_update_complete_unlocked(task, &pend_token); | |
1264 | } | |
1265 | ||
1266 | /* | |
1267 | * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure. | |
1268 | * Same locking rules apply. | |
1269 | */ | |
1270 | void | |
1271 | proc_set_task_policy2(task_t task, | |
1272 | int category, | |
1273 | int flavor, | |
1274 | int value, | |
1275 | int value2) | |
1276 | { | |
1277 | struct task_pend_token pend_token = {}; | |
1278 | ||
1279 | task_lock(task); | |
1280 | ||
1281 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1282 | (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_START, | |
1283 | task_pid(task), trequested_0(task), | |
1284 | trequested_1(task), value, 0); | |
1285 | ||
1286 | proc_set_task_policy_locked(task, category, flavor, value, value2); | |
1287 | ||
1288 | task_policy_update_locked(task, &pend_token); | |
1289 | ||
1290 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1291 | (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_TASK))) | DBG_FUNC_END, | |
1292 | task_pid(task), trequested_0(task), | |
1293 | trequested_1(task), tpending(&pend_token), 0); | |
1294 | ||
1295 | task_unlock(task); | |
1296 | ||
1297 | task_policy_update_complete_unlocked(task, &pend_token); | |
1298 | } | |
1299 | ||
1300 | /* | |
1301 | * Set the requested state for a specific flavor to a specific value. | |
1302 | * | |
1303 | * TODO: | |
1304 | * Verify that arguments to non iopol things are 1 or 0 | |
1305 | */ | |
1306 | static void | |
1307 | proc_set_task_policy_locked(task_t task, | |
1308 | int category, | |
1309 | int flavor, | |
1310 | int value, | |
1311 | int value2) | |
1312 | { | |
1313 | int tier, passive; | |
1314 | ||
1315 | struct task_requested_policy requested = task->requested_policy; | |
1316 | ||
1317 | switch (flavor) { | |
1318 | ||
1319 | /* Category: EXTERNAL and INTERNAL */ | |
1320 | ||
1321 | case TASK_POLICY_DARWIN_BG: | |
1322 | if (category == TASK_POLICY_EXTERNAL) | |
1323 | requested.trp_ext_darwinbg = value; | |
1324 | else | |
1325 | requested.trp_int_darwinbg = value; | |
1326 | break; | |
1327 | ||
1328 | case TASK_POLICY_IOPOL: | |
1329 | proc_iopol_to_tier(value, &tier, &passive); | |
1330 | if (category == TASK_POLICY_EXTERNAL) { | |
1331 | requested.trp_ext_iotier = tier; | |
1332 | requested.trp_ext_iopassive = passive; | |
1333 | } else { | |
1334 | requested.trp_int_iotier = tier; | |
1335 | requested.trp_int_iopassive = passive; | |
1336 | } | |
1337 | break; | |
1338 | ||
1339 | case TASK_POLICY_IO: | |
1340 | if (category == TASK_POLICY_EXTERNAL) | |
1341 | requested.trp_ext_iotier = value; | |
1342 | else | |
1343 | requested.trp_int_iotier = value; | |
1344 | break; | |
1345 | ||
1346 | case TASK_POLICY_PASSIVE_IO: | |
1347 | if (category == TASK_POLICY_EXTERNAL) | |
1348 | requested.trp_ext_iopassive = value; | |
1349 | else | |
1350 | requested.trp_int_iopassive = value; | |
1351 | break; | |
1352 | ||
1353 | /* Category: INTERNAL */ | |
1354 | ||
1355 | case TASK_POLICY_DARWIN_BG_IOPOL: | |
1356 | assert(category == TASK_POLICY_INTERNAL); | |
1357 | proc_iopol_to_tier(value, &tier, &passive); | |
1358 | requested.trp_bg_iotier = tier; | |
1359 | break; | |
1360 | ||
1361 | /* Category: ATTRIBUTE */ | |
1362 | ||
1363 | case TASK_POLICY_TAL: | |
1364 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1365 | requested.trp_tal_enabled = value; | |
1366 | break; | |
1367 | ||
1368 | case TASK_POLICY_BOOST: | |
1369 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1370 | requested.trp_boosted = value; | |
1371 | break; | |
1372 | ||
1373 | case TASK_POLICY_ROLE: | |
1374 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1375 | requested.trp_role = value; | |
1376 | break; | |
1377 | ||
1378 | case TASK_POLICY_TERMINATED: | |
1379 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1380 | requested.trp_terminated = value; | |
1381 | break; | |
1382 | ||
1383 | case TASK_BASE_LATENCY_QOS_POLICY: | |
1384 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1385 | requested.trp_base_latency_qos = value; | |
1386 | break; | |
1387 | ||
1388 | case TASK_BASE_THROUGHPUT_QOS_POLICY: | |
1389 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1390 | requested.trp_base_through_qos = value; | |
1391 | break; | |
1392 | ||
1393 | case TASK_POLICY_SFI_MANAGED: | |
1394 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1395 | requested.trp_sfi_managed = value; | |
1396 | break; | |
1397 | ||
1398 | case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS: | |
1399 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1400 | requested.trp_base_latency_qos = value; | |
1401 | requested.trp_base_through_qos = value2; | |
1402 | break; | |
1403 | ||
1404 | case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS: | |
1405 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1406 | requested.trp_over_latency_qos = value; | |
1407 | requested.trp_over_through_qos = value2; | |
1408 | break; | |
1409 | ||
1410 | default: | |
1411 | panic("unknown task policy: %d %d %d %d", category, flavor, value, value2); | |
1412 | break; | |
1413 | } | |
1414 | ||
1415 | task->requested_policy = requested; | |
1416 | } | |
1417 | ||
1418 | /* | |
1419 | * Gets what you set. Effective values may be different. | |
1420 | */ | |
1421 | int | |
1422 | proc_get_task_policy(task_t task, | |
1423 | int category, | |
1424 | int flavor) | |
1425 | { | |
1426 | int value = 0; | |
1427 | ||
1428 | task_lock(task); | |
1429 | ||
1430 | struct task_requested_policy requested = task->requested_policy; | |
1431 | ||
1432 | switch (flavor) { | |
1433 | case TASK_POLICY_DARWIN_BG: | |
1434 | if (category == TASK_POLICY_EXTERNAL) | |
1435 | value = requested.trp_ext_darwinbg; | |
1436 | else | |
1437 | value = requested.trp_int_darwinbg; | |
1438 | break; | |
1439 | case TASK_POLICY_IOPOL: | |
1440 | if (category == TASK_POLICY_EXTERNAL) | |
1441 | value = proc_tier_to_iopol(requested.trp_ext_iotier, | |
1442 | requested.trp_ext_iopassive); | |
1443 | else | |
1444 | value = proc_tier_to_iopol(requested.trp_int_iotier, | |
1445 | requested.trp_int_iopassive); | |
1446 | break; | |
1447 | case TASK_POLICY_IO: | |
1448 | if (category == TASK_POLICY_EXTERNAL) | |
1449 | value = requested.trp_ext_iotier; | |
1450 | else | |
1451 | value = requested.trp_int_iotier; | |
1452 | break; | |
1453 | case TASK_POLICY_PASSIVE_IO: | |
1454 | if (category == TASK_POLICY_EXTERNAL) | |
1455 | value = requested.trp_ext_iopassive; | |
1456 | else | |
1457 | value = requested.trp_int_iopassive; | |
1458 | break; | |
1459 | case TASK_POLICY_DARWIN_BG_IOPOL: | |
1460 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1461 | value = proc_tier_to_iopol(requested.trp_bg_iotier, 0); | |
1462 | break; | |
1463 | case TASK_POLICY_ROLE: | |
1464 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1465 | value = requested.trp_role; | |
1466 | break; | |
1467 | case TASK_POLICY_SFI_MANAGED: | |
1468 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1469 | value = requested.trp_sfi_managed; | |
1470 | break; | |
1471 | default: | |
1472 | panic("unknown policy_flavor %d", flavor); | |
1473 | break; | |
1474 | } | |
1475 | ||
1476 | task_unlock(task); | |
1477 | ||
1478 | return value; | |
1479 | } | |
1480 | ||
1481 | /* | |
1482 | * Variant of proc_get_task_policy() that returns two scalar outputs. | |
1483 | */ | |
1484 | void | |
1485 | proc_get_task_policy2(task_t task, | |
1486 | __assert_only int category, | |
1487 | int flavor, | |
1488 | int *value1, | |
1489 | int *value2) | |
1490 | { | |
1491 | task_lock(task); | |
1492 | ||
1493 | struct task_requested_policy requested = task->requested_policy; | |
1494 | ||
1495 | switch (flavor) { | |
1496 | case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS: | |
1497 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1498 | *value1 = requested.trp_base_latency_qos; | |
1499 | *value2 = requested.trp_base_through_qos; | |
1500 | break; | |
1501 | ||
1502 | case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS: | |
1503 | assert(category == TASK_POLICY_ATTRIBUTE); | |
1504 | *value1 = requested.trp_over_latency_qos; | |
1505 | *value2 = requested.trp_over_through_qos; | |
1506 | break; | |
1507 | ||
1508 | default: | |
1509 | panic("unknown policy_flavor %d", flavor); | |
1510 | break; | |
1511 | } | |
1512 | ||
1513 | task_unlock(task); | |
1514 | } | |
1515 | ||
1516 | /* | |
1517 | * Function for querying effective state for relevant subsystems | |
1518 | * Gets what is actually in effect, for subsystems which pull policy instead of receive updates. | |
1519 | * | |
1520 | * ONLY the relevant subsystem should query this. | |
1521 | * NEVER take a value from the 'effective' function and stuff it into a setter. | |
1522 | * | |
1523 | * NOTE: This accessor does not take the task lock. | |
1524 | * Notifications of state updates need to be externally synchronized with state queries. | |
1525 | * This routine *MUST* remain interrupt safe, as it is potentially invoked | |
1526 | * within the context of a timer interrupt. It is also called in KDP context for stackshot. | |
1527 | */ | |
1528 | int | |
1529 | proc_get_effective_task_policy(task_t task, | |
1530 | int flavor) | |
1531 | { | |
1532 | int value = 0; | |
1533 | ||
1534 | switch (flavor) { | |
1535 | case TASK_POLICY_DARWIN_BG: | |
1536 | /* | |
1537 | * This backs the KPI call proc_pidbackgrounded to find | |
1538 | * out if a pid is backgrounded. | |
1539 | * It is used to communicate state to the VM system, as well as | |
1540 | * prioritizing requests to the graphics system. | |
1541 | * Returns 1 for background mode, 0 for normal mode | |
1542 | */ | |
1543 | value = task->effective_policy.tep_darwinbg; | |
1544 | break; | |
1545 | case TASK_POLICY_ALL_SOCKETS_BG: | |
1546 | /* | |
1547 | * do_background_socket() calls this to determine what it should do to the proc's sockets | |
1548 | * Returns 1 for background mode, 0 for normal mode | |
1549 | * | |
1550 | * This consults both thread and task so un-DBGing a thread while the task is BG | |
1551 | * doesn't get you out of the network throttle. | |
1552 | */ | |
1553 | value = task->effective_policy.tep_all_sockets_bg; | |
1554 | break; | |
1555 | case TASK_POLICY_LATENCY_QOS: | |
1556 | /* | |
1557 | * timer arming calls into here to find out the timer coalescing level | |
1558 | * Returns a QoS tier (0-6) | |
1559 | */ | |
1560 | value = task->effective_policy.tep_latency_qos; | |
1561 | break; | |
1562 | case TASK_POLICY_THROUGH_QOS: | |
1563 | /* | |
1564 | * This value is passed into the urgency callout from the scheduler | |
1565 | * to the performance management subsystem. | |
1566 | * Returns a QoS tier (0-6) | |
1567 | */ | |
1568 | value = task->effective_policy.tep_through_qos; | |
1569 | break; | |
1570 | case TASK_POLICY_ROLE: | |
1571 | /* | |
1572 | * This controls various things that ask whether a process is foreground, | |
1573 | * like SFI, VM, access to GPU, etc | |
1574 | */ | |
1575 | value = task->effective_policy.tep_role; | |
1576 | break; | |
1577 | case TASK_POLICY_WATCHERS_BG: | |
1578 | /* | |
1579 | * This controls whether or not a thread watching this process should be BG. | |
1580 | */ | |
1581 | value = task->effective_policy.tep_watchers_bg; | |
1582 | break; | |
1583 | case TASK_POLICY_SFI_MANAGED: | |
1584 | /* | |
1585 | * This controls whether or not a process is targeted for specific control by thermald. | |
1586 | */ | |
1587 | value = task->effective_policy.tep_sfi_managed; | |
1588 | break; | |
1589 | default: | |
1590 | panic("unknown policy_flavor %d", flavor); | |
1591 | break; | |
1592 | } | |
1593 | ||
1594 | return value; | |
1595 | } | |
1596 | ||
1597 | /* | |
1598 | * Convert from IOPOL_* values to throttle tiers. | |
1599 | * | |
1600 | * TODO: Can this be made more compact, like an array lookup | |
1601 | * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future | |
1602 | */ | |
1603 | ||
1604 | void | |
1605 | proc_iopol_to_tier(int iopolicy, int *tier, int *passive) | |
1606 | { | |
1607 | *passive = 0; | |
1608 | *tier = 0; | |
1609 | switch (iopolicy) { | |
1610 | case IOPOL_IMPORTANT: | |
1611 | *tier = THROTTLE_LEVEL_TIER0; | |
1612 | break; | |
1613 | case IOPOL_PASSIVE: | |
1614 | *tier = THROTTLE_LEVEL_TIER0; | |
1615 | *passive = 1; | |
1616 | break; | |
1617 | case IOPOL_STANDARD: | |
1618 | *tier = THROTTLE_LEVEL_TIER1; | |
1619 | break; | |
1620 | case IOPOL_UTILITY: | |
1621 | *tier = THROTTLE_LEVEL_TIER2; | |
1622 | break; | |
1623 | case IOPOL_THROTTLE: | |
1624 | *tier = THROTTLE_LEVEL_TIER3; | |
1625 | break; | |
1626 | default: | |
1627 | panic("unknown I/O policy %d", iopolicy); | |
1628 | break; | |
1629 | } | |
1630 | } | |
1631 | ||
1632 | int | |
1633 | proc_tier_to_iopol(int tier, int passive) | |
1634 | { | |
1635 | if (passive == 1) { | |
1636 | switch (tier) { | |
1637 | case THROTTLE_LEVEL_TIER0: | |
1638 | return IOPOL_PASSIVE; | |
1639 | default: | |
1640 | panic("unknown passive tier %d", tier); | |
1641 | return IOPOL_DEFAULT; | |
1642 | } | |
1643 | } else { | |
1644 | switch (tier) { | |
1645 | case THROTTLE_LEVEL_NONE: | |
1646 | case THROTTLE_LEVEL_TIER0: | |
1647 | return IOPOL_DEFAULT; | |
1648 | case THROTTLE_LEVEL_TIER1: | |
1649 | return IOPOL_STANDARD; | |
1650 | case THROTTLE_LEVEL_TIER2: | |
1651 | return IOPOL_UTILITY; | |
1652 | case THROTTLE_LEVEL_TIER3: | |
1653 | return IOPOL_THROTTLE; | |
1654 | default: | |
1655 | panic("unknown tier %d", tier); | |
1656 | return IOPOL_DEFAULT; | |
1657 | } | |
1658 | } | |
1659 | } | |
1660 | ||
1661 | int | |
1662 | proc_darwin_role_to_task_role(int darwin_role, int* task_role) | |
1663 | { | |
1664 | integer_t role = TASK_UNSPECIFIED; | |
1665 | ||
1666 | switch (darwin_role) { | |
1667 | case PRIO_DARWIN_ROLE_DEFAULT: | |
1668 | role = TASK_UNSPECIFIED; | |
1669 | break; | |
1670 | case PRIO_DARWIN_ROLE_UI_FOCAL: | |
1671 | role = TASK_FOREGROUND_APPLICATION; | |
1672 | break; | |
1673 | case PRIO_DARWIN_ROLE_UI: | |
1674 | role = TASK_DEFAULT_APPLICATION; | |
1675 | break; | |
1676 | case PRIO_DARWIN_ROLE_NON_UI: | |
1677 | role = TASK_NONUI_APPLICATION; | |
1678 | break; | |
1679 | case PRIO_DARWIN_ROLE_UI_NON_FOCAL: | |
1680 | role = TASK_BACKGROUND_APPLICATION; | |
1681 | break; | |
1682 | case PRIO_DARWIN_ROLE_TAL_LAUNCH: | |
1683 | role = TASK_THROTTLE_APPLICATION; | |
1684 | break; | |
1685 | default: | |
1686 | return EINVAL; | |
1687 | } | |
1688 | ||
1689 | *task_role = role; | |
1690 | ||
1691 | return 0; | |
1692 | } | |
1693 | ||
1694 | int | |
1695 | proc_task_role_to_darwin_role(int task_role) | |
1696 | { | |
1697 | switch (task_role) { | |
1698 | case TASK_FOREGROUND_APPLICATION: | |
1699 | return PRIO_DARWIN_ROLE_UI_FOCAL; | |
1700 | case TASK_BACKGROUND_APPLICATION: | |
1701 | return PRIO_DARWIN_ROLE_UI_NON_FOCAL; | |
1702 | case TASK_NONUI_APPLICATION: | |
1703 | return PRIO_DARWIN_ROLE_NON_UI; | |
1704 | case TASK_DEFAULT_APPLICATION: | |
1705 | return PRIO_DARWIN_ROLE_UI; | |
1706 | case TASK_THROTTLE_APPLICATION: | |
1707 | return PRIO_DARWIN_ROLE_TAL_LAUNCH; | |
1708 | case TASK_UNSPECIFIED: | |
1709 | default: | |
1710 | return PRIO_DARWIN_ROLE_DEFAULT; | |
1711 | } | |
1712 | } | |
1713 | ||
1714 | ||
1715 | /* TODO: remove this variable when interactive daemon audit period is over */ | |
1716 | extern boolean_t ipc_importance_interactive_receiver; | |
1717 | ||
1718 | /* | |
1719 | * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process | |
1720 | * | |
1721 | * TODO: Make this function more table-driven instead of ad-hoc | |
1722 | */ | |
1723 | void | |
1724 | proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, int role, | |
1725 | ipc_port_t * portwatch_ports, int portwatch_count) | |
1726 | { | |
1727 | struct task_pend_token pend_token = {}; | |
1728 | ||
1729 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1730 | (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START, | |
1731 | task_pid(task), trequested_0(task), trequested_1(task), | |
1732 | apptype, 0); | |
1733 | ||
1734 | switch (apptype) { | |
1735 | case TASK_APPTYPE_APP_TAL: | |
1736 | case TASK_APPTYPE_APP_DEFAULT: | |
1737 | /* Apps become donors via the 'live-donor' flag instead of the static donor flag */ | |
1738 | task_importance_mark_donor(task, FALSE); | |
1739 | task_importance_mark_live_donor(task, TRUE); | |
1740 | task_importance_mark_receiver(task, FALSE); | |
1741 | /* Apps are de-nap recievers on desktop for suppression behaviors */ | |
1742 | task_importance_mark_denap_receiver(task, TRUE); | |
1743 | break; | |
1744 | ||
1745 | case TASK_APPTYPE_DAEMON_INTERACTIVE: | |
1746 | task_importance_mark_donor(task, TRUE); | |
1747 | task_importance_mark_live_donor(task, FALSE); | |
1748 | ||
1749 | /* | |
1750 | * A boot arg controls whether interactive daemons are importance receivers. | |
1751 | * Normally, they are not. But for testing their behavior as an adaptive | |
1752 | * daemon, the boot-arg can be set. | |
1753 | * | |
1754 | * TODO: remove this when the interactive daemon audit period is over. | |
1755 | */ | |
1756 | task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver); | |
1757 | task_importance_mark_denap_receiver(task, FALSE); | |
1758 | break; | |
1759 | ||
1760 | case TASK_APPTYPE_DAEMON_STANDARD: | |
1761 | task_importance_mark_donor(task, TRUE); | |
1762 | task_importance_mark_live_donor(task, FALSE); | |
1763 | task_importance_mark_receiver(task, FALSE); | |
1764 | task_importance_mark_denap_receiver(task, FALSE); | |
1765 | break; | |
1766 | ||
1767 | case TASK_APPTYPE_DAEMON_ADAPTIVE: | |
1768 | task_importance_mark_donor(task, FALSE); | |
1769 | task_importance_mark_live_donor(task, FALSE); | |
1770 | task_importance_mark_receiver(task, TRUE); | |
1771 | task_importance_mark_denap_receiver(task, FALSE); | |
1772 | break; | |
1773 | ||
1774 | case TASK_APPTYPE_DAEMON_BACKGROUND: | |
1775 | task_importance_mark_donor(task, FALSE); | |
1776 | task_importance_mark_live_donor(task, FALSE); | |
1777 | task_importance_mark_receiver(task, FALSE); | |
1778 | task_importance_mark_denap_receiver(task, FALSE); | |
1779 | break; | |
1780 | ||
1781 | case TASK_APPTYPE_NONE: | |
1782 | break; | |
1783 | } | |
1784 | ||
1785 | if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) { | |
1786 | int portwatch_boosts = 0; | |
1787 | ||
1788 | for (int i = 0; i < portwatch_count; i++) { | |
1789 | ipc_port_t port = NULL; | |
1790 | ||
1791 | if ((port = portwatch_ports[i]) != NULL) { | |
1792 | int boost = 0; | |
1793 | task_add_importance_watchport(task, port, &boost); | |
1794 | portwatch_boosts += boost; | |
1795 | } | |
1796 | } | |
1797 | ||
1798 | if (portwatch_boosts > 0) { | |
1799 | task_importance_hold_internal_assertion(task, portwatch_boosts); | |
1800 | } | |
1801 | } | |
1802 | ||
1803 | task_lock(task); | |
1804 | ||
1805 | if (apptype == TASK_APPTYPE_APP_TAL) { | |
1806 | /* TAL starts off enabled by default */ | |
1807 | task->requested_policy.trp_tal_enabled = 1; | |
1808 | } | |
1809 | ||
1810 | if (apptype != TASK_APPTYPE_NONE) { | |
1811 | task->requested_policy.trp_apptype = apptype; | |
1812 | } | |
1813 | ||
1814 | if (role != TASK_UNSPECIFIED) { | |
1815 | task->requested_policy.trp_role = role; | |
1816 | } | |
1817 | ||
1818 | if (qos_clamp != THREAD_QOS_UNSPECIFIED) { | |
1819 | task->requested_policy.trp_qos_clamp = qos_clamp; | |
1820 | } | |
1821 | ||
1822 | task_policy_update_locked(task, &pend_token); | |
1823 | ||
1824 | task_unlock(task); | |
1825 | ||
1826 | /* Ensure the donor bit is updated to be in sync with the new live donor status */ | |
1827 | pend_token.tpt_update_live_donor = 1; | |
1828 | ||
1829 | task_policy_update_complete_unlocked(task, &pend_token); | |
1830 | ||
1831 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1832 | (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END, | |
1833 | task_pid(task), trequested_0(task), trequested_1(task), | |
1834 | task_is_importance_receiver(task), 0); | |
1835 | } | |
1836 | ||
1837 | extern task_t bsd_init_task; | |
1838 | ||
1839 | /* | |
1840 | * Compute the default main thread qos for a task | |
1841 | */ | |
1842 | int | |
1843 | task_compute_main_thread_qos(task_t task) | |
1844 | { | |
1845 | int primordial_qos = THREAD_QOS_UNSPECIFIED; | |
1846 | ||
1847 | int qos_clamp = task->requested_policy.trp_qos_clamp; | |
1848 | ||
1849 | switch (task->requested_policy.trp_apptype) { | |
1850 | case TASK_APPTYPE_APP_TAL: | |
1851 | case TASK_APPTYPE_APP_DEFAULT: | |
1852 | primordial_qos = THREAD_QOS_USER_INTERACTIVE; | |
1853 | break; | |
1854 | ||
1855 | case TASK_APPTYPE_DAEMON_INTERACTIVE: | |
1856 | case TASK_APPTYPE_DAEMON_STANDARD: | |
1857 | case TASK_APPTYPE_DAEMON_ADAPTIVE: | |
1858 | primordial_qos = THREAD_QOS_LEGACY; | |
1859 | break; | |
1860 | ||
1861 | case TASK_APPTYPE_DAEMON_BACKGROUND: | |
1862 | primordial_qos = THREAD_QOS_BACKGROUND; | |
1863 | break; | |
1864 | } | |
1865 | ||
1866 | if (task == bsd_init_task) { | |
1867 | /* PID 1 gets a special case */ | |
1868 | primordial_qos = MAX(primordial_qos, THREAD_QOS_USER_INITIATED); | |
1869 | } | |
1870 | ||
1871 | if (qos_clamp != THREAD_QOS_UNSPECIFIED) { | |
1872 | if (primordial_qos != THREAD_QOS_UNSPECIFIED) { | |
1873 | primordial_qos = MIN(qos_clamp, primordial_qos); | |
1874 | } else { | |
1875 | primordial_qos = qos_clamp; | |
1876 | } | |
1877 | } | |
1878 | ||
1879 | return primordial_qos; | |
1880 | } | |
1881 | ||
1882 | ||
1883 | /* for process_policy to check before attempting to set */ | |
1884 | boolean_t | |
1885 | proc_task_is_tal(task_t task) | |
1886 | { | |
1887 | return (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE; | |
1888 | } | |
1889 | ||
1890 | int | |
1891 | task_get_apptype(task_t task) | |
1892 | { | |
1893 | return task->requested_policy.trp_apptype; | |
1894 | } | |
1895 | ||
1896 | boolean_t | |
1897 | task_is_daemon(task_t task) | |
1898 | { | |
1899 | switch (task->requested_policy.trp_apptype) { | |
1900 | case TASK_APPTYPE_DAEMON_INTERACTIVE: | |
1901 | case TASK_APPTYPE_DAEMON_STANDARD: | |
1902 | case TASK_APPTYPE_DAEMON_ADAPTIVE: | |
1903 | case TASK_APPTYPE_DAEMON_BACKGROUND: | |
1904 | return TRUE; | |
1905 | default: | |
1906 | return FALSE; | |
1907 | } | |
1908 | } | |
1909 | ||
1910 | boolean_t | |
1911 | task_is_app(task_t task) | |
1912 | { | |
1913 | switch (task->requested_policy.trp_apptype) { | |
1914 | case TASK_APPTYPE_APP_DEFAULT: | |
1915 | case TASK_APPTYPE_APP_TAL: | |
1916 | return TRUE; | |
1917 | default: | |
1918 | return FALSE; | |
1919 | } | |
1920 | } | |
1921 | ||
1922 | /* for telemetry */ | |
1923 | integer_t | |
1924 | task_grab_latency_qos(task_t task) | |
1925 | { | |
1926 | return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS)); | |
1927 | } | |
1928 | ||
1929 | /* update the darwin background action state in the flags field for libproc */ | |
1930 | int | |
1931 | proc_get_darwinbgstate(task_t task, uint32_t * flagsp) | |
1932 | { | |
1933 | if (task->requested_policy.trp_ext_darwinbg) | |
1934 | *flagsp |= PROC_FLAG_EXT_DARWINBG; | |
1935 | ||
1936 | if (task->requested_policy.trp_int_darwinbg) | |
1937 | *flagsp |= PROC_FLAG_DARWINBG; | |
1938 | ||
1939 | ||
1940 | if (task->requested_policy.trp_apptype == TASK_APPTYPE_APP_DEFAULT || | |
1941 | task->requested_policy.trp_apptype == TASK_APPTYPE_APP_TAL) | |
1942 | *flagsp |= PROC_FLAG_APPLICATION; | |
1943 | ||
1944 | if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) | |
1945 | *flagsp |= PROC_FLAG_ADAPTIVE; | |
1946 | ||
1947 | if (task->requested_policy.trp_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && | |
1948 | task->requested_policy.trp_boosted == 1) | |
1949 | *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT; | |
1950 | ||
1951 | if (task_is_importance_donor(task)) | |
1952 | *flagsp |= PROC_FLAG_IMPORTANCE_DONOR; | |
1953 | ||
1954 | if (task->effective_policy.tep_sup_active) | |
1955 | *flagsp |= PROC_FLAG_SUPPRESSED; | |
1956 | ||
1957 | return(0); | |
1958 | } | |
1959 | ||
1960 | /* | |
1961 | * Tracepoint data... Reading the tracepoint data can be somewhat complicated. | |
1962 | * The current scheme packs as much data into a single tracepoint as it can. | |
1963 | * | |
1964 | * Each task/thread requested/effective structure is 64 bits in size. Any | |
1965 | * given tracepoint will emit either requested or effective data, but not both. | |
1966 | * | |
1967 | * A tracepoint may emit any of task, thread, or task & thread data. | |
1968 | * | |
1969 | * The type of data emitted varies with pointer size. Where possible, both | |
1970 | * task and thread data are emitted. In LP32 systems, the first and second | |
1971 | * halves of either the task or thread data is emitted. | |
1972 | * | |
1973 | * The code uses uintptr_t array indexes instead of high/low to avoid | |
1974 | * confusion WRT big vs little endian. | |
1975 | * | |
1976 | * The truth table for the tracepoint data functions is below, and has the | |
1977 | * following invariants: | |
1978 | * | |
1979 | * 1) task and thread are uintptr_t* | |
1980 | * 2) task may never be NULL | |
1981 | * | |
1982 | * | |
1983 | * LP32 LP64 | |
1984 | * trequested_0(task, NULL) task[0] task[0] | |
1985 | * trequested_1(task, NULL) task[1] NULL | |
1986 | * trequested_0(task, thread) thread[0] task[0] | |
1987 | * trequested_1(task, thread) thread[1] thread[0] | |
1988 | * | |
1989 | * Basically, you get a full task or thread on LP32, and both on LP64. | |
1990 | * | |
1991 | * The uintptr_t munging here is squicky enough to deserve a comment. | |
1992 | * | |
1993 | * The variables we are accessing are laid out in memory like this: | |
1994 | * | |
1995 | * [ LP64 uintptr_t 0 ] | |
1996 | * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ] | |
1997 | * | |
1998 | * 1 2 3 4 5 6 7 8 | |
1999 | * | |
2000 | */ | |
2001 | ||
2002 | static uintptr_t | |
2003 | trequested_0(task_t task) | |
2004 | { | |
2005 | static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated"); | |
2006 | ||
2007 | uintptr_t* raw = (uintptr_t*)&task->requested_policy; | |
2008 | ||
2009 | return raw[0]; | |
2010 | } | |
2011 | ||
2012 | static uintptr_t | |
2013 | trequested_1(task_t task) | |
2014 | { | |
2015 | #if defined __LP64__ | |
2016 | (void)task; | |
2017 | return 0; | |
2018 | #else | |
2019 | uintptr_t* raw = (uintptr_t*)(&task->requested_policy); | |
2020 | return raw[1]; | |
2021 | #endif | |
2022 | } | |
2023 | ||
2024 | static uintptr_t | |
2025 | teffective_0(task_t task) | |
2026 | { | |
2027 | uintptr_t* raw = (uintptr_t*)&task->effective_policy; | |
2028 | ||
2029 | return raw[0]; | |
2030 | } | |
2031 | ||
2032 | static uintptr_t | |
2033 | teffective_1(task_t task) | |
2034 | { | |
2035 | #if defined __LP64__ | |
2036 | (void)task; | |
2037 | return 0; | |
2038 | #else | |
2039 | uintptr_t* raw = (uintptr_t*)(&task->effective_policy); | |
2040 | return raw[1]; | |
2041 | #endif | |
2042 | } | |
2043 | ||
2044 | /* dump pending for tracepoint */ | |
2045 | uint32_t tpending(task_pend_token_t pend_token) { return *(uint32_t*)(void*)(pend_token); } | |
2046 | ||
2047 | uint64_t | |
2048 | task_requested_bitfield(task_t task) | |
2049 | { | |
2050 | uint64_t bits = 0; | |
2051 | struct task_requested_policy requested = task->requested_policy; | |
2052 | ||
2053 | bits |= (requested.trp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0); | |
2054 | bits |= (requested.trp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0); | |
2055 | bits |= (requested.trp_int_iotier ? (((uint64_t)requested.trp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0); | |
2056 | bits |= (requested.trp_ext_iotier ? (((uint64_t)requested.trp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0); | |
2057 | bits |= (requested.trp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0); | |
2058 | bits |= (requested.trp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0); | |
2059 | bits |= (requested.trp_bg_iotier ? (((uint64_t)requested.trp_bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0); | |
2060 | bits |= (requested.trp_terminated ? POLICY_REQ_TERMINATED : 0); | |
2061 | ||
2062 | bits |= (requested.trp_boosted ? POLICY_REQ_BOOSTED : 0); | |
2063 | bits |= (requested.trp_tal_enabled ? POLICY_REQ_TAL_ENABLED : 0); | |
2064 | bits |= (requested.trp_apptype ? (((uint64_t)requested.trp_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0); | |
2065 | bits |= (requested.trp_role ? (((uint64_t)requested.trp_role) << POLICY_REQ_ROLE_SHIFT) : 0); | |
2066 | ||
2067 | bits |= (requested.trp_sup_active ? POLICY_REQ_SUP_ACTIVE : 0); | |
2068 | bits |= (requested.trp_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0); | |
2069 | bits |= (requested.trp_sup_cpu ? POLICY_REQ_SUP_CPU : 0); | |
2070 | bits |= (requested.trp_sup_timer ? (((uint64_t)requested.trp_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0); | |
2071 | bits |= (requested.trp_sup_throughput ? (((uint64_t)requested.trp_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0); | |
2072 | bits |= (requested.trp_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : 0); | |
2073 | bits |= (requested.trp_sup_bg_sockets ? POLICY_REQ_SUP_BG_SOCKETS : 0); | |
2074 | ||
2075 | bits |= (requested.trp_base_latency_qos ? (((uint64_t)requested.trp_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0); | |
2076 | bits |= (requested.trp_over_latency_qos ? (((uint64_t)requested.trp_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0); | |
2077 | bits |= (requested.trp_base_through_qos ? (((uint64_t)requested.trp_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0); | |
2078 | bits |= (requested.trp_over_through_qos ? (((uint64_t)requested.trp_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0); | |
2079 | bits |= (requested.trp_sfi_managed ? POLICY_REQ_SFI_MANAGED : 0); | |
2080 | bits |= (requested.trp_qos_clamp ? (((uint64_t)requested.trp_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT) : 0); | |
2081 | ||
2082 | return bits; | |
2083 | } | |
2084 | ||
2085 | uint64_t | |
2086 | task_effective_bitfield(task_t task) | |
2087 | { | |
2088 | uint64_t bits = 0; | |
2089 | struct task_effective_policy effective = task->effective_policy; | |
2090 | ||
2091 | bits |= (effective.tep_io_tier ? (((uint64_t)effective.tep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0); | |
2092 | bits |= (effective.tep_io_passive ? POLICY_EFF_IO_PASSIVE : 0); | |
2093 | bits |= (effective.tep_darwinbg ? POLICY_EFF_DARWIN_BG : 0); | |
2094 | bits |= (effective.tep_lowpri_cpu ? POLICY_EFF_LOWPRI_CPU : 0); | |
2095 | bits |= (effective.tep_terminated ? POLICY_EFF_TERMINATED : 0); | |
2096 | bits |= (effective.tep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0); | |
2097 | bits |= (effective.tep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0); | |
2098 | bits |= (effective.tep_bg_iotier ? (((uint64_t)effective.tep_bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0); | |
2099 | bits |= (effective.tep_qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0); | |
2100 | ||
2101 | bits |= (effective.tep_tal_engaged ? POLICY_EFF_TAL_ENGAGED : 0); | |
2102 | bits |= (effective.tep_watchers_bg ? POLICY_EFF_WATCHERS_BG : 0); | |
2103 | bits |= (effective.tep_sup_active ? POLICY_EFF_SUP_ACTIVE : 0); | |
2104 | bits |= (effective.tep_suppressed_cpu ? POLICY_EFF_SUP_CPU : 0); | |
2105 | bits |= (effective.tep_role ? (((uint64_t)effective.tep_role) << POLICY_EFF_ROLE_SHIFT) : 0); | |
2106 | bits |= (effective.tep_latency_qos ? (((uint64_t)effective.tep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0); | |
2107 | bits |= (effective.tep_through_qos ? (((uint64_t)effective.tep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0); | |
2108 | bits |= (effective.tep_sfi_managed ? POLICY_EFF_SFI_MANAGED : 0); | |
2109 | bits |= (effective.tep_qos_ceiling ? (((uint64_t)effective.tep_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0); | |
2110 | ||
2111 | return bits; | |
2112 | } | |
2113 | ||
2114 | ||
2115 | /* | |
2116 | * Resource usage and CPU related routines | |
2117 | */ | |
2118 | ||
2119 | int | |
2120 | proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep) | |
2121 | { | |
2122 | ||
2123 | int error = 0; | |
2124 | int scope; | |
2125 | ||
2126 | task_lock(task); | |
2127 | ||
2128 | ||
2129 | error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope); | |
2130 | task_unlock(task); | |
2131 | ||
2132 | /* | |
2133 | * Reverse-map from CPU resource limit scopes back to policies (see comment below). | |
2134 | */ | |
2135 | if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { | |
2136 | *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC; | |
2137 | } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) { | |
2138 | *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE; | |
2139 | } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) { | |
2140 | *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; | |
2141 | } | |
2142 | ||
2143 | return(error); | |
2144 | } | |
2145 | ||
2146 | /* | |
2147 | * Configure the default CPU usage monitor parameters. | |
2148 | * | |
2149 | * For tasks which have this mechanism activated: if any thread in the | |
2150 | * process consumes more CPU than this, an EXC_RESOURCE exception will be generated. | |
2151 | */ | |
2152 | void | |
2153 | proc_init_cpumon_params(void) | |
2154 | { | |
2155 | /* | |
2156 | * The max CPU percentage can be configured via the boot-args and | |
2157 | * a key in the device tree. The boot-args are honored first, then the | |
2158 | * device tree. | |
2159 | */ | |
2160 | if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage, | |
2161 | sizeof (proc_max_cpumon_percentage))) | |
2162 | { | |
2163 | uint64_t max_percentage = 0ULL; | |
2164 | ||
2165 | if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage, | |
2166 | sizeof(max_percentage))) | |
2167 | { | |
2168 | max_percentage = DEFAULT_CPUMON_PERCENTAGE; | |
2169 | } | |
2170 | ||
2171 | assert(max_percentage <= UINT8_MAX); | |
2172 | proc_max_cpumon_percentage = (uint8_t) max_percentage; | |
2173 | } | |
2174 | ||
2175 | if (proc_max_cpumon_percentage > 100) { | |
2176 | proc_max_cpumon_percentage = 100; | |
2177 | } | |
2178 | ||
2179 | /* | |
2180 | * The interval should be specified in seconds. | |
2181 | * | |
2182 | * Like the max CPU percentage, the max CPU interval can be configured | |
2183 | * via boot-args and the device tree. | |
2184 | */ | |
2185 | if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval, | |
2186 | sizeof (proc_max_cpumon_interval))) | |
2187 | { | |
2188 | if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval, | |
2189 | sizeof(proc_max_cpumon_interval))) | |
2190 | { | |
2191 | proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL; | |
2192 | } | |
2193 | } | |
2194 | ||
2195 | proc_max_cpumon_interval *= NSEC_PER_SEC; | |
2196 | ||
2197 | /* TEMPORARY boot arg to control App suppression */ | |
2198 | PE_parse_boot_argn("task_policy_suppression_disable", | |
2199 | &task_policy_suppression_disable, | |
2200 | sizeof(task_policy_suppression_disable)); | |
2201 | } | |
2202 | ||
2203 | /* | |
2204 | * Currently supported configurations for CPU limits. | |
2205 | * | |
2206 | * Policy | Deadline-based CPU limit | Percentage-based CPU limit | |
2207 | * -------------------------------------+--------------------------+------------------------------ | |
2208 | * PROC_POLICY_RSRCACT_THROTTLE | ENOTSUP | Task-wide scope only | |
2209 | * PROC_POLICY_RSRCACT_SUSPEND | Task-wide scope only | ENOTSUP | |
2210 | * PROC_POLICY_RSRCACT_TERMINATE | Task-wide scope only | ENOTSUP | |
2211 | * PROC_POLICY_RSRCACT_NOTIFY_KQ | Task-wide scope only | ENOTSUP | |
2212 | * PROC_POLICY_RSRCACT_NOTIFY_EXC | ENOTSUP | Per-thread scope only | |
2213 | * | |
2214 | * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed | |
2215 | * after the specified amount of wallclock time has elapsed. | |
2216 | * | |
2217 | * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time | |
2218 | * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an | |
2219 | * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads | |
2220 | * in the task are added together), or by any one thread in the task (so-called "per-thread" scope). | |
2221 | * | |
2222 | * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them | |
2223 | * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action | |
2224 | * after I have used some amount of CPU time; this is different than the recurring percentage/interval model) | |
2225 | * but the potential consumer of the API at the time was insisting on wallclock time instead. | |
2226 | * | |
2227 | * Currently, requesting notification via an exception is the only way to get per-thread scope for a | |
2228 | * CPU limit. All other types of notifications force task-wide scope for the limit. | |
2229 | */ | |
2230 | int | |
2231 | proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline, | |
2232 | int cpumon_entitled) | |
2233 | { | |
2234 | int error = 0; | |
2235 | int scope; | |
2236 | ||
2237 | /* | |
2238 | * Enforce the matrix of supported configurations for policy, percentage, and deadline. | |
2239 | */ | |
2240 | switch (policy) { | |
2241 | // If no policy is explicitly given, the default is to throttle. | |
2242 | case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE: | |
2243 | case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE: | |
2244 | if (deadline != 0) | |
2245 | return (ENOTSUP); | |
2246 | scope = TASK_RUSECPU_FLAGS_PROC_LIMIT; | |
2247 | break; | |
2248 | case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND: | |
2249 | case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE: | |
2250 | case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ: | |
2251 | if (percentage != 0) | |
2252 | return (ENOTSUP); | |
2253 | scope = TASK_RUSECPU_FLAGS_DEADLINE; | |
2254 | break; | |
2255 | case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC: | |
2256 | if (deadline != 0) | |
2257 | return (ENOTSUP); | |
2258 | scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT; | |
2259 | #ifdef CONFIG_NOMONITORS | |
2260 | return (error); | |
2261 | #endif /* CONFIG_NOMONITORS */ | |
2262 | break; | |
2263 | default: | |
2264 | return (EINVAL); | |
2265 | } | |
2266 | ||
2267 | task_lock(task); | |
2268 | if (task != current_task()) { | |
2269 | task->policy_ru_cpu_ext = policy; | |
2270 | } else { | |
2271 | task->policy_ru_cpu = policy; | |
2272 | } | |
2273 | error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled); | |
2274 | task_unlock(task); | |
2275 | return(error); | |
2276 | } | |
2277 | ||
2278 | /* TODO: get rid of these */ | |
2279 | #define TASK_POLICY_CPU_RESOURCE_USAGE 0 | |
2280 | #define TASK_POLICY_WIREDMEM_RESOURCE_USAGE 1 | |
2281 | #define TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE 2 | |
2282 | #define TASK_POLICY_DISK_RESOURCE_USAGE 3 | |
2283 | #define TASK_POLICY_NETWORK_RESOURCE_USAGE 4 | |
2284 | #define TASK_POLICY_POWER_RESOURCE_USAGE 5 | |
2285 | ||
2286 | #define TASK_POLICY_RESOURCE_USAGE_COUNT 6 | |
2287 | ||
2288 | int | |
2289 | proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled) | |
2290 | { | |
2291 | int error = 0; | |
2292 | int action; | |
2293 | void * bsdinfo = NULL; | |
2294 | ||
2295 | task_lock(task); | |
2296 | if (task != current_task()) { | |
2297 | task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT; | |
2298 | } else { | |
2299 | task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT; | |
2300 | } | |
2301 | ||
2302 | error = task_clear_cpuusage_locked(task, cpumon_entitled); | |
2303 | if (error != 0) | |
2304 | goto out; | |
2305 | ||
2306 | action = task->applied_ru_cpu; | |
2307 | if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { | |
2308 | /* reset action */ | |
2309 | task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; | |
2310 | } | |
2311 | if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { | |
2312 | bsdinfo = task->bsd_info; | |
2313 | task_unlock(task); | |
2314 | proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action); | |
2315 | goto out1; | |
2316 | } | |
2317 | ||
2318 | out: | |
2319 | task_unlock(task); | |
2320 | out1: | |
2321 | return(error); | |
2322 | ||
2323 | } | |
2324 | ||
2325 | /* used to apply resource limit related actions */ | |
2326 | static int | |
2327 | task_apply_resource_actions(task_t task, int type) | |
2328 | { | |
2329 | int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; | |
2330 | void * bsdinfo = NULL; | |
2331 | ||
2332 | switch (type) { | |
2333 | case TASK_POLICY_CPU_RESOURCE_USAGE: | |
2334 | break; | |
2335 | case TASK_POLICY_WIREDMEM_RESOURCE_USAGE: | |
2336 | case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE: | |
2337 | case TASK_POLICY_DISK_RESOURCE_USAGE: | |
2338 | case TASK_POLICY_NETWORK_RESOURCE_USAGE: | |
2339 | case TASK_POLICY_POWER_RESOURCE_USAGE: | |
2340 | return(0); | |
2341 | ||
2342 | default: | |
2343 | return(1); | |
2344 | }; | |
2345 | ||
2346 | /* only cpu actions for now */ | |
2347 | task_lock(task); | |
2348 | ||
2349 | if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { | |
2350 | /* apply action */ | |
2351 | task->applied_ru_cpu_ext = task->policy_ru_cpu_ext; | |
2352 | action = task->applied_ru_cpu_ext; | |
2353 | } else { | |
2354 | action = task->applied_ru_cpu_ext; | |
2355 | } | |
2356 | ||
2357 | if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { | |
2358 | bsdinfo = task->bsd_info; | |
2359 | task_unlock(task); | |
2360 | proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action); | |
2361 | } else | |
2362 | task_unlock(task); | |
2363 | ||
2364 | return(0); | |
2365 | } | |
2366 | ||
2367 | /* | |
2368 | * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API | |
2369 | * only allows for one at a time. This means that if there is a per-thread limit active, the other | |
2370 | * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest | |
2371 | * to the caller, and prefer that, but there's no need for that at the moment. | |
2372 | */ | |
2373 | static int | |
2374 | task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope) | |
2375 | { | |
2376 | *percentagep = 0; | |
2377 | *intervalp = 0; | |
2378 | *deadlinep = 0; | |
2379 | ||
2380 | if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) { | |
2381 | *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT; | |
2382 | *percentagep = task->rusage_cpu_perthr_percentage; | |
2383 | *intervalp = task->rusage_cpu_perthr_interval; | |
2384 | } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) { | |
2385 | *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT; | |
2386 | *percentagep = task->rusage_cpu_percentage; | |
2387 | *intervalp = task->rusage_cpu_interval; | |
2388 | } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) { | |
2389 | *scope = TASK_RUSECPU_FLAGS_DEADLINE; | |
2390 | *deadlinep = task->rusage_cpu_deadline; | |
2391 | } else { | |
2392 | *scope = 0; | |
2393 | } | |
2394 | ||
2395 | return(0); | |
2396 | } | |
2397 | ||
2398 | /* | |
2399 | * Suspend the CPU usage monitor for the task. Return value indicates | |
2400 | * if the mechanism was actually enabled. | |
2401 | */ | |
2402 | int | |
2403 | task_suspend_cpumon(task_t task) | |
2404 | { | |
2405 | thread_t thread; | |
2406 | ||
2407 | task_lock_assert_owned(task); | |
2408 | ||
2409 | if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) { | |
2410 | return KERN_INVALID_ARGUMENT; | |
2411 | } | |
2412 | ||
2413 | #if CONFIG_TELEMETRY | |
2414 | /* | |
2415 | * Disable task-wide telemetry if it was ever enabled by the CPU usage | |
2416 | * monitor's warning zone. | |
2417 | */ | |
2418 | telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0); | |
2419 | #endif | |
2420 | ||
2421 | /* | |
2422 | * Suspend monitoring for the task, and propagate that change to each thread. | |
2423 | */ | |
2424 | task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON); | |
2425 | queue_iterate(&task->threads, thread, thread_t, task_threads) { | |
2426 | set_astledger(thread); | |
2427 | } | |
2428 | ||
2429 | return KERN_SUCCESS; | |
2430 | } | |
2431 | ||
2432 | /* | |
2433 | * Remove all traces of the CPU monitor. | |
2434 | */ | |
2435 | int | |
2436 | task_disable_cpumon(task_t task) | |
2437 | { | |
2438 | int kret; | |
2439 | ||
2440 | task_lock_assert_owned(task); | |
2441 | ||
2442 | kret = task_suspend_cpumon(task); | |
2443 | if (kret) return kret; | |
2444 | ||
2445 | /* Once we clear these values, the monitor can't be resumed */ | |
2446 | task->rusage_cpu_perthr_percentage = 0; | |
2447 | task->rusage_cpu_perthr_interval = 0; | |
2448 | ||
2449 | return (KERN_SUCCESS); | |
2450 | } | |
2451 | ||
2452 | ||
2453 | static int | |
2454 | task_enable_cpumon_locked(task_t task) | |
2455 | { | |
2456 | thread_t thread; | |
2457 | task_lock_assert_owned(task); | |
2458 | ||
2459 | if (task->rusage_cpu_perthr_percentage == 0 || | |
2460 | task->rusage_cpu_perthr_interval == 0) { | |
2461 | return KERN_INVALID_ARGUMENT; | |
2462 | } | |
2463 | ||
2464 | task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT; | |
2465 | queue_iterate(&task->threads, thread, thread_t, task_threads) { | |
2466 | set_astledger(thread); | |
2467 | } | |
2468 | ||
2469 | return KERN_SUCCESS; | |
2470 | } | |
2471 | ||
2472 | int | |
2473 | task_resume_cpumon(task_t task) | |
2474 | { | |
2475 | kern_return_t kret; | |
2476 | ||
2477 | if (!task) { | |
2478 | return EINVAL; | |
2479 | } | |
2480 | ||
2481 | task_lock(task); | |
2482 | kret = task_enable_cpumon_locked(task); | |
2483 | task_unlock(task); | |
2484 | ||
2485 | return kret; | |
2486 | } | |
2487 | ||
2488 | ||
2489 | /* duplicate values from bsd/sys/process_policy.h */ | |
2490 | #define PROC_POLICY_CPUMON_DISABLE 0xFF | |
2491 | #define PROC_POLICY_CPUMON_DEFAULTS 0xFE | |
2492 | ||
2493 | static int | |
2494 | task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled) | |
2495 | { | |
2496 | uint64_t abstime = 0; | |
2497 | uint64_t limittime = 0; | |
2498 | ||
2499 | lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED); | |
2500 | ||
2501 | /* By default, refill once per second */ | |
2502 | if (interval == 0) | |
2503 | interval = NSEC_PER_SEC; | |
2504 | ||
2505 | if (percentage != 0) { | |
2506 | if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { | |
2507 | boolean_t warn = FALSE; | |
2508 | ||
2509 | /* | |
2510 | * A per-thread CPU limit on a task generates an exception | |
2511 | * (LEDGER_ACTION_EXCEPTION) if any one thread in the task | |
2512 | * exceeds the limit. | |
2513 | */ | |
2514 | ||
2515 | if (percentage == PROC_POLICY_CPUMON_DISABLE) { | |
2516 | if (cpumon_entitled) { | |
2517 | /* 25095698 - task_disable_cpumon() should be reliable */ | |
2518 | task_disable_cpumon(task); | |
2519 | return 0; | |
2520 | } | |
2521 | ||
2522 | /* | |
2523 | * This task wishes to disable the CPU usage monitor, but it's | |
2524 | * missing the required entitlement: | |
2525 | * com.apple.private.kernel.override-cpumon | |
2526 | * | |
2527 | * Instead, treat this as a request to reset its params | |
2528 | * back to the defaults. | |
2529 | */ | |
2530 | warn = TRUE; | |
2531 | percentage = PROC_POLICY_CPUMON_DEFAULTS; | |
2532 | } | |
2533 | ||
2534 | if (percentage == PROC_POLICY_CPUMON_DEFAULTS) { | |
2535 | percentage = proc_max_cpumon_percentage; | |
2536 | interval = proc_max_cpumon_interval; | |
2537 | } | |
2538 | ||
2539 | if (percentage > 100) { | |
2540 | percentage = 100; | |
2541 | } | |
2542 | ||
2543 | /* | |
2544 | * Passing in an interval of -1 means either: | |
2545 | * - Leave the interval as-is, if there's already a per-thread | |
2546 | * limit configured | |
2547 | * - Use the system default. | |
2548 | */ | |
2549 | if (interval == -1ULL) { | |
2550 | if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { | |
2551 | interval = task->rusage_cpu_perthr_interval; | |
2552 | } else { | |
2553 | interval = proc_max_cpumon_interval; | |
2554 | } | |
2555 | } | |
2556 | ||
2557 | /* | |
2558 | * Enforce global caps on CPU usage monitor here if the process is not | |
2559 | * entitled to escape the global caps. | |
2560 | */ | |
2561 | if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) { | |
2562 | warn = TRUE; | |
2563 | percentage = proc_max_cpumon_percentage; | |
2564 | } | |
2565 | ||
2566 | if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) { | |
2567 | warn = TRUE; | |
2568 | interval = proc_max_cpumon_interval; | |
2569 | } | |
2570 | ||
2571 | if (warn) { | |
2572 | int pid = 0; | |
2573 | const char *procname = "unknown"; | |
2574 | ||
2575 | #ifdef MACH_BSD | |
2576 | pid = proc_selfpid(); | |
2577 | if (current_task()->bsd_info != NULL) { | |
2578 | procname = proc_name_address(current_task()->bsd_info); | |
2579 | } | |
2580 | #endif | |
2581 | ||
2582 | printf("process %s[%d] denied attempt to escape CPU monitor" | |
2583 | " (missing required entitlement).\n", procname, pid); | |
2584 | } | |
2585 | ||
2586 | /* configure the limit values */ | |
2587 | task->rusage_cpu_perthr_percentage = percentage; | |
2588 | task->rusage_cpu_perthr_interval = interval; | |
2589 | ||
2590 | /* and enable the CPU monitor */ | |
2591 | (void)task_enable_cpumon_locked(task); | |
2592 | } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) { | |
2593 | /* | |
2594 | * Currently, a proc-wide CPU limit always blocks if the limit is | |
2595 | * exceeded (LEDGER_ACTION_BLOCK). | |
2596 | */ | |
2597 | task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT; | |
2598 | task->rusage_cpu_percentage = percentage; | |
2599 | task->rusage_cpu_interval = interval; | |
2600 | ||
2601 | limittime = (interval * percentage) / 100; | |
2602 | nanoseconds_to_absolutetime(limittime, &abstime); | |
2603 | ||
2604 | ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0); | |
2605 | ledger_set_period(task->ledger, task_ledgers.cpu_time, interval); | |
2606 | ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK); | |
2607 | } | |
2608 | } | |
2609 | ||
2610 | if (deadline != 0) { | |
2611 | assert(scope == TASK_RUSECPU_FLAGS_DEADLINE); | |
2612 | ||
2613 | /* if already in use, cancel and wait for it to cleanout */ | |
2614 | if (task->rusage_cpu_callt != NULL) { | |
2615 | task_unlock(task); | |
2616 | thread_call_cancel_wait(task->rusage_cpu_callt); | |
2617 | task_lock(task); | |
2618 | } | |
2619 | if (task->rusage_cpu_callt == NULL) { | |
2620 | task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL); | |
2621 | } | |
2622 | /* setup callout */ | |
2623 | if (task->rusage_cpu_callt != 0) { | |
2624 | uint64_t save_abstime = 0; | |
2625 | ||
2626 | task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE; | |
2627 | task->rusage_cpu_deadline = deadline; | |
2628 | ||
2629 | nanoseconds_to_absolutetime(deadline, &abstime); | |
2630 | save_abstime = abstime; | |
2631 | clock_absolutetime_interval_to_deadline(save_abstime, &abstime); | |
2632 | thread_call_enter_delayed(task->rusage_cpu_callt, abstime); | |
2633 | } | |
2634 | } | |
2635 | ||
2636 | return(0); | |
2637 | } | |
2638 | ||
2639 | int | |
2640 | task_clear_cpuusage(task_t task, int cpumon_entitled) | |
2641 | { | |
2642 | int retval = 0; | |
2643 | ||
2644 | task_lock(task); | |
2645 | retval = task_clear_cpuusage_locked(task, cpumon_entitled); | |
2646 | task_unlock(task); | |
2647 | ||
2648 | return(retval); | |
2649 | } | |
2650 | ||
2651 | static int | |
2652 | task_clear_cpuusage_locked(task_t task, int cpumon_entitled) | |
2653 | { | |
2654 | thread_call_t savecallt; | |
2655 | ||
2656 | /* cancel percentage handling if set */ | |
2657 | if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) { | |
2658 | task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT; | |
2659 | ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0); | |
2660 | task->rusage_cpu_percentage = 0; | |
2661 | task->rusage_cpu_interval = 0; | |
2662 | } | |
2663 | ||
2664 | /* | |
2665 | * Disable the CPU usage monitor. | |
2666 | */ | |
2667 | if (cpumon_entitled) { | |
2668 | task_disable_cpumon(task); | |
2669 | } | |
2670 | ||
2671 | /* cancel deadline handling if set */ | |
2672 | if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) { | |
2673 | task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE; | |
2674 | if (task->rusage_cpu_callt != 0) { | |
2675 | savecallt = task->rusage_cpu_callt; | |
2676 | task->rusage_cpu_callt = NULL; | |
2677 | task->rusage_cpu_deadline = 0; | |
2678 | task_unlock(task); | |
2679 | thread_call_cancel_wait(savecallt); | |
2680 | thread_call_free(savecallt); | |
2681 | task_lock(task); | |
2682 | } | |
2683 | } | |
2684 | return(0); | |
2685 | } | |
2686 | ||
2687 | /* called by ledger unit to enforce action due to resource usage criteria being met */ | |
2688 | static void | |
2689 | task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1) | |
2690 | { | |
2691 | task_t task = (task_t)param0; | |
2692 | (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE); | |
2693 | return; | |
2694 | } | |
2695 | ||
2696 | ||
2697 | /* | |
2698 | * Routines for taskwatch and pidbind | |
2699 | */ | |
2700 | ||
2701 | ||
2702 | /* | |
2703 | * Routines for importance donation/inheritance/boosting | |
2704 | */ | |
2705 | ||
2706 | static void | |
2707 | task_importance_update_live_donor(task_t target_task) | |
2708 | { | |
2709 | #if IMPORTANCE_INHERITANCE | |
2710 | ||
2711 | ipc_importance_task_t task_imp; | |
2712 | ||
2713 | task_imp = ipc_importance_for_task(target_task, FALSE); | |
2714 | if (IIT_NULL != task_imp) { | |
2715 | ipc_importance_task_update_live_donor(task_imp); | |
2716 | ipc_importance_task_release(task_imp); | |
2717 | } | |
2718 | #endif /* IMPORTANCE_INHERITANCE */ | |
2719 | } | |
2720 | ||
2721 | void | |
2722 | task_importance_mark_donor(task_t task, boolean_t donating) | |
2723 | { | |
2724 | #if IMPORTANCE_INHERITANCE | |
2725 | ipc_importance_task_t task_imp; | |
2726 | ||
2727 | task_imp = ipc_importance_for_task(task, FALSE); | |
2728 | if (IIT_NULL != task_imp) { | |
2729 | ipc_importance_task_mark_donor(task_imp, donating); | |
2730 | ipc_importance_task_release(task_imp); | |
2731 | } | |
2732 | #endif /* IMPORTANCE_INHERITANCE */ | |
2733 | } | |
2734 | ||
2735 | void | |
2736 | task_importance_mark_live_donor(task_t task, boolean_t live_donating) | |
2737 | { | |
2738 | #if IMPORTANCE_INHERITANCE | |
2739 | ipc_importance_task_t task_imp; | |
2740 | ||
2741 | task_imp = ipc_importance_for_task(task, FALSE); | |
2742 | if (IIT_NULL != task_imp) { | |
2743 | ipc_importance_task_mark_live_donor(task_imp, live_donating); | |
2744 | ipc_importance_task_release(task_imp); | |
2745 | } | |
2746 | #endif /* IMPORTANCE_INHERITANCE */ | |
2747 | } | |
2748 | ||
2749 | void | |
2750 | task_importance_mark_receiver(task_t task, boolean_t receiving) | |
2751 | { | |
2752 | #if IMPORTANCE_INHERITANCE | |
2753 | ipc_importance_task_t task_imp; | |
2754 | ||
2755 | task_imp = ipc_importance_for_task(task, FALSE); | |
2756 | if (IIT_NULL != task_imp) { | |
2757 | ipc_importance_task_mark_receiver(task_imp, receiving); | |
2758 | ipc_importance_task_release(task_imp); | |
2759 | } | |
2760 | #endif /* IMPORTANCE_INHERITANCE */ | |
2761 | } | |
2762 | ||
2763 | void | |
2764 | task_importance_mark_denap_receiver(task_t task, boolean_t denap) | |
2765 | { | |
2766 | #if IMPORTANCE_INHERITANCE | |
2767 | ipc_importance_task_t task_imp; | |
2768 | ||
2769 | task_imp = ipc_importance_for_task(task, FALSE); | |
2770 | if (IIT_NULL != task_imp) { | |
2771 | ipc_importance_task_mark_denap_receiver(task_imp, denap); | |
2772 | ipc_importance_task_release(task_imp); | |
2773 | } | |
2774 | #endif /* IMPORTANCE_INHERITANCE */ | |
2775 | } | |
2776 | ||
2777 | void | |
2778 | task_importance_reset(__imp_only task_t task) | |
2779 | { | |
2780 | #if IMPORTANCE_INHERITANCE | |
2781 | ipc_importance_task_t task_imp; | |
2782 | ||
2783 | /* TODO: Lower importance downstream before disconnect */ | |
2784 | task_imp = task->task_imp_base; | |
2785 | ipc_importance_reset(task_imp, FALSE); | |
2786 | task_importance_update_live_donor(task); | |
2787 | #endif /* IMPORTANCE_INHERITANCE */ | |
2788 | } | |
2789 | ||
2790 | #if IMPORTANCE_INHERITANCE | |
2791 | ||
2792 | /* | |
2793 | * Sets the task boost bit to the provided value. Does NOT run the update function. | |
2794 | * | |
2795 | * Task lock must be held. | |
2796 | */ | |
2797 | static void | |
2798 | task_set_boost_locked(task_t task, boolean_t boost_active) | |
2799 | { | |
2800 | #if IMPORTANCE_DEBUG | |
2801 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START), | |
2802 | proc_selfpid(), task_pid(task), trequested_0(task), trequested_1(task), 0); | |
2803 | #endif | |
2804 | ||
2805 | task->requested_policy.trp_boosted = boost_active; | |
2806 | ||
2807 | #if IMPORTANCE_DEBUG | |
2808 | if (boost_active == TRUE){ | |
2809 | DTRACE_BOOST2(boost, task_t, task, int, task_pid(task)); | |
2810 | } else { | |
2811 | DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task)); | |
2812 | } | |
2813 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END), | |
2814 | proc_selfpid(), task_pid(task), | |
2815 | trequested_0(task), trequested_1(task), 0); | |
2816 | #endif | |
2817 | } | |
2818 | ||
2819 | /* | |
2820 | * Sets the task boost bit to the provided value and applies the update. | |
2821 | * | |
2822 | * Task lock must be held. Must call update complete after unlocking the task. | |
2823 | */ | |
2824 | void | |
2825 | task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token) | |
2826 | { | |
2827 | task_set_boost_locked(task, boost_active); | |
2828 | ||
2829 | task_policy_update_locked(task, pend_token); | |
2830 | } | |
2831 | ||
2832 | /* | |
2833 | * Check if this task should donate importance. | |
2834 | * | |
2835 | * May be called without taking the task lock. In that case, donor status can change | |
2836 | * so you must check only once for each donation event. | |
2837 | */ | |
2838 | boolean_t | |
2839 | task_is_importance_donor(task_t task) | |
2840 | { | |
2841 | if (task->task_imp_base == IIT_NULL) | |
2842 | return FALSE; | |
2843 | return ipc_importance_task_is_donor(task->task_imp_base); | |
2844 | } | |
2845 | ||
2846 | /* | |
2847 | * Query the status of the task's donor mark. | |
2848 | */ | |
2849 | boolean_t | |
2850 | task_is_marked_importance_donor(task_t task) | |
2851 | { | |
2852 | if (task->task_imp_base == IIT_NULL) | |
2853 | return FALSE; | |
2854 | return ipc_importance_task_is_marked_donor(task->task_imp_base); | |
2855 | } | |
2856 | ||
2857 | /* | |
2858 | * Query the status of the task's live donor and donor mark. | |
2859 | */ | |
2860 | boolean_t | |
2861 | task_is_marked_live_importance_donor(task_t task) | |
2862 | { | |
2863 | if (task->task_imp_base == IIT_NULL) | |
2864 | return FALSE; | |
2865 | return ipc_importance_task_is_marked_live_donor(task->task_imp_base); | |
2866 | } | |
2867 | ||
2868 | ||
2869 | /* | |
2870 | * This routine may be called without holding task lock | |
2871 | * since the value of imp_receiver can never be unset. | |
2872 | */ | |
2873 | boolean_t | |
2874 | task_is_importance_receiver(task_t task) | |
2875 | { | |
2876 | if (task->task_imp_base == IIT_NULL) | |
2877 | return FALSE; | |
2878 | return ipc_importance_task_is_marked_receiver(task->task_imp_base); | |
2879 | } | |
2880 | ||
2881 | /* | |
2882 | * Query the task's receiver mark. | |
2883 | */ | |
2884 | boolean_t | |
2885 | task_is_marked_importance_receiver(task_t task) | |
2886 | { | |
2887 | if (task->task_imp_base == IIT_NULL) | |
2888 | return FALSE; | |
2889 | return ipc_importance_task_is_marked_receiver(task->task_imp_base); | |
2890 | } | |
2891 | ||
2892 | /* | |
2893 | * This routine may be called without holding task lock | |
2894 | * since the value of de-nap receiver can never be unset. | |
2895 | */ | |
2896 | boolean_t | |
2897 | task_is_importance_denap_receiver(task_t task) | |
2898 | { | |
2899 | if (task->task_imp_base == IIT_NULL) | |
2900 | return FALSE; | |
2901 | return ipc_importance_task_is_denap_receiver(task->task_imp_base); | |
2902 | } | |
2903 | ||
2904 | /* | |
2905 | * Query the task's de-nap receiver mark. | |
2906 | */ | |
2907 | boolean_t | |
2908 | task_is_marked_importance_denap_receiver(task_t task) | |
2909 | { | |
2910 | if (task->task_imp_base == IIT_NULL) | |
2911 | return FALSE; | |
2912 | return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base); | |
2913 | } | |
2914 | ||
2915 | /* | |
2916 | * This routine may be called without holding task lock | |
2917 | * since the value of imp_receiver can never be unset. | |
2918 | */ | |
2919 | boolean_t | |
2920 | task_is_importance_receiver_type(task_t task) | |
2921 | { | |
2922 | if (task->task_imp_base == IIT_NULL) | |
2923 | return FALSE; | |
2924 | return (task_is_importance_receiver(task) || | |
2925 | task_is_importance_denap_receiver(task)); | |
2926 | } | |
2927 | ||
2928 | /* | |
2929 | * External importance assertions are managed by the process in userspace | |
2930 | * Internal importance assertions are the responsibility of the kernel | |
2931 | * Assertions are changed from internal to external via task_importance_externalize_assertion | |
2932 | */ | |
2933 | ||
2934 | int | |
2935 | task_importance_hold_internal_assertion(task_t target_task, uint32_t count) | |
2936 | { | |
2937 | ipc_importance_task_t task_imp; | |
2938 | kern_return_t ret; | |
2939 | ||
2940 | /* may be first time, so allow for possible importance setup */ | |
2941 | task_imp = ipc_importance_for_task(target_task, FALSE); | |
2942 | if (IIT_NULL == task_imp) { | |
2943 | return EOVERFLOW; | |
2944 | } | |
2945 | ret = ipc_importance_task_hold_internal_assertion(task_imp, count); | |
2946 | ipc_importance_task_release(task_imp); | |
2947 | ||
2948 | return (KERN_SUCCESS != ret) ? ENOTSUP : 0; | |
2949 | } | |
2950 | ||
2951 | int | |
2952 | task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count) | |
2953 | { | |
2954 | ipc_importance_task_t task_imp; | |
2955 | kern_return_t ret; | |
2956 | ||
2957 | /* may be first time, so allow for possible importance setup */ | |
2958 | task_imp = ipc_importance_for_task(target_task, FALSE); | |
2959 | if (IIT_NULL == task_imp) { | |
2960 | return EOVERFLOW; | |
2961 | } | |
2962 | ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count); | |
2963 | ipc_importance_task_release(task_imp); | |
2964 | ||
2965 | return (KERN_SUCCESS != ret) ? ENOTSUP : 0; | |
2966 | } | |
2967 | ||
2968 | int | |
2969 | task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count) | |
2970 | { | |
2971 | ipc_importance_task_t task_imp; | |
2972 | kern_return_t ret; | |
2973 | ||
2974 | /* must already have set up an importance */ | |
2975 | task_imp = target_task->task_imp_base; | |
2976 | if (IIT_NULL == task_imp) { | |
2977 | return EOVERFLOW; | |
2978 | } | |
2979 | ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count); | |
2980 | return (KERN_SUCCESS != ret) ? ENOTSUP : 0; | |
2981 | } | |
2982 | ||
2983 | int | |
2984 | task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count) | |
2985 | { | |
2986 | ipc_importance_task_t task_imp; | |
2987 | kern_return_t ret; | |
2988 | ||
2989 | /* must already have set up an importance */ | |
2990 | task_imp = target_task->task_imp_base; | |
2991 | if (IIT_NULL == task_imp) { | |
2992 | return EOVERFLOW; | |
2993 | } | |
2994 | ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count); | |
2995 | return (KERN_SUCCESS != ret) ? EOVERFLOW : 0; | |
2996 | } | |
2997 | ||
2998 | int | |
2999 | task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count) | |
3000 | { | |
3001 | ipc_importance_task_t task_imp; | |
3002 | kern_return_t ret; | |
3003 | ||
3004 | /* must already have set up an importance */ | |
3005 | task_imp = target_task->task_imp_base; | |
3006 | if (IIT_NULL == task_imp) { | |
3007 | return EOVERFLOW; | |
3008 | } | |
3009 | ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count); | |
3010 | return (KERN_SUCCESS != ret) ? EOVERFLOW : 0; | |
3011 | } | |
3012 | ||
3013 | static void | |
3014 | task_add_importance_watchport(task_t task, mach_port_t port, int *boostp) | |
3015 | { | |
3016 | int boost = 0; | |
3017 | ||
3018 | __impdebug_only int released_pid = 0; | |
3019 | __impdebug_only int pid = task_pid(task); | |
3020 | ||
3021 | ipc_importance_task_t release_imp_task = IIT_NULL; | |
3022 | ||
3023 | if (IP_VALID(port) != 0) { | |
3024 | ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE); | |
3025 | ||
3026 | ip_lock(port); | |
3027 | ||
3028 | /* | |
3029 | * The port must have been marked tempowner already. | |
3030 | * This also filters out ports whose receive rights | |
3031 | * are already enqueued in a message, as you can't | |
3032 | * change the right's destination once it's already | |
3033 | * on its way. | |
3034 | */ | |
3035 | if (port->ip_tempowner != 0) { | |
3036 | assert(port->ip_impdonation != 0); | |
3037 | ||
3038 | boost = port->ip_impcount; | |
3039 | if (IIT_NULL != port->ip_imp_task) { | |
3040 | /* | |
3041 | * if this port is already bound to a task, | |
3042 | * release the task reference and drop any | |
3043 | * watchport-forwarded boosts | |
3044 | */ | |
3045 | release_imp_task = port->ip_imp_task; | |
3046 | port->ip_imp_task = IIT_NULL; | |
3047 | } | |
3048 | ||
3049 | /* mark the port is watching another task (reference held in port->ip_imp_task) */ | |
3050 | if (ipc_importance_task_is_marked_receiver(new_imp_task)) { | |
3051 | port->ip_imp_task = new_imp_task; | |
3052 | new_imp_task = IIT_NULL; | |
3053 | } | |
3054 | } | |
3055 | ip_unlock(port); | |
3056 | ||
3057 | if (IIT_NULL != new_imp_task) { | |
3058 | ipc_importance_task_release(new_imp_task); | |
3059 | } | |
3060 | ||
3061 | if (IIT_NULL != release_imp_task) { | |
3062 | if (boost > 0) | |
3063 | ipc_importance_task_drop_internal_assertion(release_imp_task, boost); | |
3064 | ||
3065 | // released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */ | |
3066 | ipc_importance_task_release(release_imp_task); | |
3067 | } | |
3068 | #if IMPORTANCE_DEBUG | |
3069 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE, | |
3070 | proc_selfpid(), pid, boost, released_pid, 0); | |
3071 | #endif /* IMPORTANCE_DEBUG */ | |
3072 | } | |
3073 | ||
3074 | *boostp = boost; | |
3075 | return; | |
3076 | } | |
3077 | ||
3078 | #endif /* IMPORTANCE_INHERITANCE */ | |
3079 | ||
3080 | /* | |
3081 | * Routines for VM to query task importance | |
3082 | */ | |
3083 | ||
3084 | ||
/*
 * Weights added together when estimating a task's importance for
 * low memory notification and for purging purgeable memory.
 */
#define TASK_IMPORTANCE_FOREGROUND      4
#define TASK_IMPORTANCE_NOTDARWINBG     1
3091 | ||
3092 | ||
3093 | /* | |
3094 | * (Un)Mark the task as a privileged listener for memory notifications. | |
3095 | * if marked, this task will be among the first to be notified amongst | |
3096 | * the bulk of all other tasks when the system enters a pressure level | |
3097 | * of interest to this task. | |
3098 | */ | |
3099 | int | |
3100 | task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value) | |
3101 | { | |
3102 | if (old_value != NULL) { | |
3103 | *old_value = (boolean_t)task->low_mem_privileged_listener; | |
3104 | } else { | |
3105 | task_lock(task); | |
3106 | task->low_mem_privileged_listener = (uint32_t)new_value; | |
3107 | task_unlock(task); | |
3108 | } | |
3109 | ||
3110 | return 0; | |
3111 | } | |
3112 | ||
3113 | /* | |
3114 | * Checks if the task is already notified. | |
3115 | * | |
3116 | * Condition: task lock should be held while calling this function. | |
3117 | */ | |
3118 | boolean_t | |
3119 | task_has_been_notified(task_t task, int pressurelevel) | |
3120 | { | |
3121 | if (task == NULL) { | |
3122 | return FALSE; | |
3123 | } | |
3124 | ||
3125 | if (pressurelevel == kVMPressureWarning) | |
3126 | return (task->low_mem_notified_warn ? TRUE : FALSE); | |
3127 | else if (pressurelevel == kVMPressureCritical) | |
3128 | return (task->low_mem_notified_critical ? TRUE : FALSE); | |
3129 | else | |
3130 | return TRUE; | |
3131 | } | |
3132 | ||
3133 | ||
3134 | /* | |
3135 | * Checks if the task is used for purging. | |
3136 | * | |
3137 | * Condition: task lock should be held while calling this function. | |
3138 | */ | |
3139 | boolean_t | |
3140 | task_used_for_purging(task_t task, int pressurelevel) | |
3141 | { | |
3142 | if (task == NULL) { | |
3143 | return FALSE; | |
3144 | } | |
3145 | ||
3146 | if (pressurelevel == kVMPressureWarning) | |
3147 | return (task->purged_memory_warn ? TRUE : FALSE); | |
3148 | else if (pressurelevel == kVMPressureCritical) | |
3149 | return (task->purged_memory_critical ? TRUE : FALSE); | |
3150 | else | |
3151 | return TRUE; | |
3152 | } | |
3153 | ||
3154 | ||
3155 | /* | |
3156 | * Mark the task as notified with memory notification. | |
3157 | * | |
3158 | * Condition: task lock should be held while calling this function. | |
3159 | */ | |
3160 | void | |
3161 | task_mark_has_been_notified(task_t task, int pressurelevel) | |
3162 | { | |
3163 | if (task == NULL) { | |
3164 | return; | |
3165 | } | |
3166 | ||
3167 | if (pressurelevel == kVMPressureWarning) | |
3168 | task->low_mem_notified_warn = 1; | |
3169 | else if (pressurelevel == kVMPressureCritical) | |
3170 | task->low_mem_notified_critical = 1; | |
3171 | } | |
3172 | ||
3173 | ||
3174 | /* | |
3175 | * Mark the task as purged. | |
3176 | * | |
3177 | * Condition: task lock should be held while calling this function. | |
3178 | */ | |
3179 | void | |
3180 | task_mark_used_for_purging(task_t task, int pressurelevel) | |
3181 | { | |
3182 | if (task == NULL) { | |
3183 | return; | |
3184 | } | |
3185 | ||
3186 | if (pressurelevel == kVMPressureWarning) | |
3187 | task->purged_memory_warn = 1; | |
3188 | else if (pressurelevel == kVMPressureCritical) | |
3189 | task->purged_memory_critical = 1; | |
3190 | } | |
3191 | ||
3192 | ||
3193 | /* | |
3194 | * Mark the task eligible for low memory notification. | |
3195 | * | |
3196 | * Condition: task lock should be held while calling this function. | |
3197 | */ | |
3198 | void | |
3199 | task_clear_has_been_notified(task_t task, int pressurelevel) | |
3200 | { | |
3201 | if (task == NULL) { | |
3202 | return; | |
3203 | } | |
3204 | ||
3205 | if (pressurelevel == kVMPressureWarning) | |
3206 | task->low_mem_notified_warn = 0; | |
3207 | else if (pressurelevel == kVMPressureCritical) | |
3208 | task->low_mem_notified_critical = 0; | |
3209 | } | |
3210 | ||
3211 | ||
3212 | /* | |
3213 | * Mark the task eligible for purging its purgeable memory. | |
3214 | * | |
3215 | * Condition: task lock should be held while calling this function. | |
3216 | */ | |
3217 | void | |
3218 | task_clear_used_for_purging(task_t task) | |
3219 | { | |
3220 | if (task == NULL) { | |
3221 | return; | |
3222 | } | |
3223 | ||
3224 | task->purged_memory_warn = 0; | |
3225 | task->purged_memory_critical = 0; | |
3226 | } | |
3227 | ||
3228 | ||
3229 | /* | |
3230 | * Estimate task importance for purging its purgeable memory | |
3231 | * and low memory notification. | |
3232 | * | |
3233 | * Importance is calculated in the following order of criteria: | |
3234 | * -Task role : Background vs Foreground | |
3235 | * -Boost status: Not boosted vs Boosted | |
3236 | * -Darwin BG status. | |
3237 | * | |
3238 | * Returns: Estimated task importance. Less important task will have lower | |
3239 | * estimated importance. | |
3240 | */ | |
3241 | int | |
3242 | task_importance_estimate(task_t task) | |
3243 | { | |
3244 | int task_importance = 0; | |
3245 | ||
3246 | if (task == NULL) { | |
3247 | return 0; | |
3248 | } | |
3249 | ||
3250 | if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION) | |
3251 | task_importance += TASK_IMPORTANCE_FOREGROUND; | |
3252 | ||
3253 | if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0) | |
3254 | task_importance += TASK_IMPORTANCE_NOTDARWINBG; | |
3255 | ||
3256 | return task_importance; | |
3257 | } | |
3258 | ||
3259 | boolean_t | |
3260 | task_has_assertions(task_t task) | |
3261 | { | |
3262 | return (task->task_imp_base->iit_assertcnt? TRUE : FALSE); | |
3263 | } | |
3264 | ||
3265 | ||
3266 | kern_return_t | |
3267 | send_resource_violation(typeof(send_cpu_usage_violation) sendfunc, | |
3268 | task_t violator, | |
3269 | struct ledger_entry_info *linfo, | |
3270 | resource_notify_flags_t flags) | |
3271 | { | |
3272 | #ifndef MACH_BSD | |
3273 | return KERN_NOT_SUPPORTED; | |
3274 | #else | |
3275 | kern_return_t kr = KERN_SUCCESS; | |
3276 | proc_t proc = NULL; | |
3277 | posix_path_t proc_path = ""; | |
3278 | proc_name_t procname = "<unknown>"; | |
3279 | int pid = -1; | |
3280 | clock_sec_t secs; | |
3281 | clock_nsec_t nsecs; | |
3282 | mach_timespec_t timestamp; | |
3283 | thread_t curthread = current_thread(); | |
3284 | ipc_port_t dstport = MACH_PORT_NULL; | |
3285 | ||
3286 | if (!violator) { | |
3287 | kr = KERN_INVALID_ARGUMENT; goto finish; | |
3288 | } | |
3289 | ||
3290 | /* extract violator information */ | |
3291 | task_lock(violator); | |
3292 | if (!(proc = get_bsdtask_info(violator))) { | |
3293 | task_unlock(violator); | |
3294 | kr = KERN_INVALID_ARGUMENT; goto finish; | |
3295 | } | |
3296 | (void)mig_strncpy(procname, proc_best_name(proc), sizeof(procname)); | |
3297 | pid = task_pid(violator); | |
3298 | if (flags & kRNFatalLimitFlag) { | |
3299 | kr = proc_pidpathinfo_internal(proc, 0, proc_path, | |
3300 | sizeof(proc_path), NULL); | |
3301 | } | |
3302 | task_unlock(violator); | |
3303 | if (kr) goto finish; | |
3304 | ||
3305 | /* violation time ~ now */ | |
3306 | clock_get_calendar_nanotime(&secs, &nsecs); | |
3307 | timestamp.tv_sec = (int32_t)secs; | |
3308 | timestamp.tv_nsec = (int32_t)nsecs; | |
3309 | /* 25567702 tracks widening mach_timespec_t */ | |
3310 | ||
3311 | /* send message */ | |
3312 | kr = host_get_special_port(host_priv_self(), HOST_LOCAL_NODE, | |
3313 | HOST_RESOURCE_NOTIFY_PORT, &dstport); | |
3314 | if (kr) goto finish; | |
3315 | ||
3316 | /* TH_OPT_HONOR_QLIMIT causes ipc_kmsg_send() to respect the | |
3317 | * queue limit. It also unsets this flag, but this code also | |
3318 | * unsets it for clarity and in case that code changes. */ | |
3319 | curthread->options |= TH_OPT_HONOR_QLIMIT; | |
3320 | kr = sendfunc(dstport, | |
3321 | procname, pid, proc_path, timestamp, | |
3322 | linfo->lei_balance, linfo->lei_last_refill, | |
3323 | linfo->lei_limit, linfo->lei_refill_period, | |
3324 | flags); | |
3325 | curthread->options &= (~TH_OPT_HONOR_QLIMIT); | |
3326 | ||
3327 | ipc_port_release_send(dstport); | |
3328 | ||
3329 | finish: | |
3330 | return kr; | |
3331 | #endif /* MACH_BSD */ | |
3332 | } | |
3333 | ||
3334 | ||
3335 | /* | |
3336 | * Resource violations trace four 64-bit integers. For K32, two additional | |
3337 | * codes are allocated, the first with the low nibble doubled. So if the K64 | |
3338 | * code is 0x042, the K32 codes would be 0x044 and 0x45. | |
3339 | */ | |
3340 | #ifdef __LP64__ | |
3341 | void | |
3342 | trace_resource_violation(uint16_t code, | |
3343 | struct ledger_entry_info *linfo) | |
3344 | { | |
3345 | KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, code), | |
3346 | linfo->lei_balance, linfo->lei_last_refill, | |
3347 | linfo->lei_limit, linfo->lei_refill_period); | |
3348 | } | |
3349 | #else /* K32 */ | |
3350 | /* TODO: create/find a trace_two_LLs() for K32 systems */ | |
3351 | #define MASK32 0xffffffff | |
3352 | void | |
3353 | trace_resource_violation(uint16_t code, | |
3354 | struct ledger_entry_info *linfo) | |
3355 | { | |
3356 | int8_t lownibble = (code & 0x3) * 2; | |
3357 | int16_t codeA = (code & 0xffc) | lownibble; | |
3358 | int16_t codeB = codeA + 1; | |
3359 | ||
3360 | int32_t balance_high = (linfo->lei_balance >> 32) & MASK32; | |
3361 | int32_t balance_low = linfo->lei_balance & MASK32; | |
3362 | int32_t last_refill_high = (linfo->lei_last_refill >> 32) & MASK32; | |
3363 | int32_t last_refill_low = linfo->lei_last_refill & MASK32; | |
3364 | ||
3365 | int32_t limit_high = (linfo->lei_limit >> 32) & MASK32; | |
3366 | int32_t limit_low = linfo->lei_limit & MASK32; | |
3367 | int32_t refill_period_high = (linfo->lei_refill_period >> 32) & MASK32; | |
3368 | int32_t refill_period_low = linfo->lei_refill_period & MASK32; | |
3369 | ||
3370 | KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeA), | |
3371 | balance_high, balance_low, | |
3372 | last_refill_high, last_refill_low); | |
3373 | KERNEL_DBG_IST_SANE(KDBG_CODE(DBG_MACH, DBG_MACH_RESOURCE, codeB), | |
3374 | limit_high, limit_low, | |
3375 | refill_period_high, refill_period_low); | |
3376 | } | |
3377 | #endif /* K64/K32 */ |