]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <mach/mach_types.h> | |
30 | #include <mach/task_server.h> | |
31 | ||
32 | #include <kern/sched.h> | |
33 | #include <kern/task.h> | |
34 | #include <mach/thread_policy.h> | |
35 | #include <sys/errno.h> | |
36 | #include <sys/resource.h> | |
37 | #include <machine/limits.h> | |
38 | #include <kern/ledger.h> | |
39 | #include <kern/thread_call.h> | |
40 | #include <kern/sfi.h> | |
41 | #include <kern/coalition.h> | |
42 | #if CONFIG_TELEMETRY | |
43 | #include <kern/telemetry.h> | |
44 | #endif | |
45 | ||
46 | #if IMPORTANCE_INHERITANCE | |
47 | #include <ipc/ipc_importance.h> | |
48 | #if IMPORTANCE_DEBUG | |
49 | #include <mach/machine/sdt.h> | |
50 | #endif /* IMPORTANCE_DEBUG */ | |
51 | #endif /* IMPORTANCE_INHERITACE */ | |
52 | ||
53 | #include <sys/kdebug.h> | |
54 | ||
55 | /* | |
56 | * Task Policy | |
57 | * | |
58 | * This subsystem manages task and thread IO priority and backgrounding, | |
59 | * as well as importance inheritance, process suppression, task QoS, and apptype. | |
60 | * These properties have a surprising number of complex interactions, so they are | |
61 | * centralized here in one state machine to simplify the implementation of those interactions. | |
62 | * | |
63 | * Architecture: | |
64 | * Threads and tasks have three policy fields: requested, effective, and pending. | |
65 | * Requested represents the wishes of each interface that influences task policy. | |
66 | * Effective represents the distillation of that policy into a set of behaviors. | |
67 | * Pending represents updates that haven't been applied yet. | |
68 | * | |
69 | * Each interface that has an input into the task policy state machine controls a field in requested. | |
70 | * If the interface has a getter, it returns what is in the field in requested, but that is | |
71 | * not necessarily what is actually in effect. | |
72 | * | |
73 | * All kernel subsystems that behave differently based on task policy call into | |
74 | * the get_effective_policy function, which returns the decision of the task policy state machine | |
75 | * for that subsystem by querying only the 'effective' field. | |
76 | * | |
77 | * Policy change operations: | |
78 | * Here are the steps to change a policy on a task or thread: | |
79 | * 1) Lock task | |
80 | * 2) Change requested field for the relevant policy | |
81 | * 3) Run a task policy update, which recalculates effective based on requested, | |
82 | * then takes a diff between the old and new versions of requested and calls the relevant | |
83 | * other subsystems to apply these changes, and updates the pending field. | |
84 | * 4) Unlock task | |
85 | * 5) Run task policy update complete, which looks at the pending field to update | |
86 | * subsystems which cannot be touched while holding the task lock. | |
87 | * | |
88 | * To add a new requested policy, add the field in the requested struct, the flavor in task.h, | |
89 | * the setter and getter in proc_(set|get)_task_policy*, and dump the state in task_requested_bitfield, | |
90 | * then set up the effects of that behavior in task_policy_update*. If the policy manifests | |
91 | * itself as a distinct effective policy, add it to the effective struct and add it to the | |
92 | * proc_get_effective_policy accessor. | |
93 | * | |
94 | * Most policies are set via proc_set_task_policy, but policies that don't fit that interface | |
95 | * roll their own lock/set/update/unlock/complete code inside this file. | |
96 | * | |
97 | * | |
98 | * Suppression policy | |
99 | * | |
100 | * These are a set of behaviors that can be requested for a task. They currently have specific | |
101 | * implied actions when they're enabled, but they may be made customizable in the future. | |
102 | * | |
103 | * When the affected task is boosted, we temporarily disable the suppression behaviors | |
104 | * so that the affected process has a chance to run so it can call the API to permanently | |
105 | * disable the suppression behaviors. | |
106 | * | |
107 | * Locking | |
108 | * | |
109 | * Changing task policy on a task or thread takes the task lock, and not the thread lock. | |
110 | * TODO: Should changing policy on a thread take the thread lock instead? | |
111 | * | |
112 | * Querying the effective policy does not take the task lock, to prevent deadlocks or slowdown in sensitive code. | |
113 | * This means that any notification of state change needs to be externally synchronized. | |
114 | * | |
115 | */ | |
116 | ||
117 | extern const qos_policy_params_t thread_qos_policy_params; | |
118 | ||
119 | /* for task holds without dropping the lock */ | |
120 | extern void task_hold_locked(task_t task); | |
121 | extern void task_release_locked(task_t task); | |
122 | extern void task_wait_locked(task_t task, boolean_t until_not_runnable); | |
123 | ||
124 | extern void thread_recompute_qos(thread_t thread); | |
125 | ||
126 | /* Task policy related helper functions */ | |
127 | static void proc_set_task_policy_locked(task_t task, thread_t thread, int category, int flavor, int value); | |
128 | static void proc_set_task_policy2_locked(task_t task, thread_t thread, int category, int flavor, int value1, int value2); | |
129 | ||
130 | static void task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token); | |
131 | static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token); | |
132 | static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle, boolean_t update_sfi); | |
133 | static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle, boolean_t update_sfi, boolean_t update_qos); | |
134 | static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role); | |
135 | ||
136 | static int proc_get_effective_policy(task_t task, thread_t thread, int policy); | |
137 | ||
138 | static void proc_iopol_to_tier(int iopolicy, int *tier, int *passive); | |
139 | static int proc_tier_to_iopol(int tier, int passive); | |
140 | ||
141 | static uintptr_t trequested_0(task_t task, thread_t thread); | |
142 | static uintptr_t trequested_1(task_t task, thread_t thread); | |
143 | static uintptr_t teffective_0(task_t task, thread_t thread); | |
144 | static uintptr_t teffective_1(task_t task, thread_t thread); | |
145 | static uint32_t tpending(task_pend_token_t pend_token); | |
146 | static uint64_t task_requested_bitfield(task_t task, thread_t thread); | |
147 | static uint64_t task_effective_bitfield(task_t task, thread_t thread); | |
148 | ||
149 | void proc_get_thread_policy(thread_t thread, thread_policy_state_t info); | |
150 | ||
151 | /* CPU Limits related helper functions */ | |
152 | static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope); | |
153 | int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled); | |
154 | static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled); | |
155 | int task_disable_cpumon(task_t task); | |
156 | static int task_apply_resource_actions(task_t task, int type); | |
157 | void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1); | |
158 | void proc_init_cpumon_params(void); | |
159 | ||
160 | #ifdef MACH_BSD | |
161 | int proc_pid(void *proc); | |
162 | extern int proc_selfpid(void); | |
163 | extern char * proc_name_address(void *p); | |
164 | extern void rethrottle_thread(void * uthread); | |
165 | extern void proc_apply_task_networkbg(void * bsd_info, thread_t thread); | |
166 | #endif /* MACH_BSD */ | |
167 | ||
168 | extern zone_t thread_qos_override_zone; | |
169 | static boolean_t _proc_thread_qos_remove_override_internal(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type, boolean_t reset); | |
170 | ||
171 | ||
172 | /* Importance Inheritance related helper functions */ | |
173 | ||
174 | #if IMPORTANCE_INHERITANCE | |
175 | ||
176 | static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp); | |
177 | static void task_importance_update_live_donor(task_t target_task); | |
178 | ||
179 | #endif /* IMPORTANCE_INHERITANCE */ | |
180 | ||
/*
 * Annotation macros for conditionally compiled features.
 * Each one expands to nothing when its feature is compiled in and to
 * __unused when it is not -- presumably so identifiers that are only
 * referenced by that feature's code can be annotated without producing
 * unused warnings in builds where the feature is off.
 */
#if IMPORTANCE_DEBUG
#define __impdebug_only
#else
#define __impdebug_only __unused
#endif

#if IMPORTANCE_INHERITANCE
#define __imp_only
#else
#define __imp_only __unused
#endif

/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably tells a callee whether the task lock is already held -- confirm.
 */
#define TASK_LOCKED 1
#define TASK_UNLOCKED 0

/*
 * NOTE(review): presumably the possible values of the 'update_cpu' argument
 * of task_policy_update_thread_locked() -- confirm against its definition.
 */
#define DO_LOWPRI_CPU 1
#define UNDO_LOWPRI_CPU 2
198 | ||
/*
 * Macros for making tracing simpler.
 *
 * Each macro accepts a (task, thread) pair where thread == THREAD_NULL means
 * "the target is the task itself":
 *   tpriority - priority of the target (task or thread), as a uintptr_t
 *   tisthread - TASK_POLICY_TASK or TASK_POLICY_THREAD, depending on the target
 *   targetid  - pid of the task, or thread_id of the thread, as a uintptr_t
 *
 * Every parameter is parenthesized so that arbitrary argument expressions
 * expand correctly. Note that 'thread' is still evaluated more than once,
 * so arguments must not have side effects.
 */

#define tpriority(task, thread) ((uintptr_t)((thread) == THREAD_NULL ? ((task)->priority) : ((thread)->priority)))
#define tisthread(thread) ((thread) == THREAD_NULL ? TASK_POLICY_TASK : TASK_POLICY_THREAD)
#define targetid(task, thread) ((uintptr_t)((thread) == THREAD_NULL ? (audit_token_pid_from_task(task)) : ((thread)->thread_id)))
204 | ||
205 | /* | |
206 | * Default parameters for certain policies | |
207 | */ | |
208 | ||
/*
 * NOTE(review): presumably the IO throttle tiers applied to standard daemons,
 * suppressed tasks and TAL apps respectively; their consumers are outside the
 * visible part of this file -- confirm.
 */
int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
int proc_tal_disk_tier = THROTTLE_LEVEL_TIER1;

/* Low byte of LATENCY_QOS_TIER_0, i.e. the same masking qos_extract() applies */
int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & 0xFF);

/* Initial value of the bg_iotier field of a task's requested policy (see below) */
const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2;

/* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */
const struct task_requested_policy default_task_requested_policy = {
	.bg_iotier = proc_default_bg_iotier
};
/* Effective and pended policies start out fully zeroed */
const struct task_effective_policy default_task_effective_policy = {};
const struct task_pended_policy default_task_pended_policy = {};
223 | ||
/*
 * Default parameters for CPU usage monitor.
 *
 * Default setting is 50% over 3 minutes.
 */
#define DEFAULT_CPUMON_PERCENTAGE 50
/* 3 * 60 -- presumably expressed in seconds, matching the "3 minutes" above */
#define DEFAULT_CPUMON_INTERVAL (3 * 60)

/*
 * Not given an initializer here.
 * NOTE(review): presumably filled in by proc_init_cpumon_params(), which is
 * declared earlier in this file but defined outside the visible part -- confirm.
 */
uint8_t proc_max_cpumon_percentage;
uint64_t proc_max_cpumon_interval;
234 | ||
235 | kern_return_t | |
236 | qos_latency_policy_validate(task_latency_qos_t ltier) { | |
237 | if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) && | |
238 | ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0))) | |
239 | return KERN_INVALID_ARGUMENT; | |
240 | ||
241 | return KERN_SUCCESS; | |
242 | } | |
243 | ||
244 | kern_return_t | |
245 | qos_throughput_policy_validate(task_throughput_qos_t ttier) { | |
246 | if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) && | |
247 | ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0))) | |
248 | return KERN_INVALID_ARGUMENT; | |
249 | ||
250 | return KERN_SUCCESS; | |
251 | } | |
252 | ||
253 | static kern_return_t | |
254 | task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) { | |
255 | if (count < TASK_QOS_POLICY_COUNT) | |
256 | return KERN_INVALID_ARGUMENT; | |
257 | ||
258 | task_latency_qos_t ltier = qosinfo->task_latency_qos_tier; | |
259 | task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier; | |
260 | ||
261 | kern_return_t kr = qos_latency_policy_validate(ltier); | |
262 | ||
263 | if (kr != KERN_SUCCESS) | |
264 | return kr; | |
265 | ||
266 | kr = qos_throughput_policy_validate(ttier); | |
267 | ||
268 | return kr; | |
269 | } | |
270 | ||
/*
 * Returns the low 8 bits of a packaged QoS value, discarding the upper bits.
 */
uint32_t
qos_extract(uint32_t qv)
{
	const uint32_t tier_mask = 0xFF;

	return qv & tier_mask;
}
275 | ||
276 | uint32_t | |
277 | qos_latency_policy_package(uint32_t qv) { | |
278 | return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv); | |
279 | } | |
280 | ||
281 | uint32_t | |
282 | qos_throughput_policy_package(uint32_t qv) { | |
283 | return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv); | |
284 | } | |
285 | ||
286 | /* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */ | |
287 | static boolean_t task_policy_suppression_disable = FALSE; | |
288 | ||
289 | kern_return_t | |
290 | task_policy_set( | |
291 | task_t task, | |
292 | task_policy_flavor_t flavor, | |
293 | task_policy_t policy_info, | |
294 | mach_msg_type_number_t count) | |
295 | { | |
296 | kern_return_t result = KERN_SUCCESS; | |
297 | ||
298 | if (task == TASK_NULL || task == kernel_task) | |
299 | return (KERN_INVALID_ARGUMENT); | |
300 | ||
301 | switch (flavor) { | |
302 | ||
303 | case TASK_CATEGORY_POLICY: { | |
304 | task_category_policy_t info = (task_category_policy_t)policy_info; | |
305 | ||
306 | if (count < TASK_CATEGORY_POLICY_COUNT) | |
307 | return (KERN_INVALID_ARGUMENT); | |
308 | ||
309 | ||
310 | switch(info->role) { | |
311 | case TASK_FOREGROUND_APPLICATION: | |
312 | case TASK_BACKGROUND_APPLICATION: | |
313 | case TASK_DEFAULT_APPLICATION: | |
314 | proc_set_task_policy(task, THREAD_NULL, | |
315 | TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, | |
316 | info->role); | |
317 | break; | |
318 | ||
319 | case TASK_CONTROL_APPLICATION: | |
320 | if (task != current_task() || task->sec_token.val[0] != 0) | |
321 | result = KERN_INVALID_ARGUMENT; | |
322 | else | |
323 | proc_set_task_policy(task, THREAD_NULL, | |
324 | TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, | |
325 | info->role); | |
326 | break; | |
327 | ||
328 | case TASK_GRAPHICS_SERVER: | |
329 | /* TODO: Restrict this role to FCFS <rdar://problem/12552788> */ | |
330 | if (task != current_task() || task->sec_token.val[0] != 0) | |
331 | result = KERN_INVALID_ARGUMENT; | |
332 | else | |
333 | proc_set_task_policy(task, THREAD_NULL, | |
334 | TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE, | |
335 | info->role); | |
336 | break; | |
337 | default: | |
338 | result = KERN_INVALID_ARGUMENT; | |
339 | break; | |
340 | } /* switch (info->role) */ | |
341 | ||
342 | break; | |
343 | } | |
344 | ||
345 | /* Desired energy-efficiency/performance "quality-of-service" */ | |
346 | case TASK_BASE_QOS_POLICY: | |
347 | case TASK_OVERRIDE_QOS_POLICY: | |
348 | { | |
349 | task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; | |
350 | kern_return_t kr = task_qos_policy_validate(qosinfo, count); | |
351 | ||
352 | if (kr != KERN_SUCCESS) | |
353 | return kr; | |
354 | ||
355 | ||
356 | uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier); | |
357 | uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier); | |
358 | ||
359 | proc_set_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, | |
360 | flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, | |
361 | lqos, tqos); | |
362 | } | |
363 | break; | |
364 | ||
365 | case TASK_BASE_LATENCY_QOS_POLICY: | |
366 | { | |
367 | task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; | |
368 | kern_return_t kr = task_qos_policy_validate(qosinfo, count); | |
369 | ||
370 | if (kr != KERN_SUCCESS) | |
371 | return kr; | |
372 | ||
373 | uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier); | |
374 | ||
375 | proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos); | |
376 | } | |
377 | break; | |
378 | ||
379 | case TASK_BASE_THROUGHPUT_QOS_POLICY: | |
380 | { | |
381 | task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info; | |
382 | kern_return_t kr = task_qos_policy_validate(qosinfo, count); | |
383 | ||
384 | if (kr != KERN_SUCCESS) | |
385 | return kr; | |
386 | ||
387 | uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier); | |
388 | ||
389 | proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos); | |
390 | } | |
391 | break; | |
392 | ||
393 | case TASK_SUPPRESSION_POLICY: | |
394 | { | |
395 | ||
396 | task_suppression_policy_t info = (task_suppression_policy_t)policy_info; | |
397 | ||
398 | if (count < TASK_SUPPRESSION_POLICY_COUNT) | |
399 | return (KERN_INVALID_ARGUMENT); | |
400 | ||
401 | struct task_qos_policy qosinfo; | |
402 | ||
403 | qosinfo.task_latency_qos_tier = info->timer_throttle; | |
404 | qosinfo.task_throughput_qos_tier = info->throughput_qos; | |
405 | ||
406 | kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT); | |
407 | ||
408 | if (kr != KERN_SUCCESS) | |
409 | return kr; | |
410 | ||
411 | /* TEMPORARY disablement of task suppression */ | |
412 | if (task_policy_suppression_disable && info->active) | |
413 | return KERN_SUCCESS; | |
414 | ||
415 | struct task_pend_token pend_token = {}; | |
416 | ||
417 | task_lock(task); | |
418 | ||
419 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
420 | (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START, | |
421 | proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), | |
422 | trequested_1(task, THREAD_NULL), 0); | |
423 | ||
424 | task->requested_policy.t_sup_active = (info->active) ? 1 : 0; | |
425 | task->requested_policy.t_sup_lowpri_cpu = (info->lowpri_cpu) ? 1 : 0; | |
426 | task->requested_policy.t_sup_timer = qos_extract(info->timer_throttle); | |
427 | task->requested_policy.t_sup_disk = (info->disk_throttle) ? 1 : 0; | |
428 | task->requested_policy.t_sup_cpu_limit = (info->cpu_limit) ? 1 : 0; | |
429 | task->requested_policy.t_sup_suspend = (info->suspend) ? 1 : 0; | |
430 | task->requested_policy.t_sup_throughput = qos_extract(info->throughput_qos); | |
431 | task->requested_policy.t_sup_cpu = (info->suppressed_cpu) ? 1 : 0; | |
432 | task->requested_policy.t_sup_bg_sockets = (info->background_sockets) ? 1 : 0; | |
433 | ||
434 | task_policy_update_locked(task, THREAD_NULL, &pend_token); | |
435 | ||
436 | task_unlock(task); | |
437 | ||
438 | task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token); | |
439 | ||
440 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
441 | (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END, | |
442 | proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), | |
443 | trequested_1(task, THREAD_NULL), 0); | |
444 | ||
445 | break; | |
446 | ||
447 | } | |
448 | ||
449 | default: | |
450 | result = KERN_INVALID_ARGUMENT; | |
451 | break; | |
452 | } | |
453 | ||
454 | return (result); | |
455 | } | |
456 | ||
457 | /* Sets BSD 'nice' value on the task */ | |
458 | kern_return_t | |
459 | task_importance( | |
460 | task_t task, | |
461 | integer_t importance) | |
462 | { | |
463 | if (task == TASK_NULL || task == kernel_task) | |
464 | return (KERN_INVALID_ARGUMENT); | |
465 | ||
466 | task_lock(task); | |
467 | ||
468 | if (!task->active) { | |
469 | task_unlock(task); | |
470 | ||
471 | return (KERN_TERMINATED); | |
472 | } | |
473 | ||
474 | if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) { | |
475 | task_unlock(task); | |
476 | ||
477 | return (KERN_INVALID_ARGUMENT); | |
478 | } | |
479 | ||
480 | task->importance = importance; | |
481 | ||
482 | /* TODO: tracepoint? */ | |
483 | ||
484 | /* Redrive only the task priority calculation */ | |
485 | task_policy_update_task_locked(task, FALSE, FALSE, FALSE); | |
486 | ||
487 | task_unlock(task); | |
488 | ||
489 | return (KERN_SUCCESS); | |
490 | } | |
491 | ||
492 | kern_return_t | |
493 | task_policy_get( | |
494 | task_t task, | |
495 | task_policy_flavor_t flavor, | |
496 | task_policy_t policy_info, | |
497 | mach_msg_type_number_t *count, | |
498 | boolean_t *get_default) | |
499 | { | |
500 | if (task == TASK_NULL || task == kernel_task) | |
501 | return (KERN_INVALID_ARGUMENT); | |
502 | ||
503 | switch (flavor) { | |
504 | ||
505 | case TASK_CATEGORY_POLICY: | |
506 | { | |
507 | task_category_policy_t info = (task_category_policy_t)policy_info; | |
508 | ||
509 | if (*count < TASK_CATEGORY_POLICY_COUNT) | |
510 | return (KERN_INVALID_ARGUMENT); | |
511 | ||
512 | if (*get_default) | |
513 | info->role = TASK_UNSPECIFIED; | |
514 | else | |
515 | info->role = proc_get_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE); | |
516 | break; | |
517 | } | |
518 | ||
519 | case TASK_BASE_QOS_POLICY: /* FALLTHRU */ | |
520 | case TASK_OVERRIDE_QOS_POLICY: | |
521 | { | |
522 | task_qos_policy_t info = (task_qos_policy_t)policy_info; | |
523 | ||
524 | if (*count < TASK_QOS_POLICY_COUNT) | |
525 | return (KERN_INVALID_ARGUMENT); | |
526 | ||
527 | if (*get_default) { | |
528 | info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED; | |
529 | info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED; | |
530 | } else if (flavor == TASK_BASE_QOS_POLICY) { | |
531 | int value1, value2; | |
532 | ||
533 | proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2); | |
534 | ||
535 | info->task_latency_qos_tier = qos_latency_policy_package(value1); | |
536 | info->task_throughput_qos_tier = qos_throughput_policy_package(value2); | |
537 | ||
538 | } else if (flavor == TASK_OVERRIDE_QOS_POLICY) { | |
539 | int value1, value2; | |
540 | ||
541 | proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2); | |
542 | ||
543 | info->task_latency_qos_tier = qos_latency_policy_package(value1); | |
544 | info->task_throughput_qos_tier = qos_throughput_policy_package(value2); | |
545 | } | |
546 | ||
547 | break; | |
548 | } | |
549 | ||
550 | case TASK_POLICY_STATE: | |
551 | { | |
552 | task_policy_state_t info = (task_policy_state_t)policy_info; | |
553 | ||
554 | if (*count < TASK_POLICY_STATE_COUNT) | |
555 | return (KERN_INVALID_ARGUMENT); | |
556 | ||
557 | /* Only root can get this info */ | |
558 | if (current_task()->sec_token.val[0] != 0) | |
559 | return KERN_PROTECTION_FAILURE; | |
560 | ||
561 | if (*get_default) { | |
562 | info->requested = 0; | |
563 | info->effective = 0; | |
564 | info->pending = 0; | |
565 | info->imp_assertcnt = 0; | |
566 | info->imp_externcnt = 0; | |
567 | info->flags = 0; | |
568 | info->imp_transitions = 0; | |
569 | } else { | |
570 | task_lock(task); | |
571 | ||
572 | info->requested = task_requested_bitfield(task, THREAD_NULL); | |
573 | info->effective = task_effective_bitfield(task, THREAD_NULL); | |
574 | info->pending = 0; | |
575 | ||
576 | info->flags = 0; | |
577 | if (task->task_imp_base != NULL) { | |
578 | info->imp_assertcnt = task->task_imp_base->iit_assertcnt; | |
579 | info->imp_externcnt = IIT_EXTERN(task->task_imp_base); | |
580 | info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0); | |
581 | info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0); | |
582 | info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0); | |
583 | info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0); | |
584 | info->imp_transitions = task->task_imp_base->iit_transitions; | |
585 | } else { | |
586 | info->imp_assertcnt = 0; | |
587 | info->imp_externcnt = 0; | |
588 | info->imp_transitions = 0; | |
589 | } | |
590 | task_unlock(task); | |
591 | } | |
592 | ||
593 | info->reserved[0] = 0; | |
594 | info->reserved[1] = 0; | |
595 | ||
596 | break; | |
597 | } | |
598 | ||
599 | case TASK_SUPPRESSION_POLICY: | |
600 | { | |
601 | task_suppression_policy_t info = (task_suppression_policy_t)policy_info; | |
602 | ||
603 | if (*count < TASK_SUPPRESSION_POLICY_COUNT) | |
604 | return (KERN_INVALID_ARGUMENT); | |
605 | ||
606 | task_lock(task); | |
607 | ||
608 | if (*get_default) { | |
609 | info->active = 0; | |
610 | info->lowpri_cpu = 0; | |
611 | info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED; | |
612 | info->disk_throttle = 0; | |
613 | info->cpu_limit = 0; | |
614 | info->suspend = 0; | |
615 | info->throughput_qos = 0; | |
616 | info->suppressed_cpu = 0; | |
617 | } else { | |
618 | info->active = task->requested_policy.t_sup_active; | |
619 | info->lowpri_cpu = task->requested_policy.t_sup_lowpri_cpu; | |
620 | info->timer_throttle = qos_latency_policy_package(task->requested_policy.t_sup_timer); | |
621 | info->disk_throttle = task->requested_policy.t_sup_disk; | |
622 | info->cpu_limit = task->requested_policy.t_sup_cpu_limit; | |
623 | info->suspend = task->requested_policy.t_sup_suspend; | |
624 | info->throughput_qos = qos_throughput_policy_package(task->requested_policy.t_sup_throughput); | |
625 | info->suppressed_cpu = task->requested_policy.t_sup_cpu; | |
626 | info->background_sockets = task->requested_policy.t_sup_bg_sockets; | |
627 | } | |
628 | ||
629 | task_unlock(task); | |
630 | break; | |
631 | } | |
632 | ||
633 | default: | |
634 | return (KERN_INVALID_ARGUMENT); | |
635 | } | |
636 | ||
637 | return (KERN_SUCCESS); | |
638 | } | |
639 | ||
640 | /* | |
641 | * Called at task creation | |
642 | * We calculate the correct effective but don't apply it to anything yet. | |
643 | * The threads, etc will inherit from the task as they get created. | |
644 | */ | |
645 | void | |
646 | task_policy_create(task_t task, int parent_boosted) | |
647 | { | |
648 | if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) { | |
649 | if (parent_boosted) { | |
650 | task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE; | |
651 | task_importance_mark_donor(task, TRUE); | |
652 | } else { | |
653 | task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_BACKGROUND; | |
654 | task_importance_mark_receiver(task, FALSE); | |
655 | } | |
656 | } | |
657 | ||
658 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
659 | (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START, | |
660 | audit_token_pid_from_task(task), teffective_0(task, THREAD_NULL), | |
661 | teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0); | |
662 | ||
663 | task_policy_update_internal_locked(task, THREAD_NULL, TRUE, NULL); | |
664 | ||
665 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
666 | (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END, | |
667 | audit_token_pid_from_task(task), teffective_0(task, THREAD_NULL), | |
668 | teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0); | |
669 | ||
670 | task_importance_update_live_donor(task); | |
671 | task_policy_update_task_locked(task, FALSE, FALSE, FALSE); | |
672 | } | |
673 | ||
674 | void | |
675 | thread_policy_create(thread_t thread) | |
676 | { | |
677 | task_t task = thread->task; | |
678 | ||
679 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
680 | (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START, | |
681 | targetid(task, thread), teffective_0(task, thread), | |
682 | teffective_1(task, thread), tpriority(task, thread), 0); | |
683 | ||
684 | task_policy_update_internal_locked(task, thread, TRUE, NULL); | |
685 | ||
686 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
687 | (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END, | |
688 | targetid(task, thread), teffective_0(task, thread), | |
689 | teffective_1(task, thread), tpriority(task, thread), 0); | |
690 | } | |
691 | ||
692 | static void | |
693 | task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token) | |
694 | { | |
695 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
696 | (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread)) | DBG_FUNC_START), | |
697 | targetid(task, thread), teffective_0(task, thread), | |
698 | teffective_1(task, thread), tpriority(task, thread), 0); | |
699 | ||
700 | task_policy_update_internal_locked(task, thread, FALSE, pend_token); | |
701 | ||
702 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
703 | (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END, | |
704 | targetid(task, thread), teffective_0(task, thread), | |
705 | teffective_1(task, thread), tpriority(task, thread), 0); | |
706 | } | |
707 | ||
708 | /* | |
709 | * One state update function TO RULE THEM ALL | |
710 | * | |
711 | * This function updates the task or thread effective policy fields | |
712 | * and pushes the results to the relevant subsystems. | |
713 | * | |
714 | * Must call update_complete after unlocking the task, | |
715 | * as some subsystems cannot be updated while holding the task lock. | |
716 | * | |
717 | * Called with task locked, not thread | |
718 | */ | |
719 | ||
720 | static void | |
721 | task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token) | |
722 | { | |
723 | boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; | |
724 | ||
725 | /* | |
726 | * Step 1: | |
727 | * Gather requested policy | |
728 | */ | |
729 | ||
730 | struct task_requested_policy requested = | |
731 | (on_task) ? task->requested_policy : thread->requested_policy; | |
732 | ||
733 | ||
734 | /* | |
735 | * Step 2: | |
736 | * Calculate new effective policies from requested policy and task state | |
737 | * Rules: | |
738 | * If in an 'on_task' block, must only look at and set fields starting with t_ | |
739 | * If operating on a task, don't touch anything starting with th_ | |
740 | * If operating on a thread, don't touch anything starting with t_ | |
741 | * Don't change requested, it won't take effect | |
742 | */ | |
743 | ||
744 | struct task_effective_policy next = {}; | |
745 | struct task_effective_policy task_effective; | |
746 | ||
747 | /* Calculate QoS policies */ | |
748 | ||
749 | if (on_task) { | |
750 | /* Update task role */ | |
751 | next.t_role = requested.t_role; | |
752 | ||
753 | /* Set task qos clamp and ceiling */ | |
754 | next.t_qos_clamp = requested.t_qos_clamp; | |
755 | ||
756 | if (requested.t_apptype == TASK_APPTYPE_APP_DEFAULT || | |
757 | requested.t_apptype == TASK_APPTYPE_APP_TAL) { | |
758 | ||
759 | switch (next.t_role) { | |
760 | case TASK_FOREGROUND_APPLICATION: | |
761 | /* Foreground apps get urgent scheduler priority */ | |
762 | next.qos_ui_is_urgent = 1; | |
763 | next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED; | |
764 | break; | |
765 | ||
766 | case TASK_BACKGROUND_APPLICATION: | |
767 | /* This is really 'non-focal but on-screen' */ | |
768 | next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED; | |
769 | break; | |
770 | ||
771 | case TASK_DEFAULT_APPLICATION: | |
772 | /* This is 'may render UI but we don't know if it's focal/nonfocal' */ | |
773 | next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED; | |
774 | break; | |
775 | ||
776 | case TASK_NONUI_APPLICATION: | |
777 | /* i.e. 'off-screen' */ | |
778 | next.t_qos_ceiling = THREAD_QOS_LEGACY; | |
779 | break; | |
780 | ||
781 | case TASK_CONTROL_APPLICATION: | |
782 | case TASK_GRAPHICS_SERVER: | |
783 | next.qos_ui_is_urgent = 1; | |
784 | next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED; | |
785 | break; | |
786 | ||
787 | case TASK_UNSPECIFIED: | |
788 | default: | |
789 | /* Apps that don't have an application role get | |
790 | * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */ | |
791 | next.t_qos_ceiling = THREAD_QOS_LEGACY; | |
792 | break; | |
793 | } | |
794 | } else { | |
795 | /* Daemons get USER_INTERACTIVE squashed to USER_INITIATED */ | |
796 | next.t_qos_ceiling = THREAD_QOS_USER_INITIATED; | |
797 | } | |
798 | } else { | |
799 | /* | |
800 | * Set thread qos tier | |
801 | * Note that an override only overrides the QoS field, not other policy settings. | |
802 | * A thread must already be participating in QoS for override to take effect | |
803 | */ | |
804 | ||
805 | /* Snapshot the task's effective policy */ | |
806 | task_effective = task->effective_policy; | |
807 | ||
808 | next.qos_ui_is_urgent = task_effective.qos_ui_is_urgent; | |
809 | ||
810 | if ((requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED) && (requested.thrp_qos != THREAD_QOS_UNSPECIFIED)) | |
811 | next.thep_qos = MAX(requested.thrp_qos_override, requested.thrp_qos); | |
812 | else | |
813 | next.thep_qos = requested.thrp_qos; | |
814 | ||
815 | /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */ | |
816 | if (task_effective.t_qos_clamp != THREAD_QOS_UNSPECIFIED) { | |
817 | if (next.thep_qos != THREAD_QOS_UNSPECIFIED) | |
818 | next.thep_qos = MIN(task_effective.t_qos_clamp, next.thep_qos); | |
819 | else | |
820 | next.thep_qos = task_effective.t_qos_clamp; | |
821 | } | |
822 | ||
823 | /* The ceiling only applies to threads that are in the QoS world */ | |
824 | if (task_effective.t_qos_ceiling != THREAD_QOS_UNSPECIFIED && | |
825 | next.thep_qos != THREAD_QOS_UNSPECIFIED) { | |
826 | next.thep_qos = MIN(task_effective.t_qos_ceiling, next.thep_qos); | |
827 | } | |
828 | ||
829 | /* | |
830 | * The QoS relative priority is only applicable when the original programmer's | |
831 | * intended (requested) QoS is in effect. When the QoS is clamped (e.g. | |
832 | * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored, | |
833 | * since otherwise it would be lower than unclamped threads. Similarly, in the | |
834 | * presence of boosting, the programmer doesn't know what other actors | |
835 | * are boosting the thread. | |
836 | */ | |
837 | if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) && | |
838 | (requested.thrp_qos == next.thep_qos) && | |
839 | (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) { | |
840 | next.thep_qos_relprio = requested.thrp_qos_relprio; | |
841 | } else { | |
842 | next.thep_qos_relprio = 0; | |
843 | } | |
844 | } | |
845 | ||
846 | /* Calculate DARWIN_BG */ | |
847 | boolean_t wants_darwinbg = FALSE; | |
848 | boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */ | |
849 | boolean_t wants_watchersbg = FALSE; /* Do I want my pidbound threads to be bg */ | |
850 | boolean_t wants_tal = FALSE; /* Do I want the effects of TAL mode */ | |
851 | ||
852 | /* | |
853 | * If DARWIN_BG has been requested at either level, it's engaged. | |
854 | * Only true DARWIN_BG changes cause watchers to transition. | |
855 | * | |
856 | * Backgrounding due to apptype does. | |
857 | */ | |
858 | if (requested.int_darwinbg || requested.ext_darwinbg) | |
859 | wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE; | |
860 | ||
861 | if (on_task) { | |
862 | /* Background TAL apps are throttled when TAL is enabled */ | |
863 | if (requested.t_apptype == TASK_APPTYPE_APP_TAL && | |
864 | requested.t_role == TASK_BACKGROUND_APPLICATION && | |
865 | requested.t_tal_enabled == 1) { | |
866 | wants_tal = TRUE; | |
867 | next.t_tal_engaged = 1; | |
868 | } | |
869 | ||
870 | /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */ | |
871 | if (requested.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && | |
872 | requested.t_boosted == 0) | |
873 | wants_darwinbg = TRUE; | |
874 | ||
875 | /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */ | |
876 | if (requested.t_apptype == TASK_APPTYPE_DAEMON_BACKGROUND) | |
877 | wants_darwinbg = TRUE; | |
878 | ||
879 | if (next.t_qos_clamp == THREAD_QOS_BACKGROUND || next.t_qos_clamp == THREAD_QOS_MAINTENANCE) | |
880 | wants_darwinbg = TRUE; | |
881 | } else { | |
882 | if (requested.th_pidbind_bg) | |
883 | wants_all_sockets_bg = wants_darwinbg = TRUE; | |
884 | ||
885 | if (requested.th_workq_bg) | |
886 | wants_darwinbg = TRUE; | |
887 | ||
888 | if (next.thep_qos == THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_MAINTENANCE) | |
889 | wants_darwinbg = TRUE; | |
890 | } | |
891 | ||
892 | /* Calculate side effects of DARWIN_BG */ | |
893 | ||
894 | if (wants_darwinbg) { | |
895 | next.darwinbg = 1; | |
896 | /* darwinbg threads/tasks always create bg sockets, but we don't always loop over all sockets */ | |
897 | next.new_sockets_bg = 1; | |
898 | next.lowpri_cpu = 1; | |
899 | } | |
900 | ||
901 | if (wants_all_sockets_bg) | |
902 | next.all_sockets_bg = 1; | |
903 | ||
904 | if (on_task && wants_watchersbg) | |
905 | next.t_watchers_bg = 1; | |
906 | ||
907 | /* darwinbg on either task or thread implies background QOS (or lower) */ | |
908 | if (!on_task && | |
909 | (wants_darwinbg || task_effective.darwinbg) && | |
910 | (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)){ | |
911 | next.thep_qos = THREAD_QOS_BACKGROUND; | |
912 | next.thep_qos_relprio = 0; | |
913 | } | |
914 | ||
915 | /* Calculate low CPU priority */ | |
916 | ||
917 | boolean_t wants_lowpri_cpu = FALSE; | |
918 | ||
919 | if (wants_darwinbg || wants_tal) | |
920 | wants_lowpri_cpu = TRUE; | |
921 | ||
922 | if (on_task && requested.t_sup_lowpri_cpu && requested.t_boosted == 0) | |
923 | wants_lowpri_cpu = TRUE; | |
924 | ||
925 | if (wants_lowpri_cpu) | |
926 | next.lowpri_cpu = 1; | |
927 | ||
928 | /* Calculate IO policy */ | |
929 | ||
930 | /* Update BG IO policy (so we can see if it has changed) */ | |
931 | next.bg_iotier = requested.bg_iotier; | |
932 | ||
933 | int iopol = THROTTLE_LEVEL_TIER0; | |
934 | ||
935 | if (wants_darwinbg) | |
936 | iopol = MAX(iopol, requested.bg_iotier); | |
937 | ||
938 | if (on_task) { | |
939 | if (requested.t_apptype == TASK_APPTYPE_DAEMON_STANDARD) | |
940 | iopol = MAX(iopol, proc_standard_daemon_tier); | |
941 | ||
942 | if (requested.t_sup_disk && requested.t_boosted == 0) | |
943 | iopol = MAX(iopol, proc_suppressed_disk_tier); | |
944 | ||
945 | if (wants_tal) | |
946 | iopol = MAX(iopol, proc_tal_disk_tier); | |
947 | ||
948 | if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED) | |
949 | iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.t_qos_clamp]); | |
950 | ||
951 | } else { | |
952 | /* Look up the associated IO tier value for the QoS class */ | |
953 | iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]); | |
954 | } | |
955 | ||
956 | iopol = MAX(iopol, requested.int_iotier); | |
957 | iopol = MAX(iopol, requested.ext_iotier); | |
958 | ||
959 | next.io_tier = iopol; | |
960 | ||
961 | /* Calculate Passive IO policy */ | |
962 | ||
963 | if (requested.ext_iopassive || requested.int_iopassive) | |
964 | next.io_passive = 1; | |
965 | ||
966 | /* Calculate miscellaneous policy */ | |
967 | ||
968 | if (on_task) { | |
969 | /* Calculate suppression-active flag */ | |
970 | if (requested.t_sup_active && requested.t_boosted == 0) | |
971 | next.t_sup_active = 1; | |
972 | ||
973 | /* Calculate suspend policy */ | |
974 | if (requested.t_sup_suspend && requested.t_boosted == 0) | |
975 | next.t_suspended = 1; | |
976 | ||
977 | /* Calculate timer QOS */ | |
978 | int latency_qos = requested.t_base_latency_qos; | |
979 | ||
980 | if (requested.t_sup_timer && requested.t_boosted == 0) | |
981 | latency_qos = requested.t_sup_timer; | |
982 | ||
983 | if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED) | |
984 | latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.t_qos_clamp]); | |
985 | ||
986 | if (requested.t_over_latency_qos != 0) | |
987 | latency_qos = requested.t_over_latency_qos; | |
988 | ||
989 | /* Treat the windowserver special */ | |
990 | if (requested.t_role == TASK_GRAPHICS_SERVER) | |
991 | latency_qos = proc_graphics_timer_qos; | |
992 | ||
993 | next.t_latency_qos = latency_qos; | |
994 | ||
995 | /* Calculate throughput QOS */ | |
996 | int through_qos = requested.t_base_through_qos; | |
997 | ||
998 | if (requested.t_sup_throughput && requested.t_boosted == 0) | |
999 | through_qos = requested.t_sup_throughput; | |
1000 | ||
1001 | if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED) | |
1002 | through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.t_qos_clamp]); | |
1003 | ||
1004 | if (requested.t_over_through_qos != 0) | |
1005 | through_qos = requested.t_over_through_qos; | |
1006 | ||
1007 | next.t_through_qos = through_qos; | |
1008 | ||
1009 | /* Calculate suppressed CPU priority */ | |
1010 | if (requested.t_sup_cpu && requested.t_boosted == 0) | |
1011 | next.t_suppressed_cpu = 1; | |
1012 | ||
1013 | /* | |
1014 | * Calculate background sockets | |
1015 | * Don't take into account boosting to limit transition frequency. | |
1016 | */ | |
1017 | if (requested.t_sup_bg_sockets){ | |
1018 | next.all_sockets_bg = 1; | |
1019 | next.new_sockets_bg = 1; | |
1020 | } | |
1021 | ||
1022 | /* Apply SFI Managed class bit */ | |
1023 | next.t_sfi_managed = requested.t_sfi_managed; | |
1024 | ||
1025 | /* Calculate 'live donor' status for live importance */ | |
1026 | switch (requested.t_apptype) { | |
1027 | case TASK_APPTYPE_APP_TAL: | |
1028 | case TASK_APPTYPE_APP_DEFAULT: | |
1029 | if (requested.ext_darwinbg == 0) | |
1030 | next.t_live_donor = 1; | |
1031 | else | |
1032 | next.t_live_donor = 0; | |
1033 | break; | |
1034 | ||
1035 | case TASK_APPTYPE_DAEMON_INTERACTIVE: | |
1036 | case TASK_APPTYPE_DAEMON_STANDARD: | |
1037 | case TASK_APPTYPE_DAEMON_ADAPTIVE: | |
1038 | case TASK_APPTYPE_DAEMON_BACKGROUND: | |
1039 | default: | |
1040 | next.t_live_donor = 0; | |
1041 | break; | |
1042 | } | |
1043 | } | |
1044 | ||
1045 | if (requested.terminated) { | |
1046 | /* | |
1047 | * Shoot down the throttles that slow down exit or response to SIGTERM | |
1048 | * We don't need to shoot down: | |
1049 | * passive (don't want to cause others to throttle) | |
1050 | * all_sockets_bg (don't need to iterate FDs on every exit) | |
1051 | * new_sockets_bg (doesn't matter for exiting process) | |
1052 | * pidsuspend (jetsam-ed BG process shouldn't run again) | |
1053 | * watchers_bg (watcher threads don't need to be unthrottled) | |
1054 | * t_latency_qos (affects userspace timers only) | |
1055 | */ | |
1056 | ||
1057 | next.terminated = 1; | |
1058 | next.darwinbg = 0; | |
1059 | next.lowpri_cpu = 0; | |
1060 | next.io_tier = THROTTLE_LEVEL_TIER0; | |
1061 | if (on_task) { | |
1062 | next.t_tal_engaged = 0; | |
1063 | next.t_role = TASK_UNSPECIFIED; | |
1064 | next.t_suppressed_cpu = 0; | |
1065 | ||
1066 | /* TODO: This should only be shot down on SIGTERM, not exit */ | |
1067 | next.t_suspended = 0; | |
1068 | } else { | |
1069 | next.thep_qos = 0; | |
1070 | } | |
1071 | } | |
1072 | ||
1073 | /* | |
1074 | * Step 3: | |
1075 | * Swap out old policy for new policy | |
1076 | */ | |
1077 | ||
1078 | if (!on_task) { | |
1079 | /* Acquire thread mutex to synchronize against | |
1080 | * thread_policy_set(). Consider reworking to separate qos | |
1081 | * fields, or locking the task in thread_policy_set. | |
1082 | * A more efficient model would be to make the thread bits | |
1083 | * authoritative. | |
1084 | */ | |
1085 | thread_mtx_lock(thread); | |
1086 | } | |
1087 | ||
1088 | struct task_effective_policy prev = | |
1089 | (on_task) ? task->effective_policy : thread->effective_policy; | |
1090 | ||
1091 | /* | |
1092 | * Check for invalid transitions here for easier debugging | |
1093 | * TODO: dump the structs as hex in the panic string | |
1094 | */ | |
1095 | if (task == kernel_task && prev.all_sockets_bg != next.all_sockets_bg) | |
1096 | panic("unexpected network change for kernel task"); | |
1097 | ||
1098 | /* This is the point where the new values become visible to other threads */ | |
1099 | if (on_task) | |
1100 | task->effective_policy = next; | |
1101 | else { | |
1102 | /* Preserve thread specific latency/throughput QoS modified via | |
1103 | * thread_policy_set(). Inelegant in the extreme, to be reworked. | |
1104 | * | |
1105 | * If thread QoS class is set, we don't need to preserve the previously set values. | |
1106 | * We should ensure to not accidentally preserve previous thread QoS values if you set a thread | |
1107 | * back to default QoS. | |
1108 | */ | |
1109 | uint32_t lqos = thread->effective_policy.t_latency_qos, tqos = thread->effective_policy.t_through_qos; | |
1110 | ||
1111 | if (prev.thep_qos == THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) { | |
1112 | next.t_latency_qos = lqos; | |
1113 | next.t_through_qos = tqos; | |
1114 | } else if (prev.thep_qos != THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) { | |
1115 | next.t_latency_qos = 0; | |
1116 | next.t_through_qos = 0; | |
1117 | } else { | |
1118 | next.t_latency_qos = thread_qos_policy_params.qos_latency_qos[next.thep_qos]; | |
1119 | next.t_through_qos = thread_qos_policy_params.qos_through_qos[next.thep_qos]; | |
1120 | } | |
1121 | ||
1122 | thread_update_qos_cpu_time(thread, TRUE); | |
1123 | thread->effective_policy = next; | |
1124 | thread_mtx_unlock(thread); | |
1125 | } | |
1126 | ||
1127 | /* Don't do anything further to a half-formed task or thread */ | |
1128 | if (in_create) | |
1129 | return; | |
1130 | ||
1131 | /* | |
1132 | * Step 4: | |
1133 | * Pend updates that can't be done while holding the task lock | |
1134 | */ | |
1135 | ||
1136 | if (prev.all_sockets_bg != next.all_sockets_bg) | |
1137 | pend_token->tpt_update_sockets = 1; | |
1138 | ||
1139 | if (on_task) { | |
1140 | /* Only re-scan the timer list if the qos level is getting less strong */ | |
1141 | if (prev.t_latency_qos > next.t_latency_qos) | |
1142 | pend_token->tpt_update_timers = 1; | |
1143 | ||
1144 | ||
1145 | if (prev.t_live_donor != next.t_live_donor) | |
1146 | pend_token->tpt_update_live_donor = 1; | |
1147 | } | |
1148 | ||
1149 | /* | |
1150 | * Step 5: | |
1151 | * Update other subsystems as necessary if something has changed | |
1152 | */ | |
1153 | ||
1154 | boolean_t update_throttle = (prev.io_tier != next.io_tier) ? TRUE : FALSE; | |
1155 | ||
1156 | if (on_task) { | |
1157 | if (prev.t_suspended == 0 && next.t_suspended == 1 && task->active) { | |
1158 | task_hold_locked(task); | |
1159 | task_wait_locked(task, FALSE); | |
1160 | } | |
1161 | if (prev.t_suspended == 1 && next.t_suspended == 0 && task->active) { | |
1162 | task_release_locked(task); | |
1163 | } | |
1164 | ||
1165 | boolean_t update_threads = FALSE; | |
1166 | boolean_t update_sfi = FALSE; | |
1167 | ||
1168 | if (prev.bg_iotier != next.bg_iotier || | |
1169 | prev.terminated != next.terminated || | |
1170 | prev.t_qos_clamp != next.t_qos_clamp || | |
1171 | prev.t_qos_ceiling != next.t_qos_ceiling || | |
1172 | prev.qos_ui_is_urgent != next.qos_ui_is_urgent || | |
1173 | prev.darwinbg != next.darwinbg) | |
1174 | update_threads = TRUE; | |
1175 | ||
1176 | /* | |
1177 | * A bit of a layering violation. We know what task policy attributes | |
1178 | * sfi_thread_classify() consults, so if they change, trigger SFI | |
1179 | * re-evaluation. | |
1180 | */ | |
1181 | if ((prev.t_latency_qos != next.t_latency_qos) || | |
1182 | (prev.t_role != next.t_role) || | |
1183 | (prev.darwinbg != next.darwinbg) || | |
1184 | (prev.t_sfi_managed != next.t_sfi_managed)) | |
1185 | update_sfi = TRUE; | |
1186 | ||
1187 | /* TODO: if CONFIG_SFI */ | |
1188 | if (prev.t_role != next.t_role && task_policy_update_coalition_focal_tasks(task, prev.t_role, next.t_role)) { | |
1189 | update_sfi = TRUE; | |
1190 | pend_token->tpt_update_coal_sfi = 1; | |
1191 | } | |
1192 | ||
1193 | task_policy_update_task_locked(task, update_throttle, update_threads, update_sfi); | |
1194 | } else { | |
1195 | int update_cpu = 0; | |
1196 | boolean_t update_sfi = FALSE; | |
1197 | boolean_t update_qos = FALSE; | |
1198 | ||
1199 | if (prev.lowpri_cpu != next.lowpri_cpu) | |
1200 | update_cpu = (next.lowpri_cpu ? DO_LOWPRI_CPU : UNDO_LOWPRI_CPU); | |
1201 | ||
1202 | if (prev.darwinbg != next.darwinbg || | |
1203 | prev.thep_qos != next.thep_qos) | |
1204 | update_sfi = TRUE; | |
1205 | ||
1206 | if (prev.thep_qos != next.thep_qos || | |
1207 | prev.thep_qos_relprio != next.thep_qos_relprio || | |
1208 | prev.qos_ui_is_urgent != next.qos_ui_is_urgent) { | |
1209 | update_qos = TRUE; | |
1210 | } | |
1211 | ||
1212 | task_policy_update_thread_locked(thread, update_cpu, update_throttle, update_sfi, update_qos); | |
1213 | } | |
1214 | } | |
1215 | ||
1216 | /* | |
1217 | * Yet another layering violation. We reach out and bang on the coalition directly. | |
1218 | */ | |
1219 | static boolean_t | |
1220 | task_policy_update_coalition_focal_tasks(task_t task, | |
1221 | int prev_role, | |
1222 | int next_role) | |
1223 | { | |
1224 | boolean_t sfi_transition = FALSE; | |
1225 | ||
1226 | if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) { | |
1227 | if (coalition_adjust_focal_task_count(task->coalition, 1) == 1) | |
1228 | sfi_transition = TRUE; | |
1229 | } else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) { | |
1230 | if (coalition_adjust_focal_task_count(task->coalition, -1) == 0) | |
1231 | sfi_transition = TRUE; | |
1232 | } | |
1233 | ||
1234 | if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) { | |
1235 | if (coalition_adjust_non_focal_task_count(task->coalition, 1) == 1) | |
1236 | sfi_transition = TRUE; | |
1237 | } else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) { | |
1238 | if (coalition_adjust_non_focal_task_count(task->coalition, -1) == 0) | |
1239 | sfi_transition = TRUE; | |
1240 | } | |
1241 | ||
1242 | return sfi_transition; | |
1243 | } | |
1244 | ||
1245 | /* Despite the name, the thread's task is locked, the thread is not */ | |
1246 | void | |
1247 | task_policy_update_thread_locked(thread_t thread, | |
1248 | int update_cpu, | |
1249 | boolean_t update_throttle, | |
1250 | boolean_t update_sfi, | |
1251 | boolean_t update_qos) | |
1252 | { | |
1253 | thread_precedence_policy_data_t policy; | |
1254 | ||
1255 | if (update_throttle) { | |
1256 | rethrottle_thread(thread->uthread); | |
1257 | } | |
1258 | ||
1259 | if (update_sfi) { | |
1260 | sfi_reevaluate(thread); | |
1261 | } | |
1262 | ||
1263 | /* | |
1264 | * TODO: pidbind needs to stuff remembered importance into saved_importance | |
1265 | * properly deal with bg'ed threads being pidbound and unbging while pidbound | |
1266 | * | |
1267 | * TODO: A BG thread's priority is 0 on desktop and 4 on embedded. Need to reconcile this. | |
1268 | * */ | |
1269 | if (update_cpu == DO_LOWPRI_CPU) { | |
1270 | thread->saved_importance = thread->importance; | |
1271 | policy.importance = INT_MIN; | |
1272 | } else if (update_cpu == UNDO_LOWPRI_CPU) { | |
1273 | policy.importance = thread->saved_importance; | |
1274 | thread->saved_importance = 0; | |
1275 | } | |
1276 | ||
1277 | /* Takes thread lock and thread mtx lock */ | |
1278 | if (update_cpu) | |
1279 | thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY, | |
1280 | (thread_policy_t)&policy, | |
1281 | THREAD_PRECEDENCE_POLICY_COUNT); | |
1282 | ||
1283 | if (update_qos) | |
1284 | thread_recompute_qos(thread); | |
1285 | } | |
1286 | ||
1287 | /* | |
1288 | * Calculate priority on a task, loop through its threads, and tell them about | |
1289 | * priority changes and throttle changes. | |
1290 | */ | |
1291 | void | |
1292 | task_policy_update_task_locked(task_t task, | |
1293 | boolean_t update_throttle, | |
1294 | boolean_t update_threads, | |
1295 | boolean_t update_sfi) | |
1296 | { | |
1297 | boolean_t update_priority = FALSE; | |
1298 | ||
1299 | if (task == kernel_task) | |
1300 | panic("Attempting to set task policy on kernel_task"); | |
1301 | ||
1302 | int priority = BASEPRI_DEFAULT; | |
1303 | int max_priority = MAXPRI_USER; | |
1304 | ||
1305 | if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU)) { | |
1306 | priority = MAXPRI_THROTTLE; | |
1307 | max_priority = MAXPRI_THROTTLE; | |
1308 | } else if (proc_get_effective_task_policy(task, TASK_POLICY_SUPPRESSED_CPU)) { | |
1309 | priority = MAXPRI_SUPPRESSED; | |
1310 | max_priority = MAXPRI_SUPPRESSED; | |
1311 | } else { | |
1312 | switch (proc_get_effective_task_policy(task, TASK_POLICY_ROLE)) { | |
1313 | case TASK_CONTROL_APPLICATION: | |
1314 | priority = BASEPRI_CONTROL; | |
1315 | break; | |
1316 | case TASK_GRAPHICS_SERVER: | |
1317 | priority = BASEPRI_GRAPHICS; | |
1318 | max_priority = MAXPRI_RESERVED; | |
1319 | break; | |
1320 | default: | |
1321 | break; | |
1322 | } | |
1323 | ||
1324 | /* factor in 'nice' value */ | |
1325 | priority += task->importance; | |
1326 | ||
1327 | if (task->effective_policy.t_qos_clamp != THREAD_QOS_UNSPECIFIED) { | |
1328 | int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.t_qos_clamp]; | |
1329 | ||
1330 | priority = MIN(priority, qos_clamp_priority); | |
1331 | max_priority = MIN(max_priority, qos_clamp_priority); | |
1332 | } | |
1333 | } | |
1334 | ||
1335 | /* avoid extra work if priority isn't changing */ | |
1336 | if (task->priority != priority || task->max_priority != max_priority) { | |
1337 | update_priority = TRUE; | |
1338 | ||
1339 | /* update the scheduling priority for the task */ | |
1340 | task->max_priority = max_priority; | |
1341 | ||
1342 | if (priority > task->max_priority) | |
1343 | priority = task->max_priority; | |
1344 | else if (priority < MINPRI) | |
1345 | priority = MINPRI; | |
1346 | ||
1347 | task->priority = priority; | |
1348 | } | |
1349 | ||
1350 | /* Loop over the threads in the task only once, and only if necessary */ | |
1351 | if (update_threads || update_throttle || update_priority || update_sfi ) { | |
1352 | thread_t thread; | |
1353 | ||
1354 | queue_iterate(&task->threads, thread, thread_t, task_threads) { | |
1355 | if (update_priority) { | |
1356 | thread_mtx_lock(thread); | |
1357 | ||
1358 | thread_task_priority(thread, priority, max_priority); | |
1359 | ||
1360 | thread_mtx_unlock(thread); | |
1361 | } | |
1362 | ||
1363 | if (update_throttle) { | |
1364 | rethrottle_thread(thread->uthread); | |
1365 | } | |
1366 | ||
1367 | if (update_sfi) { | |
1368 | sfi_reevaluate(thread); | |
1369 | } | |
1370 | ||
1371 | if (update_threads) { | |
1372 | thread->requested_policy.bg_iotier = task->effective_policy.bg_iotier; | |
1373 | thread->requested_policy.terminated = task->effective_policy.terminated; | |
1374 | ||
1375 | task_policy_update_internal_locked(task, thread, FALSE, NULL); | |
1376 | /* The thread policy must not emit any completion actions due to this change. */ | |
1377 | } | |
1378 | } | |
1379 | } | |
1380 | } | |
1381 | ||
1382 | /* | |
1383 | * Called with task unlocked to do things that can't be done while holding the task lock | |
1384 | */ | |
1385 | void | |
1386 | task_policy_update_complete_unlocked(task_t task, thread_t thread, task_pend_token_t pend_token) | |
1387 | { | |
1388 | boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; | |
1389 | ||
1390 | #ifdef MACH_BSD | |
1391 | if (pend_token->tpt_update_sockets) | |
1392 | proc_apply_task_networkbg(task->bsd_info, thread); | |
1393 | #endif /* MACH_BSD */ | |
1394 | ||
1395 | if (on_task) { | |
1396 | /* The timer throttle has been removed or reduced, we need to look for expired timers and fire them */ | |
1397 | if (pend_token->tpt_update_timers) | |
1398 | ml_timer_evaluate(); | |
1399 | ||
1400 | ||
1401 | if (pend_token->tpt_update_live_donor) | |
1402 | task_importance_update_live_donor(task); | |
1403 | ||
1404 | if (pend_token->tpt_update_coal_sfi) | |
1405 | coalition_sfi_reevaluate(task->coalition, task); | |
1406 | } | |
1407 | } | |
1408 | ||
1409 | /* | |
1410 | * Initiate a task policy state transition | |
1411 | * | |
1412 | * Everything that modifies requested except functions that need to hold the task lock | |
1413 | * should use this function | |
1414 | * | |
1415 | * Argument validation should be performed before reaching this point. | |
1416 | * | |
1417 | * TODO: Do we need to check task->active or thread->active? | |
1418 | */ | |
1419 | void | |
1420 | proc_set_task_policy(task_t task, | |
1421 | thread_t thread, | |
1422 | int category, | |
1423 | int flavor, | |
1424 | int value) | |
1425 | { | |
1426 | struct task_pend_token pend_token = {}; | |
1427 | ||
1428 | task_lock(task); | |
1429 | ||
1430 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1431 | (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START, | |
1432 | targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0); | |
1433 | ||
1434 | proc_set_task_policy_locked(task, thread, category, flavor, value); | |
1435 | ||
1436 | task_policy_update_locked(task, thread, &pend_token); | |
1437 | ||
1438 | task_unlock(task); | |
1439 | ||
1440 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1441 | (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END, | |
1442 | targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0); | |
1443 | ||
1444 | task_policy_update_complete_unlocked(task, thread, &pend_token); | |
1445 | } | |
1446 | ||
1447 | /* | |
1448 | * Initiate a task policy state transition on a thread with its TID | |
1449 | * Useful if you cannot guarantee the thread won't get terminated | |
1450 | */ | |
1451 | void | |
1452 | proc_set_task_policy_thread(task_t task, | |
1453 | uint64_t tid, | |
1454 | int category, | |
1455 | int flavor, | |
1456 | int value) | |
1457 | { | |
1458 | thread_t thread; | |
1459 | thread_t self = current_thread(); | |
1460 | struct task_pend_token pend_token = {}; | |
1461 | ||
1462 | task_lock(task); | |
1463 | ||
1464 | if (tid == TID_NULL || tid == self->thread_id) | |
1465 | thread = self; | |
1466 | else | |
1467 | thread = task_findtid(task, tid); | |
1468 | ||
1469 | if (thread == THREAD_NULL) { | |
1470 | task_unlock(task); | |
1471 | return; | |
1472 | } | |
1473 | ||
1474 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1475 | (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START, | |
1476 | targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0); | |
1477 | ||
1478 | proc_set_task_policy_locked(task, thread, category, flavor, value); | |
1479 | ||
1480 | task_policy_update_locked(task, thread, &pend_token); | |
1481 | ||
1482 | task_unlock(task); | |
1483 | ||
1484 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1485 | (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END, | |
1486 | targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0); | |
1487 | ||
1488 | task_policy_update_complete_unlocked(task, thread, &pend_token); | |
1489 | } | |
1490 | ||
1491 | /* | |
1492 | * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure. | |
1493 | * Same locking rules apply. | |
1494 | */ | |
1495 | void | |
1496 | proc_set_task_policy2(task_t task, thread_t thread, int category, int flavor, int value1, int value2) | |
1497 | { | |
1498 | struct task_pend_token pend_token = {}; | |
1499 | ||
1500 | task_lock(task); | |
1501 | ||
1502 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1503 | (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START, | |
1504 | targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value1, 0); | |
1505 | ||
1506 | proc_set_task_policy2_locked(task, thread, category, flavor, value1, value2); | |
1507 | ||
1508 | task_policy_update_locked(task, thread, &pend_token); | |
1509 | ||
1510 | task_unlock(task); | |
1511 | ||
1512 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
1513 | (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END, | |
1514 | targetid(task, thread), trequested_0(task, thread), trequested_0(task, thread), tpending(&pend_token), 0); | |
1515 | ||
1516 | task_policy_update_complete_unlocked(task, thread, &pend_token); | |
1517 | } | |
1518 | ||
1519 | /* | |
1520 | * Set the requested state for a specific flavor to a specific value. | |
1521 | * | |
1522 | * TODO: | |
1523 | * Verify that arguments to non iopol things are 1 or 0 | |
1524 | */ | |
1525 | static void | |
1526 | proc_set_task_policy_locked(task_t task, | |
1527 | thread_t thread, | |
1528 | int category, | |
1529 | int flavor, | |
1530 | int value) | |
1531 | { | |
1532 | boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; | |
1533 | ||
1534 | int tier, passive; | |
1535 | ||
1536 | struct task_requested_policy requested = | |
1537 | (on_task) ? task->requested_policy : thread->requested_policy; | |
1538 | ||
1539 | switch (flavor) { | |
1540 | ||
1541 | /* Category: EXTERNAL and INTERNAL, thread and task */ | |
1542 | ||
1543 | case TASK_POLICY_DARWIN_BG: | |
1544 | if (category == TASK_POLICY_EXTERNAL) | |
1545 | requested.ext_darwinbg = value; | |
1546 | else | |
1547 | requested.int_darwinbg = value; | |
1548 | break; | |
1549 | ||
1550 | case TASK_POLICY_IOPOL: | |
1551 | proc_iopol_to_tier(value, &tier, &passive); | |
1552 | if (category == TASK_POLICY_EXTERNAL) { | |
1553 | requested.ext_iotier = tier; | |
1554 | requested.ext_iopassive = passive; | |
1555 | } else { | |
1556 | requested.int_iotier = tier; | |
1557 | requested.int_iopassive = passive; | |
1558 | } | |
1559 | break; | |
1560 | ||
1561 | case TASK_POLICY_IO: | |
1562 | if (category == TASK_POLICY_EXTERNAL) | |
1563 | requested.ext_iotier = value; | |
1564 | else | |
1565 | requested.int_iotier = value; | |
1566 | break; | |
1567 | ||
1568 | case TASK_POLICY_PASSIVE_IO: | |
1569 | if (category == TASK_POLICY_EXTERNAL) | |
1570 | requested.ext_iopassive = value; | |
1571 | else | |
1572 | requested.int_iopassive = value; | |
1573 | break; | |
1574 | ||
1575 | /* Category: INTERNAL, task only */ | |
1576 | ||
1577 | case TASK_POLICY_DARWIN_BG_IOPOL: | |
1578 | assert(on_task && category == TASK_POLICY_INTERNAL); | |
1579 | proc_iopol_to_tier(value, &tier, &passive); | |
1580 | requested.bg_iotier = tier; | |
1581 | break; | |
1582 | ||
1583 | /* Category: ATTRIBUTE, task only */ | |
1584 | ||
1585 | case TASK_POLICY_TAL: | |
1586 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1587 | requested.t_tal_enabled = value; | |
1588 | break; | |
1589 | ||
1590 | case TASK_POLICY_BOOST: | |
1591 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1592 | requested.t_boosted = value; | |
1593 | break; | |
1594 | ||
1595 | case TASK_POLICY_ROLE: | |
1596 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1597 | requested.t_role = value; | |
1598 | break; | |
1599 | ||
1600 | case TASK_POLICY_TERMINATED: | |
1601 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1602 | requested.terminated = value; | |
1603 | break; | |
1604 | case TASK_BASE_LATENCY_QOS_POLICY: | |
1605 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1606 | requested.t_base_latency_qos = value; | |
1607 | break; | |
1608 | case TASK_BASE_THROUGHPUT_QOS_POLICY: | |
1609 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1610 | requested.t_base_through_qos = value; | |
1611 | break; | |
1612 | case TASK_POLICY_SFI_MANAGED: | |
1613 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1614 | requested.t_sfi_managed = value; | |
1615 | break; | |
1616 | ||
1617 | /* Category: ATTRIBUTE, thread only */ | |
1618 | ||
1619 | case TASK_POLICY_PIDBIND_BG: | |
1620 | assert(!on_task && category == TASK_POLICY_ATTRIBUTE); | |
1621 | requested.th_pidbind_bg = value; | |
1622 | break; | |
1623 | ||
1624 | case TASK_POLICY_WORKQ_BG: | |
1625 | assert(!on_task && category == TASK_POLICY_ATTRIBUTE); | |
1626 | requested.th_workq_bg = value; | |
1627 | break; | |
1628 | ||
1629 | case TASK_POLICY_QOS: | |
1630 | assert(!on_task && category == TASK_POLICY_ATTRIBUTE); | |
1631 | requested.thrp_qos = value; | |
1632 | break; | |
1633 | ||
1634 | case TASK_POLICY_QOS_OVERRIDE: | |
1635 | assert(!on_task && category == TASK_POLICY_ATTRIBUTE); | |
1636 | requested.thrp_qos_override = value; | |
1637 | break; | |
1638 | ||
1639 | default: | |
1640 | panic("unknown task policy: %d %d %d", category, flavor, value); | |
1641 | break; | |
1642 | } | |
1643 | ||
1644 | if (on_task) | |
1645 | task->requested_policy = requested; | |
1646 | else | |
1647 | thread->requested_policy = requested; | |
1648 | } | |
1649 | ||
1650 | /* | |
1651 | * Variant of proc_set_task_policy_locked() that sets two scalars in the requested policy structure. | |
1652 | */ | |
1653 | static void | |
1654 | proc_set_task_policy2_locked(task_t task, | |
1655 | thread_t thread, | |
1656 | int category, | |
1657 | int flavor, | |
1658 | int value1, | |
1659 | int value2) | |
1660 | { | |
1661 | boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; | |
1662 | ||
1663 | struct task_requested_policy requested = | |
1664 | (on_task) ? task->requested_policy : thread->requested_policy; | |
1665 | ||
1666 | switch (flavor) { | |
1667 | ||
1668 | /* Category: ATTRIBUTE, task only */ | |
1669 | ||
1670 | case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS: | |
1671 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1672 | requested.t_base_latency_qos = value1; | |
1673 | requested.t_base_through_qos = value2; | |
1674 | break; | |
1675 | ||
1676 | case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS: | |
1677 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1678 | requested.t_over_latency_qos = value1; | |
1679 | requested.t_over_through_qos = value2; | |
1680 | break; | |
1681 | ||
1682 | /* Category: ATTRIBUTE, thread only */ | |
1683 | ||
1684 | case TASK_POLICY_QOS_AND_RELPRIO: | |
1685 | ||
1686 | assert(!on_task && category == TASK_POLICY_ATTRIBUTE); | |
1687 | requested.thrp_qos = value1; | |
1688 | requested.thrp_qos_relprio = value2; | |
1689 | DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio); | |
1690 | break; | |
1691 | ||
1692 | default: | |
1693 | panic("unknown task policy: %d %d %d %d", category, flavor, value1, value2); | |
1694 | break; | |
1695 | } | |
1696 | ||
1697 | if (on_task) | |
1698 | task->requested_policy = requested; | |
1699 | else | |
1700 | thread->requested_policy = requested; | |
1701 | } | |
1702 | ||
1703 | ||
1704 | /* | |
1705 | * Gets what you set. Effective values may be different. | |
1706 | */ | |
1707 | int | |
1708 | proc_get_task_policy(task_t task, | |
1709 | thread_t thread, | |
1710 | int category, | |
1711 | int flavor) | |
1712 | { | |
1713 | boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; | |
1714 | ||
1715 | int value = 0; | |
1716 | ||
1717 | task_lock(task); | |
1718 | ||
1719 | struct task_requested_policy requested = | |
1720 | (on_task) ? task->requested_policy : thread->requested_policy; | |
1721 | ||
1722 | switch (flavor) { | |
1723 | case TASK_POLICY_DARWIN_BG: | |
1724 | if (category == TASK_POLICY_EXTERNAL) | |
1725 | value = requested.ext_darwinbg; | |
1726 | else | |
1727 | value = requested.int_darwinbg; | |
1728 | break; | |
1729 | case TASK_POLICY_IOPOL: | |
1730 | if (category == TASK_POLICY_EXTERNAL) | |
1731 | value = proc_tier_to_iopol(requested.ext_iotier, | |
1732 | requested.ext_iopassive); | |
1733 | else | |
1734 | value = proc_tier_to_iopol(requested.int_iotier, | |
1735 | requested.int_iopassive); | |
1736 | break; | |
1737 | case TASK_POLICY_IO: | |
1738 | if (category == TASK_POLICY_EXTERNAL) | |
1739 | value = requested.ext_iotier; | |
1740 | else | |
1741 | value = requested.int_iotier; | |
1742 | break; | |
1743 | case TASK_POLICY_PASSIVE_IO: | |
1744 | if (category == TASK_POLICY_EXTERNAL) | |
1745 | value = requested.ext_iopassive; | |
1746 | else | |
1747 | value = requested.int_iopassive; | |
1748 | break; | |
1749 | case TASK_POLICY_DARWIN_BG_IOPOL: | |
1750 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1751 | value = proc_tier_to_iopol(requested.bg_iotier, 0); | |
1752 | break; | |
1753 | case TASK_POLICY_ROLE: | |
1754 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1755 | value = requested.t_role; | |
1756 | break; | |
1757 | case TASK_POLICY_SFI_MANAGED: | |
1758 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1759 | value = requested.t_sfi_managed; | |
1760 | break; | |
1761 | case TASK_POLICY_QOS: | |
1762 | assert(!on_task && category == TASK_POLICY_ATTRIBUTE); | |
1763 | value = requested.thrp_qos; | |
1764 | break; | |
1765 | case TASK_POLICY_QOS_OVERRIDE: | |
1766 | assert(!on_task && category == TASK_POLICY_ATTRIBUTE); | |
1767 | value = requested.thrp_qos_override; | |
1768 | break; | |
1769 | default: | |
1770 | panic("unknown policy_flavor %d", flavor); | |
1771 | break; | |
1772 | } | |
1773 | ||
1774 | task_unlock(task); | |
1775 | ||
1776 | return value; | |
1777 | } | |
1778 | ||
1779 | /* | |
1780 | * Variant of proc_get_task_policy() that returns two scalar outputs. | |
1781 | */ | |
1782 | void | |
1783 | proc_get_task_policy2(task_t task, thread_t thread, int category __unused, int flavor, int *value1, int *value2) | |
1784 | { | |
1785 | boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; | |
1786 | ||
1787 | task_lock(task); | |
1788 | ||
1789 | struct task_requested_policy requested = | |
1790 | (on_task) ? task->requested_policy : thread->requested_policy; | |
1791 | ||
1792 | switch (flavor) { | |
1793 | /* TASK attributes */ | |
1794 | case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS: | |
1795 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1796 | *value1 = requested.t_base_latency_qos; | |
1797 | *value2 = requested.t_base_through_qos; | |
1798 | break; | |
1799 | ||
1800 | case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS: | |
1801 | assert(on_task && category == TASK_POLICY_ATTRIBUTE); | |
1802 | *value1 = requested.t_over_latency_qos; | |
1803 | *value2 = requested.t_over_through_qos; | |
1804 | break; | |
1805 | ||
1806 | /* THREAD attributes */ | |
1807 | case TASK_POLICY_QOS_AND_RELPRIO: | |
1808 | assert(!on_task && category == TASK_POLICY_ATTRIBUTE); | |
1809 | *value1 = requested.thrp_qos; | |
1810 | *value2 = requested.thrp_qos_relprio; | |
1811 | break; | |
1812 | ||
1813 | default: | |
1814 | panic("unknown policy_flavor %d", flavor); | |
1815 | break; | |
1816 | } | |
1817 | ||
1818 | task_unlock(task); | |
1819 | } | |
1820 | ||
1821 | ||
1822 | /* | |
1823 | * Functions for querying effective state for relevant subsystems | |
1824 | * ONLY the relevant subsystem should query these. | |
1825 | * NEVER take a value from one of the 'effective' functions and stuff it into a setter. | |
1826 | */ | |
1827 | ||
1828 | int | |
1829 | proc_get_effective_task_policy(task_t task, int flavor) | |
1830 | { | |
1831 | return proc_get_effective_policy(task, THREAD_NULL, flavor); | |
1832 | } | |
1833 | ||
1834 | int | |
1835 | proc_get_effective_thread_policy(thread_t thread, int flavor) | |
1836 | { | |
1837 | return proc_get_effective_policy(thread->task, thread, flavor); | |
1838 | } | |
1839 | ||
1840 | /* | |
1841 | * Gets what is actually in effect, for subsystems which pull policy instead of receive updates. | |
1842 | * | |
1843 | * NOTE: This accessor does not take the task lock. | |
1844 | * Notifications of state updates need to be externally synchronized with state queries. | |
1845 | * This routine *MUST* remain interrupt safe, as it is potentially invoked | |
1846 | * within the context of a timer interrupt. It is also called in KDP context for stackshot. | |
1847 | */ | |
1848 | static int | |
1849 | proc_get_effective_policy(task_t task, | |
1850 | thread_t thread, | |
1851 | int flavor) | |
1852 | { | |
1853 | boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE; | |
1854 | int value = 0; | |
1855 | ||
1856 | switch (flavor) { | |
1857 | case TASK_POLICY_DARWIN_BG: | |
1858 | /* | |
1859 | * This backs the KPI call proc_pidbackgrounded to find | |
1860 | * out if a pid is backgrounded, | |
1861 | * as well as proc_get_effective_thread_policy. | |
1862 | * Its main use is within the timer layer, as well as | |
1863 | * prioritizing requests to the graphics system. | |
1864 | * Returns 1 for background mode, 0 for normal mode | |
1865 | */ | |
1866 | if (on_task) | |
1867 | value = task->effective_policy.darwinbg; | |
1868 | else | |
1869 | value = (task->effective_policy.darwinbg || | |
1870 | thread->effective_policy.darwinbg) ? 1 : 0; | |
1871 | break; | |
1872 | case TASK_POLICY_IO: | |
1873 | /* | |
1874 | * The I/O system calls here to find out what throttling tier to apply to an operation. | |
1875 | * Returns THROTTLE_LEVEL_* values. Some userspace spinlock operations can apply | |
1876 | * a temporary iotier override to make the I/O more aggressive to get the lock | |
1877 | * owner to release the spinlock. | |
1878 | */ | |
1879 | if (on_task) | |
1880 | value = task->effective_policy.io_tier; | |
1881 | else { | |
1882 | value = MAX(task->effective_policy.io_tier, | |
1883 | thread->effective_policy.io_tier); | |
1884 | if (thread->iotier_override != THROTTLE_LEVEL_NONE) | |
1885 | value = MIN(value, thread->iotier_override); | |
1886 | } | |
1887 | break; | |
1888 | case TASK_POLICY_PASSIVE_IO: | |
1889 | /* | |
1890 | * The I/O system calls here to find out whether an operation should be passive. | |
1891 | * (i.e. not cause operations with lower throttle tiers to be throttled) | |
1892 | * Returns 1 for passive mode, 0 for normal mode. | |
1893 | * If a userspace spinlock has applied an override, that I/O should always | |
1894 | * be passive to avoid self-throttling when the override is removed and lower | |
1895 | * iotier I/Os are issued. | |
1896 | */ | |
1897 | if (on_task) | |
1898 | value = task->effective_policy.io_passive; | |
1899 | else { | |
1900 | int io_tier = MAX(task->effective_policy.io_tier, thread->effective_policy.io_tier); | |
1901 | boolean_t override_in_effect = (thread->iotier_override != THROTTLE_LEVEL_NONE) && (thread->iotier_override < io_tier); | |
1902 | ||
1903 | value = (task->effective_policy.io_passive || | |
1904 | thread->effective_policy.io_passive || override_in_effect) ? 1 : 0; | |
1905 | } | |
1906 | break; | |
1907 | case TASK_POLICY_ALL_SOCKETS_BG: | |
1908 | /* | |
1909 | * do_background_socket() calls this to determine what it should do to the proc's sockets | |
1910 | * Returns 1 for background mode, 0 for normal mode | |
1911 | * | |
1912 | * This consults both thread and task so un-DBGing a thread while the task is BG | |
1913 | * doesn't get you out of the network throttle. | |
1914 | */ | |
1915 | if (on_task) | |
1916 | value = task->effective_policy.all_sockets_bg; | |
1917 | else | |
1918 | value = (task->effective_policy.all_sockets_bg || | |
1919 | thread->effective_policy.all_sockets_bg) ? 1 : 0; | |
1920 | break; | |
1921 | case TASK_POLICY_NEW_SOCKETS_BG: | |
1922 | /* | |
1923 | * socreate() calls this to determine if it should mark a new socket as background | |
1924 | * Returns 1 for background mode, 0 for normal mode | |
1925 | */ | |
1926 | if (on_task) | |
1927 | value = task->effective_policy.new_sockets_bg; | |
1928 | else | |
1929 | value = (task->effective_policy.new_sockets_bg || | |
1930 | thread->effective_policy.new_sockets_bg) ? 1 : 0; | |
1931 | break; | |
1932 | case TASK_POLICY_LOWPRI_CPU: | |
1933 | /* | |
1934 | * Returns 1 for low priority cpu mode, 0 for normal mode | |
1935 | */ | |
1936 | if (on_task) | |
1937 | value = task->effective_policy.lowpri_cpu; | |
1938 | else | |
1939 | value = (task->effective_policy.lowpri_cpu || | |
1940 | thread->effective_policy.lowpri_cpu) ? 1 : 0; | |
1941 | break; | |
1942 | case TASK_POLICY_SUPPRESSED_CPU: | |
1943 | /* | |
1944 | * Returns 1 for suppressed cpu mode, 0 for normal mode | |
1945 | */ | |
1946 | assert(on_task); | |
1947 | value = task->effective_policy.t_suppressed_cpu; | |
1948 | break; | |
1949 | case TASK_POLICY_LATENCY_QOS: | |
1950 | /* | |
1951 | * timer arming calls into here to find out the timer coalescing level | |
1952 | * Returns a QoS tier (0-6) | |
1953 | */ | |
1954 | if (on_task) { | |
1955 | value = task->effective_policy.t_latency_qos; | |
1956 | } else { | |
1957 | value = MAX(task->effective_policy.t_latency_qos, thread->effective_policy.t_latency_qos); | |
1958 | } | |
1959 | break; | |
1960 | case TASK_POLICY_THROUGH_QOS: | |
1961 | /* | |
1962 | * Returns a QoS tier (0-6) | |
1963 | */ | |
1964 | assert(on_task); | |
1965 | value = task->effective_policy.t_through_qos; | |
1966 | break; | |
1967 | case TASK_POLICY_ROLE: | |
1968 | assert(on_task); | |
1969 | value = task->effective_policy.t_role; | |
1970 | break; | |
1971 | case TASK_POLICY_WATCHERS_BG: | |
1972 | assert(on_task); | |
1973 | value = task->effective_policy.t_watchers_bg; | |
1974 | break; | |
1975 | case TASK_POLICY_SFI_MANAGED: | |
1976 | assert(on_task); | |
1977 | value = task->effective_policy.t_sfi_managed; | |
1978 | break; | |
1979 | case TASK_POLICY_QOS: | |
1980 | assert(!on_task); | |
1981 | value = thread->effective_policy.thep_qos; | |
1982 | break; | |
1983 | default: | |
1984 | panic("unknown policy_flavor %d", flavor); | |
1985 | break; | |
1986 | } | |
1987 | ||
1988 | return value; | |
1989 | } | |
1990 | ||
1991 | /* | |
1992 | * Convert from IOPOL_* values to throttle tiers. | |
1993 | * | |
1994 | * TODO: Can this be made more compact, like an array lookup | |
1995 | * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future | |
1996 | */ | |
1997 | ||
1998 | static void | |
1999 | proc_iopol_to_tier(int iopolicy, int *tier, int *passive) | |
2000 | { | |
2001 | *passive = 0; | |
2002 | *tier = 0; | |
2003 | switch (iopolicy) { | |
2004 | case IOPOL_IMPORTANT: | |
2005 | *tier = THROTTLE_LEVEL_TIER0; | |
2006 | break; | |
2007 | case IOPOL_PASSIVE: | |
2008 | *tier = THROTTLE_LEVEL_TIER0; | |
2009 | *passive = 1; | |
2010 | break; | |
2011 | case IOPOL_STANDARD: | |
2012 | *tier = THROTTLE_LEVEL_TIER1; | |
2013 | break; | |
2014 | case IOPOL_UTILITY: | |
2015 | *tier = THROTTLE_LEVEL_TIER2; | |
2016 | break; | |
2017 | case IOPOL_THROTTLE: | |
2018 | *tier = THROTTLE_LEVEL_TIER3; | |
2019 | break; | |
2020 | default: | |
2021 | panic("unknown I/O policy %d", iopolicy); | |
2022 | break; | |
2023 | } | |
2024 | } | |
2025 | ||
2026 | static int | |
2027 | proc_tier_to_iopol(int tier, int passive) | |
2028 | { | |
2029 | if (passive == 1) { | |
2030 | switch (tier) { | |
2031 | case THROTTLE_LEVEL_TIER0: | |
2032 | return IOPOL_PASSIVE; | |
2033 | break; | |
2034 | default: | |
2035 | panic("unknown passive tier %d", tier); | |
2036 | return IOPOL_DEFAULT; | |
2037 | break; | |
2038 | } | |
2039 | } else { | |
2040 | switch (tier) { | |
2041 | case THROTTLE_LEVEL_NONE: | |
2042 | case THROTTLE_LEVEL_TIER0: | |
2043 | return IOPOL_DEFAULT; | |
2044 | break; | |
2045 | case THROTTLE_LEVEL_TIER1: | |
2046 | return IOPOL_STANDARD; | |
2047 | break; | |
2048 | case THROTTLE_LEVEL_TIER2: | |
2049 | return IOPOL_UTILITY; | |
2050 | break; | |
2051 | case THROTTLE_LEVEL_TIER3: | |
2052 | return IOPOL_THROTTLE; | |
2053 | break; | |
2054 | default: | |
2055 | panic("unknown tier %d", tier); | |
2056 | return IOPOL_DEFAULT; | |
2057 | break; | |
2058 | } | |
2059 | } | |
2060 | } | |
2061 | ||
2062 | /* apply internal backgrounding for workqueue threads */ | |
2063 | int | |
2064 | proc_apply_workq_bgthreadpolicy(thread_t thread) | |
2065 | { | |
2066 | if (thread == THREAD_NULL) | |
2067 | return ESRCH; | |
2068 | ||
2069 | proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, | |
2070 | TASK_POLICY_WORKQ_BG, TASK_POLICY_ENABLE); | |
2071 | ||
2072 | return(0); | |
2073 | } | |
2074 | ||
2075 | /* | |
2076 | * remove internal backgrounding for workqueue threads | |
2077 | * does NOT go find sockets created while BG and unbackground them | |
2078 | */ | |
2079 | int | |
2080 | proc_restore_workq_bgthreadpolicy(thread_t thread) | |
2081 | { | |
2082 | if (thread == THREAD_NULL) | |
2083 | return ESRCH; | |
2084 | ||
2085 | proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE, | |
2086 | TASK_POLICY_WORKQ_BG, TASK_POLICY_DISABLE); | |
2087 | ||
2088 | return(0); | |
2089 | } | |
2090 | ||
2091 | /* here for temporary compatibility */ | |
2092 | int | |
2093 | proc_setthread_saved_importance(__unused thread_t thread, __unused int importance) | |
2094 | { | |
2095 | return(0); | |
2096 | } | |
2097 | ||
2098 | /* | |
2099 | * Set an override on the thread which is consulted with a | |
2100 | * higher priority than the task/thread policy. This should | |
2101 | * only be set for temporary grants until the thread | |
2102 | * returns to the userspace boundary | |
2103 | * | |
2104 | * We use atomic operations to swap in the override, with | |
2105 | * the assumption that the thread itself can | |
2106 | * read the override and clear it on return to userspace. | |
2107 | * | |
2108 | * No locking is performed, since it is acceptable to see | |
2109 | * a stale override for one loop through throttle_lowpri_io(). | |
2110 | * However a thread reference must be held on the thread. | |
2111 | */ | |
2112 | ||
2113 | void set_thread_iotier_override(thread_t thread, int policy) | |
2114 | { | |
2115 | int current_override; | |
2116 | ||
2117 | /* Let most aggressive I/O policy win until user boundary */ | |
2118 | do { | |
2119 | current_override = thread->iotier_override; | |
2120 | ||
2121 | if (current_override != THROTTLE_LEVEL_NONE) | |
2122 | policy = MIN(current_override, policy); | |
2123 | ||
2124 | if (current_override == policy) { | |
2125 | /* no effective change */ | |
2126 | return; | |
2127 | } | |
2128 | } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override)); | |
2129 | ||
2130 | /* | |
2131 | * Since the thread may be currently throttled, | |
2132 | * re-evaluate tiers and potentially break out | |
2133 | * of an msleep | |
2134 | */ | |
2135 | rethrottle_thread(thread->uthread); | |
2136 | } | |
2137 | ||
2138 | /* | |
2139 | * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks, | |
2140 | * semaphores, dispatch_sync) may result in priority inversions where a higher priority | |
2141 | * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower | |
2142 | * priority thread. In these cases, we attempt to propagate the priority token, as long | |
2143 | * as the subsystem informs us of the relationships between the threads. The userspace | |
2144 | * synchronization subsystem should maintain the information of owner->resource and | |
2145 | * resource->waiters itself. | |
2146 | */ | |
2147 | ||
2148 | /* | |
2149 | * This helper canonicalizes the resource/resource_type given the current qos_override_mode | |
2150 | * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need | |
2151 | * to be handled specially in the future, but for now it's fine to slam | |
2152 | * *resource to USER_ADDR_NULL even if it was previously a wildcard. | |
2153 | */ | |
2154 | static void _canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) { | |
2155 | if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) { | |
2156 | /* Map all input resource/type to a single one */ | |
2157 | *resource = USER_ADDR_NULL; | |
2158 | *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN; | |
2159 | } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) { | |
2160 | /* no transform */ | |
2161 | } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH) { | |
2162 | /* Map all dispatch overrides to a single one, to avoid memory overhead */ | |
2163 | if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) { | |
2164 | *resource = USER_ADDR_NULL; | |
2165 | } | |
2166 | } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) { | |
2167 | /* Map all mutex overrides to a single one, to avoid memory overhead */ | |
2168 | if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) { | |
2169 | *resource = USER_ADDR_NULL; | |
2170 | } | |
2171 | } | |
2172 | } | |
2173 | ||
2174 | /* This helper routine finds an existing override if known. Locking should be done by caller */ | |
2175 | static struct thread_qos_override *_find_qos_override(thread_t thread, user_addr_t resource, int resource_type) { | |
2176 | struct thread_qos_override *override; | |
2177 | ||
2178 | override = thread->overrides; | |
2179 | while (override) { | |
2180 | if (override->override_resource == resource && | |
2181 | override->override_resource_type == resource_type) { | |
2182 | return override; | |
2183 | } | |
2184 | ||
2185 | override = override->override_next; | |
2186 | } | |
2187 | ||
2188 | return NULL; | |
2189 | } | |
2190 | ||
2191 | static void _find_and_decrement_qos_override(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, struct thread_qos_override **free_override_list) { | |
2192 | struct thread_qos_override *override, *override_prev; | |
2193 | ||
2194 | override_prev = NULL; | |
2195 | override = thread->overrides; | |
2196 | while (override) { | |
2197 | struct thread_qos_override *override_next = override->override_next; | |
2198 | ||
2199 | if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) && | |
2200 | override->override_resource_type == resource_type) { | |
2201 | if (reset) { | |
2202 | override->override_contended_resource_count = 0; | |
2203 | } else { | |
2204 | override->override_contended_resource_count--; | |
2205 | } | |
2206 | ||
2207 | if (override->override_contended_resource_count == 0) { | |
2208 | if (override_prev == NULL) { | |
2209 | thread->overrides = override_next; | |
2210 | } else { | |
2211 | override_prev->override_next = override_next; | |
2212 | } | |
2213 | ||
2214 | /* Add to out-param for later zfree */ | |
2215 | override->override_next = *free_override_list; | |
2216 | *free_override_list = override; | |
2217 | } else { | |
2218 | override_prev = override; | |
2219 | } | |
2220 | ||
2221 | if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) { | |
2222 | return; | |
2223 | } | |
2224 | } else { | |
2225 | override_prev = override; | |
2226 | } | |
2227 | ||
2228 | override = override_next; | |
2229 | } | |
2230 | } | |
2231 | ||
2232 | /* This helper recalculates the current requested override using the policy selected at boot */ | |
2233 | static int _calculate_requested_qos_override(thread_t thread) | |
2234 | { | |
2235 | if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) { | |
2236 | return THREAD_QOS_UNSPECIFIED; | |
2237 | } | |
2238 | ||
2239 | /* iterate over all overrides and calculate MAX */ | |
2240 | struct thread_qos_override *override; | |
2241 | int qos_override = THREAD_QOS_UNSPECIFIED; | |
2242 | ||
2243 | override = thread->overrides; | |
2244 | while (override) { | |
2245 | if (qos_override_mode != QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH || | |
2246 | override->override_resource_type != THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) { | |
2247 | qos_override = MAX(qos_override, override->override_qos); | |
2248 | } | |
2249 | ||
2250 | override = override->override_next; | |
2251 | } | |
2252 | ||
2253 | return qos_override; | |
2254 | } | |
2255 | ||
2256 | boolean_t proc_thread_qos_add_override(task_t task, thread_t thread, uint64_t tid, int override_qos, boolean_t first_override_for_resource, user_addr_t resource, int resource_type) | |
2257 | { | |
2258 | thread_t self = current_thread(); | |
2259 | struct task_pend_token pend_token = {}; | |
2260 | ||
2261 | /* XXX move to thread mutex when thread policy does */ | |
2262 | task_lock(task); | |
2263 | ||
2264 | /* | |
2265 | * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference | |
2266 | * to the thread | |
2267 | */ | |
2268 | ||
2269 | if (thread != THREAD_NULL) { | |
2270 | assert(task == thread->task); | |
2271 | } else { | |
2272 | if (tid == self->thread_id) { | |
2273 | thread = self; | |
2274 | } else { | |
2275 | thread = task_findtid(task, tid); | |
2276 | ||
2277 | if (thread == THREAD_NULL) { | |
2278 | KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE, | |
2279 | tid, 0, 0xdead, 0, 0); | |
2280 | task_unlock(task); | |
2281 | return FALSE; | |
2282 | } | |
2283 | } | |
2284 | } | |
2285 | ||
2286 | KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START, | |
2287 | thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0); | |
2288 | ||
2289 | DTRACE_BOOST5(qos_add_override_pre, uint64_t, tid, uint64_t, thread->requested_policy.thrp_qos, | |
2290 | uint64_t, thread->effective_policy.thep_qos, int, override_qos, boolean_t, first_override_for_resource); | |
2291 | ||
2292 | struct task_requested_policy requested = thread->requested_policy; | |
2293 | struct thread_qos_override *override; | |
2294 | struct thread_qos_override *deferred_free_override = NULL; | |
2295 | int new_qos_override, prev_qos_override; | |
2296 | int new_effective_qos; | |
2297 | boolean_t has_thread_reference = FALSE; | |
2298 | ||
2299 | _canonicalize_resource_and_type(&resource, &resource_type); | |
2300 | ||
2301 | if (first_override_for_resource) { | |
2302 | override = _find_qos_override(thread, resource, resource_type); | |
2303 | if (override) { | |
2304 | override->override_contended_resource_count++; | |
2305 | } else { | |
2306 | struct thread_qos_override *override_new; | |
2307 | ||
2308 | /* We need to allocate a new object. Drop the task lock and recheck afterwards in case someone else added the override */ | |
2309 | thread_reference(thread); | |
2310 | has_thread_reference = TRUE; | |
2311 | task_unlock(task); | |
2312 | override_new = zalloc(thread_qos_override_zone); | |
2313 | task_lock(task); | |
2314 | ||
2315 | override = _find_qos_override(thread, resource, resource_type); | |
2316 | if (override) { | |
2317 | /* Someone else already allocated while the task lock was dropped */ | |
2318 | deferred_free_override = override_new; | |
2319 | override->override_contended_resource_count++; | |
2320 | } else { | |
2321 | override = override_new; | |
2322 | override->override_next = thread->overrides; | |
2323 | override->override_contended_resource_count = 1 /* since first_override_for_resource was TRUE */; | |
2324 | override->override_resource = resource; | |
2325 | override->override_resource_type = resource_type; | |
2326 | override->override_qos = THREAD_QOS_UNSPECIFIED; | |
2327 | thread->overrides = override; | |
2328 | } | |
2329 | } | |
2330 | } else { | |
2331 | override = _find_qos_override(thread, resource, resource_type); | |
2332 | } | |
2333 | ||
2334 | if (override) { | |
2335 | if (override->override_qos == THREAD_QOS_UNSPECIFIED) | |
2336 | override->override_qos = override_qos; | |
2337 | else | |
2338 | override->override_qos = MAX(override->override_qos, override_qos); | |
2339 | } | |
2340 | ||
2341 | /* Determine how to combine the various overrides into a single current requested override */ | |
2342 | prev_qos_override = requested.thrp_qos_override; | |
2343 | new_qos_override = _calculate_requested_qos_override(thread); | |
2344 | ||
2345 | if (new_qos_override != prev_qos_override) { | |
2346 | requested.thrp_qos_override = new_qos_override; | |
2347 | ||
2348 | thread->requested_policy = requested; | |
2349 | ||
2350 | task_policy_update_locked(task, thread, &pend_token); | |
2351 | ||
2352 | if (!has_thread_reference) { | |
2353 | thread_reference(thread); | |
2354 | } | |
2355 | ||
2356 | task_unlock(task); | |
2357 | ||
2358 | task_policy_update_complete_unlocked(task, thread, &pend_token); | |
2359 | ||
2360 | new_effective_qos = thread->effective_policy.thep_qos; | |
2361 | ||
2362 | thread_deallocate(thread); | |
2363 | } else { | |
2364 | new_effective_qos = thread->effective_policy.thep_qos; | |
2365 | ||
2366 | task_unlock(task); | |
2367 | ||
2368 | if (has_thread_reference) { | |
2369 | thread_deallocate(thread); | |
2370 | } | |
2371 | } | |
2372 | ||
2373 | if (deferred_free_override) { | |
2374 | zfree(thread_qos_override_zone, deferred_free_override); | |
2375 | } | |
2376 | ||
2377 | DTRACE_BOOST3(qos_add_override_post, int, prev_qos_override, int, new_qos_override, | |
2378 | int, new_effective_qos); | |
2379 | ||
2380 | KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END, | |
2381 | new_qos_override, resource, resource_type, 0, 0); | |
2382 | ||
2383 | return TRUE; | |
2384 | } | |
2385 | ||
2386 | ||
2387 | static boolean_t _proc_thread_qos_remove_override_internal(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type, boolean_t reset) | |
2388 | { | |
2389 | thread_t self = current_thread(); | |
2390 | struct task_pend_token pend_token = {}; | |
2391 | ||
2392 | /* XXX move to thread mutex when thread policy does */ | |
2393 | task_lock(task); | |
2394 | ||
2395 | /* | |
2396 | * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference | |
2397 | * to the thread | |
2398 | */ | |
2399 | if (thread != THREAD_NULL) { | |
2400 | assert(task == thread->task); | |
2401 | } else { | |
2402 | if (tid == self->thread_id) { | |
2403 | thread = self; | |
2404 | } else { | |
2405 | thread = task_findtid(task, tid); | |
2406 | ||
2407 | if (thread == THREAD_NULL) { | |
2408 | KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE, | |
2409 | tid, 0, 0xdead, 0, 0); | |
2410 | task_unlock(task); | |
2411 | return FALSE; | |
2412 | } | |
2413 | } | |
2414 | } | |
2415 | ||
2416 | struct task_requested_policy requested = thread->requested_policy; | |
2417 | struct thread_qos_override *deferred_free_override_list = NULL; | |
2418 | int new_qos_override, prev_qos_override; | |
2419 | ||
2420 | _canonicalize_resource_and_type(&resource, &resource_type); | |
2421 | ||
2422 | _find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list); | |
2423 | ||
2424 | KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START, | |
2425 | thread_tid(thread), resource, reset, 0, 0); | |
2426 | ||
2427 | /* Determine how to combine the various overrides into a single current requested override */ | |
2428 | prev_qos_override = requested.thrp_qos_override; | |
2429 | new_qos_override = _calculate_requested_qos_override(thread); | |
2430 | ||
2431 | if (new_qos_override != prev_qos_override) { | |
2432 | requested.thrp_qos_override = new_qos_override; | |
2433 | ||
2434 | thread->requested_policy = requested; | |
2435 | ||
2436 | task_policy_update_locked(task, thread, &pend_token); | |
2437 | ||
2438 | thread_reference(thread); | |
2439 | ||
2440 | task_unlock(task); | |
2441 | ||
2442 | task_policy_update_complete_unlocked(task, thread, &pend_token); | |
2443 | ||
2444 | thread_deallocate(thread); | |
2445 | } else { | |
2446 | task_unlock(task); | |
2447 | } | |
2448 | ||
2449 | while (deferred_free_override_list) { | |
2450 | struct thread_qos_override *override_next = deferred_free_override_list->override_next; | |
2451 | ||
2452 | zfree(thread_qos_override_zone, deferred_free_override_list); | |
2453 | deferred_free_override_list = override_next; | |
2454 | } | |
2455 | ||
2456 | KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END, | |
2457 | 0, 0, 0, 0, 0); | |
2458 | ||
2459 | return TRUE; | |
2460 | } | |
2461 | ||
2462 | boolean_t proc_thread_qos_remove_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type) | |
2463 | { | |
2464 | return _proc_thread_qos_remove_override_internal(task, thread, tid, resource, resource_type, FALSE); | |
2465 | ||
2466 | } | |
2467 | ||
2468 | boolean_t proc_thread_qos_reset_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type) | |
2469 | { | |
2470 | return _proc_thread_qos_remove_override_internal(task, thread, tid, resource, resource_type, TRUE); | |
2471 | } | |
2472 | ||
2473 | /* Deallocate before thread termination */ | |
2474 | void proc_thread_qos_deallocate(thread_t thread) | |
2475 | { | |
2476 | task_t task = thread->task; | |
2477 | struct thread_qos_override *override; | |
2478 | ||
2479 | /* XXX move to thread mutex when thread policy does */ | |
2480 | task_lock(task); | |
2481 | override = thread->overrides; | |
2482 | thread->overrides = NULL; /* task policy re-evaluation needed? */ | |
2483 | thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED; | |
2484 | task_unlock(task); | |
2485 | ||
2486 | while (override) { | |
2487 | struct thread_qos_override *override_next = override->override_next; | |
2488 | ||
2489 | zfree(thread_qos_override_zone, override); | |
2490 | override = override_next; | |
2491 | } | |
2492 | } | |
2493 | ||
2494 | /* TODO: remove this variable when interactive daemon audit period is over */ | |
2495 | extern boolean_t ipc_importance_interactive_receiver; | |
2496 | ||
2497 | /* | |
2498 | * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process | |
2499 | * | |
2500 | * TODO: Make this function more table-driven instead of ad-hoc | |
2501 | */ | |
2502 | void | |
2503 | proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, | |
2504 | ipc_port_t * portwatch_ports, int portwatch_count) | |
2505 | { | |
2506 | struct task_pend_token pend_token = {}; | |
2507 | ||
2508 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
2509 | (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START, | |
2510 | audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), | |
2511 | apptype, 0); | |
2512 | ||
2513 | switch (apptype) { | |
2514 | case TASK_APPTYPE_APP_TAL: | |
2515 | case TASK_APPTYPE_APP_DEFAULT: | |
2516 | /* Apps become donors via the 'live-donor' flag instead of the static donor flag */ | |
2517 | task_importance_mark_donor(task, FALSE); | |
2518 | task_importance_mark_live_donor(task, TRUE); | |
2519 | task_importance_mark_receiver(task, FALSE); | |
2520 | /* Apps are de-nap recievers on desktop for suppression behaviors */ | |
2521 | task_importance_mark_denap_receiver(task, TRUE); | |
2522 | break; | |
2523 | ||
2524 | case TASK_APPTYPE_DAEMON_INTERACTIVE: | |
2525 | task_importance_mark_donor(task, TRUE); | |
2526 | task_importance_mark_live_donor(task, FALSE); | |
2527 | ||
2528 | /* | |
2529 | * A boot arg controls whether interactive daemons are importance receivers. | |
2530 | * Normally, they are not. But for testing their behavior as an adaptive | |
2531 | * daemon, the boot-arg can be set. | |
2532 | * | |
2533 | * TODO: remove this when the interactive daemon audit period is over. | |
2534 | */ | |
2535 | task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver); | |
2536 | task_importance_mark_denap_receiver(task, FALSE); | |
2537 | break; | |
2538 | ||
2539 | case TASK_APPTYPE_DAEMON_STANDARD: | |
2540 | task_importance_mark_donor(task, TRUE); | |
2541 | task_importance_mark_live_donor(task, FALSE); | |
2542 | task_importance_mark_receiver(task, FALSE); | |
2543 | task_importance_mark_denap_receiver(task, FALSE); | |
2544 | break; | |
2545 | ||
2546 | case TASK_APPTYPE_DAEMON_ADAPTIVE: | |
2547 | task_importance_mark_donor(task, FALSE); | |
2548 | task_importance_mark_live_donor(task, FALSE); | |
2549 | task_importance_mark_receiver(task, TRUE); | |
2550 | task_importance_mark_denap_receiver(task, FALSE); | |
2551 | break; | |
2552 | ||
2553 | case TASK_APPTYPE_DAEMON_BACKGROUND: | |
2554 | task_importance_mark_donor(task, FALSE); | |
2555 | task_importance_mark_live_donor(task, FALSE); | |
2556 | task_importance_mark_receiver(task, FALSE); | |
2557 | task_importance_mark_denap_receiver(task, FALSE); | |
2558 | break; | |
2559 | ||
2560 | case TASK_APPTYPE_NONE: | |
2561 | break; | |
2562 | } | |
2563 | ||
2564 | if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) { | |
2565 | int portwatch_boosts = 0; | |
2566 | ||
2567 | for (int i = 0; i < portwatch_count; i++) { | |
2568 | ipc_port_t port = NULL; | |
2569 | ||
2570 | if ((port = portwatch_ports[i]) != NULL) { | |
2571 | int boost = 0; | |
2572 | task_add_importance_watchport(task, port, &boost); | |
2573 | portwatch_boosts += boost; | |
2574 | } | |
2575 | } | |
2576 | ||
2577 | if (portwatch_boosts > 0) { | |
2578 | task_importance_hold_internal_assertion(task, portwatch_boosts); | |
2579 | } | |
2580 | } | |
2581 | ||
2582 | task_lock(task); | |
2583 | ||
2584 | if (apptype == TASK_APPTYPE_APP_TAL) { | |
2585 | /* TAL starts off enabled by default */ | |
2586 | task->requested_policy.t_tal_enabled = 1; | |
2587 | } | |
2588 | ||
2589 | if (apptype != TASK_APPTYPE_NONE) { | |
2590 | task->requested_policy.t_apptype = apptype; | |
2591 | ||
2592 | } | |
2593 | ||
2594 | if (qos_clamp != THREAD_QOS_UNSPECIFIED) { | |
2595 | task->requested_policy.t_qos_clamp = qos_clamp; | |
2596 | } | |
2597 | ||
2598 | task_policy_update_locked(task, THREAD_NULL, &pend_token); | |
2599 | ||
2600 | task_unlock(task); | |
2601 | ||
2602 | /* Ensure the donor bit is updated to be in sync with the new live donor status */ | |
2603 | pend_token.tpt_update_live_donor = 1; | |
2604 | ||
2605 | task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token); | |
2606 | ||
2607 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
2608 | (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END, | |
2609 | audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), | |
2610 | task_is_importance_receiver(task), 0); | |
2611 | } | |
2612 | ||
2613 | /* Set up the primordial thread's QoS */ | |
2614 | void | |
2615 | task_set_main_thread_qos(task_t task, thread_t main_thread) { | |
2616 | struct task_pend_token pend_token = {}; | |
2617 | ||
2618 | assert(main_thread->task == task); | |
2619 | ||
2620 | task_lock(task); | |
2621 | ||
2622 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
2623 | (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START, | |
2624 | audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), | |
2625 | main_thread->requested_policy.thrp_qos, 0); | |
2626 | ||
2627 | int primordial_qos = THREAD_QOS_UNSPECIFIED; | |
2628 | ||
2629 | int qos_clamp = task->requested_policy.t_qos_clamp; | |
2630 | ||
2631 | switch (task->requested_policy.t_apptype) { | |
2632 | case TASK_APPTYPE_APP_TAL: | |
2633 | case TASK_APPTYPE_APP_DEFAULT: | |
2634 | primordial_qos = THREAD_QOS_USER_INTERACTIVE; | |
2635 | break; | |
2636 | ||
2637 | case TASK_APPTYPE_DAEMON_INTERACTIVE: | |
2638 | case TASK_APPTYPE_DAEMON_STANDARD: | |
2639 | case TASK_APPTYPE_DAEMON_ADAPTIVE: | |
2640 | primordial_qos = THREAD_QOS_LEGACY; | |
2641 | break; | |
2642 | ||
2643 | case TASK_APPTYPE_DAEMON_BACKGROUND: | |
2644 | primordial_qos = THREAD_QOS_BACKGROUND; | |
2645 | break; | |
2646 | } | |
2647 | ||
2648 | if (qos_clamp != THREAD_QOS_UNSPECIFIED) { | |
2649 | if (primordial_qos != THREAD_QOS_UNSPECIFIED) { | |
2650 | primordial_qos = MIN(qos_clamp, primordial_qos); | |
2651 | } else { | |
2652 | primordial_qos = qos_clamp; | |
2653 | } | |
2654 | } | |
2655 | ||
2656 | main_thread->requested_policy.thrp_qos = primordial_qos; | |
2657 | ||
2658 | task_policy_update_locked(task, main_thread, &pend_token); | |
2659 | ||
2660 | task_unlock(task); | |
2661 | ||
2662 | task_policy_update_complete_unlocked(task, main_thread, &pend_token); | |
2663 | ||
2664 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, | |
2665 | (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END, | |
2666 | audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), | |
2667 | primordial_qos, 0); | |
2668 | } | |
2669 | ||
2670 | /* for process_policy to check before attempting to set */ | |
2671 | boolean_t | |
2672 | proc_task_is_tal(task_t task) | |
2673 | { | |
2674 | return (task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE; | |
2675 | } | |
2676 | ||
2677 | /* for telemetry */ | |
2678 | integer_t | |
2679 | task_grab_latency_qos(task_t task) | |
2680 | { | |
2681 | return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS)); | |
2682 | } | |
2683 | ||
2684 | /* update the darwin background action state in the flags field for libproc */ | |
2685 | int | |
2686 | proc_get_darwinbgstate(task_t task, uint32_t * flagsp) | |
2687 | { | |
2688 | if (task->requested_policy.ext_darwinbg) | |
2689 | *flagsp |= PROC_FLAG_EXT_DARWINBG; | |
2690 | ||
2691 | if (task->requested_policy.int_darwinbg) | |
2692 | *flagsp |= PROC_FLAG_DARWINBG; | |
2693 | ||
2694 | ||
2695 | if (task->requested_policy.t_apptype == TASK_APPTYPE_APP_DEFAULT || | |
2696 | task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) | |
2697 | *flagsp |= PROC_FLAG_APPLICATION; | |
2698 | ||
2699 | if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) | |
2700 | *flagsp |= PROC_FLAG_ADAPTIVE; | |
2701 | ||
2702 | if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && task->requested_policy.t_boosted == 1) | |
2703 | *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT; | |
2704 | ||
2705 | if (task_is_importance_donor(task)) | |
2706 | *flagsp |= PROC_FLAG_IMPORTANCE_DONOR; | |
2707 | ||
2708 | if (task->effective_policy.t_sup_active) | |
2709 | *flagsp |= PROC_FLAG_SUPPRESSED; | |
2710 | ||
2711 | return(0); | |
2712 | } | |
2713 | ||
2714 | /* All per-thread state is in the first 32-bits of the bitfield */ | |
2715 | void | |
2716 | proc_get_thread_policy(thread_t thread, thread_policy_state_t info) | |
2717 | { | |
2718 | task_t task = thread->task; | |
2719 | task_lock(task); | |
2720 | info->requested = (integer_t)task_requested_bitfield(task, thread); | |
2721 | info->effective = (integer_t)task_effective_bitfield(task, thread); | |
2722 | info->pending = 0; | |
2723 | task_unlock(task); | |
2724 | } | |
2725 | ||
2726 | /* | |
2727 | * Tracepoint data... Reading the tracepoint data can be somewhat complicated. | |
2728 | * The current scheme packs as much data into a single tracepoint as it can. | |
2729 | * | |
2730 | * Each task/thread requested/effective structure is 64 bits in size. Any | |
2731 | * given tracepoint will emit either requested or effective data, but not both. | |
2732 | * | |
2733 | * A tracepoint may emit any of task, thread, or task & thread data. | |
2734 | * | |
2735 | * The type of data emitted varies with pointer size. Where possible, both | |
2736 | * task and thread data are emitted. In LP32 systems, the first and second | |
2737 | * halves of either the task or thread data is emitted. | |
2738 | * | |
2739 | * The code uses uintptr_t array indexes instead of high/low to avoid | |
2740 | * confusion WRT big vs little endian. | |
2741 | * | |
2742 | * The truth table for the tracepoint data functions is below, and has the | |
2743 | * following invariants: | |
2744 | * | |
2745 | * 1) task and thread are uintptr_t* | |
2746 | * 2) task may never be NULL | |
2747 | * | |
2748 | * | |
 *                                 LP32            LP64
 * trequested_0(task, NULL)        task[0]         task[0]
 * trequested_1(task, NULL)        task[1]         0
 * trequested_0(task, thread)      thread[0]       task[0]
 * trequested_1(task, thread)      thread[1]       thread[0]
2754 | * | |
2755 | * Basically, you get a full task or thread on LP32, and both on LP64. | |
2756 | * | |
2757 | * The uintptr_t munging here is squicky enough to deserve a comment. | |
2758 | * | |
2759 | * The variables we are accessing are laid out in memory like this: | |
2760 | * | |
 *  [            LP64 uintptr_t 0            ]
 *  [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
 *
 *      1    2    3    4    5    6    7    8
2765 | * | |
2766 | */ | |
2767 | ||
2768 | static uintptr_t | |
2769 | trequested_0(task_t task, thread_t thread) | |
2770 | { | |
2771 | assert(task); | |
2772 | _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated"); | |
2773 | _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated"); | |
2774 | ||
2775 | uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy); | |
2776 | return raw[0]; | |
2777 | } | |
2778 | ||
2779 | static uintptr_t | |
2780 | trequested_1(task_t task, thread_t thread) | |
2781 | { | |
2782 | assert(task); | |
2783 | _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated"); | |
2784 | _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated"); | |
2785 | ||
2786 | #if defined __LP64__ | |
2787 | return (thread == NULL) ? 0 : *(uintptr_t*)&thread->requested_policy; | |
2788 | #else | |
2789 | uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy); | |
2790 | return raw[1]; | |
2791 | #endif | |
2792 | } | |
2793 | ||
2794 | static uintptr_t | |
2795 | teffective_0(task_t task, thread_t thread) | |
2796 | { | |
2797 | assert(task); | |
2798 | _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated"); | |
2799 | _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated"); | |
2800 | ||
2801 | uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy); | |
2802 | return raw[0]; | |
2803 | } | |
2804 | ||
2805 | static uintptr_t | |
2806 | teffective_1(task_t task, thread_t thread) | |
2807 | { | |
2808 | assert(task); | |
2809 | _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated"); | |
2810 | _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated"); | |
2811 | ||
2812 | #if defined __LP64__ | |
2813 | return (thread == NULL) ? 0 : *(uintptr_t*)&thread->effective_policy; | |
2814 | #else | |
2815 | uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy); | |
2816 | return raw[1]; | |
2817 | #endif | |
2818 | } | |
2819 | ||
2820 | /* dump pending for tracepoint */ | |
2821 | static uint32_t tpending(task_pend_token_t pend_token) { return *(uint32_t*)(void*)(pend_token); } | |
2822 | ||
2823 | uint64_t | |
2824 | task_requested_bitfield(task_t task, thread_t thread) | |
2825 | { | |
2826 | uint64_t bits = 0; | |
2827 | struct task_requested_policy requested = | |
2828 | (thread == THREAD_NULL) ? task->requested_policy : thread->requested_policy; | |
2829 | ||
2830 | bits |= (requested.int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0); | |
2831 | bits |= (requested.ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0); | |
2832 | bits |= (requested.int_iotier ? (((uint64_t)requested.int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0); | |
2833 | bits |= (requested.ext_iotier ? (((uint64_t)requested.ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0); | |
2834 | bits |= (requested.int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0); | |
2835 | bits |= (requested.ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0); | |
2836 | bits |= (requested.bg_iotier ? (((uint64_t)requested.bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0); | |
2837 | bits |= (requested.terminated ? POLICY_REQ_TERMINATED : 0); | |
2838 | ||
2839 | bits |= (requested.th_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0); | |
2840 | bits |= (requested.th_workq_bg ? POLICY_REQ_WORKQ_BG : 0); | |
2841 | ||
2842 | if (thread != THREAD_NULL) { | |
2843 | bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0); | |
2844 | bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0); | |
2845 | } | |
2846 | ||
2847 | bits |= (requested.t_boosted ? POLICY_REQ_BOOSTED : 0); | |
2848 | bits |= (requested.t_tal_enabled ? POLICY_REQ_TAL_ENABLED : 0); | |
2849 | bits |= (requested.t_apptype ? (((uint64_t)requested.t_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0); | |
2850 | bits |= (requested.t_role ? (((uint64_t)requested.t_role) << POLICY_REQ_ROLE_SHIFT) : 0); | |
2851 | ||
2852 | bits |= (requested.t_sup_active ? POLICY_REQ_SUP_ACTIVE : 0); | |
2853 | bits |= (requested.t_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0); | |
2854 | bits |= (requested.t_sup_cpu ? POLICY_REQ_SUP_CPU : 0); | |
2855 | bits |= (requested.t_sup_timer ? (((uint64_t)requested.t_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0); | |
2856 | bits |= (requested.t_sup_throughput ? (((uint64_t)requested.t_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0); | |
2857 | bits |= (requested.t_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : 0); | |
2858 | bits |= (requested.t_sup_cpu_limit ? POLICY_REQ_SUP_CPU_LIMIT : 0); | |
2859 | bits |= (requested.t_sup_suspend ? POLICY_REQ_SUP_SUSPEND : 0); | |
2860 | bits |= (requested.t_sup_bg_sockets ? POLICY_REQ_SUP_BG_SOCKETS : 0); | |
2861 | bits |= (requested.t_base_latency_qos ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0); | |
2862 | bits |= (requested.t_over_latency_qos ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0); | |
2863 | bits |= (requested.t_base_through_qos ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0); | |
2864 | bits |= (requested.t_over_through_qos ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0); | |
2865 | bits |= (requested.t_sfi_managed ? POLICY_REQ_SFI_MANAGED : 0); | |
2866 | bits |= (requested.t_qos_clamp ? (((uint64_t)requested.t_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT) : 0); | |
2867 | ||
2868 | return bits; | |
2869 | } | |
2870 | ||
2871 | uint64_t | |
2872 | task_effective_bitfield(task_t task, thread_t thread) | |
2873 | { | |
2874 | uint64_t bits = 0; | |
2875 | struct task_effective_policy effective = | |
2876 | (thread == THREAD_NULL) ? task->effective_policy : thread->effective_policy; | |
2877 | ||
2878 | bits |= (effective.io_tier ? (((uint64_t)effective.io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0); | |
2879 | bits |= (effective.io_passive ? POLICY_EFF_IO_PASSIVE : 0); | |
2880 | bits |= (effective.darwinbg ? POLICY_EFF_DARWIN_BG : 0); | |
2881 | bits |= (effective.lowpri_cpu ? POLICY_EFF_LOWPRI_CPU : 0); | |
2882 | bits |= (effective.terminated ? POLICY_EFF_TERMINATED : 0); | |
2883 | bits |= (effective.all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0); | |
2884 | bits |= (effective.new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0); | |
2885 | bits |= (effective.bg_iotier ? (((uint64_t)effective.bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0); | |
2886 | bits |= (effective.qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0); | |
2887 | ||
2888 | if (thread != THREAD_NULL) | |
2889 | bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0); | |
2890 | ||
2891 | bits |= (effective.t_tal_engaged ? POLICY_EFF_TAL_ENGAGED : 0); | |
2892 | bits |= (effective.t_suspended ? POLICY_EFF_SUSPENDED : 0); | |
2893 | bits |= (effective.t_watchers_bg ? POLICY_EFF_WATCHERS_BG : 0); | |
2894 | bits |= (effective.t_sup_active ? POLICY_EFF_SUP_ACTIVE : 0); | |
2895 | bits |= (effective.t_suppressed_cpu ? POLICY_EFF_SUP_CPU : 0); | |
2896 | bits |= (effective.t_role ? (((uint64_t)effective.t_role) << POLICY_EFF_ROLE_SHIFT) : 0); | |
2897 | bits |= (effective.t_latency_qos ? (((uint64_t)effective.t_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0); | |
2898 | bits |= (effective.t_through_qos ? (((uint64_t)effective.t_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0); | |
2899 | bits |= (effective.t_sfi_managed ? POLICY_EFF_SFI_MANAGED : 0); | |
2900 | bits |= (effective.t_qos_ceiling ? (((uint64_t)effective.t_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0); | |
2901 | ||
2902 | return bits; | |
2903 | } | |
2904 | ||
2905 | ||
2906 | /* | |
2907 | * Resource usage and CPU related routines | |
2908 | */ | |
2909 | ||
2910 | int | |
2911 | proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep) | |
2912 | { | |
2913 | ||
2914 | int error = 0; | |
2915 | int scope; | |
2916 | ||
2917 | task_lock(task); | |
2918 | ||
2919 | ||
2920 | error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope); | |
2921 | task_unlock(task); | |
2922 | ||
2923 | /* | |
2924 | * Reverse-map from CPU resource limit scopes back to policies (see comment below). | |
2925 | */ | |
2926 | if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { | |
2927 | *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC; | |
2928 | } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) { | |
2929 | *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE; | |
2930 | } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) { | |
2931 | *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; | |
2932 | } | |
2933 | ||
2934 | return(error); | |
2935 | } | |
2936 | ||
2937 | /* | |
2938 | * Configure the default CPU usage monitor parameters. | |
2939 | * | |
2940 | * For tasks which have this mechanism activated: if any thread in the | |
2941 | * process consumes more CPU than this, an EXC_RESOURCE exception will be generated. | |
2942 | */ | |
2943 | void | |
2944 | proc_init_cpumon_params(void) | |
2945 | { | |
2946 | if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage, | |
2947 | sizeof (proc_max_cpumon_percentage))) { | |
2948 | proc_max_cpumon_percentage = DEFAULT_CPUMON_PERCENTAGE; | |
2949 | } | |
2950 | ||
2951 | if (proc_max_cpumon_percentage > 100) { | |
2952 | proc_max_cpumon_percentage = 100; | |
2953 | } | |
2954 | ||
2955 | /* The interval should be specified in seconds. */ | |
2956 | if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval, | |
2957 | sizeof (proc_max_cpumon_interval))) { | |
2958 | proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL; | |
2959 | } | |
2960 | ||
2961 | proc_max_cpumon_interval *= NSEC_PER_SEC; | |
2962 | ||
2963 | /* TEMPORARY boot arg to control App suppression */ | |
2964 | PE_parse_boot_argn("task_policy_suppression_disable", | |
2965 | &task_policy_suppression_disable, | |
2966 | sizeof(task_policy_suppression_disable)); | |
2967 | } | |
2968 | ||
2969 | /* | |
2970 | * Currently supported configurations for CPU limits. | |
2971 | * | |
2972 | * Policy | Deadline-based CPU limit | Percentage-based CPU limit | |
2973 | * -------------------------------------+--------------------------+------------------------------ | |
2974 | * PROC_POLICY_RSRCACT_THROTTLE | ENOTSUP | Task-wide scope only | |
2975 | * PROC_POLICY_RSRCACT_SUSPEND | Task-wide scope only | ENOTSUP | |
2976 | * PROC_POLICY_RSRCACT_TERMINATE | Task-wide scope only | ENOTSUP | |
2977 | * PROC_POLICY_RSRCACT_NOTIFY_KQ | Task-wide scope only | ENOTSUP | |
2978 | * PROC_POLICY_RSRCACT_NOTIFY_EXC | ENOTSUP | Per-thread scope only | |
2979 | * | |
2980 | * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed | |
2981 | * after the specified amount of wallclock time has elapsed. | |
2982 | * | |
2983 | * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time | |
2984 | * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an | |
2985 | * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads | |
2986 | * in the task are added together), or by any one thread in the task (so-called "per-thread" scope). | |
2987 | * | |
2988 | * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them | |
2989 | * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action | |
2990 | * after I have used some amount of CPU time; this is different than the recurring percentage/interval model) | |
2991 | * but the potential consumer of the API at the time was insisting on wallclock time instead. | |
2992 | * | |
2993 | * Currently, requesting notification via an exception is the only way to get per-thread scope for a | |
2994 | * CPU limit. All other types of notifications force task-wide scope for the limit. | |
2995 | */ | |
2996 | int | |
2997 | proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline, | |
2998 | int cpumon_entitled) | |
2999 | { | |
3000 | int error = 0; | |
3001 | int scope; | |
3002 | ||
3003 | /* | |
3004 | * Enforce the matrix of supported configurations for policy, percentage, and deadline. | |
3005 | */ | |
3006 | switch (policy) { | |
3007 | // If no policy is explicitly given, the default is to throttle. | |
3008 | case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE: | |
3009 | case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE: | |
3010 | if (deadline != 0) | |
3011 | return (ENOTSUP); | |
3012 | scope = TASK_RUSECPU_FLAGS_PROC_LIMIT; | |
3013 | break; | |
3014 | case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND: | |
3015 | case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE: | |
3016 | case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ: | |
3017 | if (percentage != 0) | |
3018 | return (ENOTSUP); | |
3019 | scope = TASK_RUSECPU_FLAGS_DEADLINE; | |
3020 | break; | |
3021 | case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC: | |
3022 | if (deadline != 0) | |
3023 | return (ENOTSUP); | |
3024 | scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT; | |
3025 | #ifdef CONFIG_NOMONITORS | |
3026 | return (error); | |
3027 | #endif /* CONFIG_NOMONITORS */ | |
3028 | break; | |
3029 | default: | |
3030 | return (EINVAL); | |
3031 | } | |
3032 | ||
3033 | task_lock(task); | |
3034 | if (task != current_task()) { | |
3035 | task->policy_ru_cpu_ext = policy; | |
3036 | } else { | |
3037 | task->policy_ru_cpu = policy; | |
3038 | } | |
3039 | error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled); | |
3040 | task_unlock(task); | |
3041 | return(error); | |
3042 | } | |
3043 | ||
3044 | int | |
3045 | proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled) | |
3046 | { | |
3047 | int error = 0; | |
3048 | int action; | |
3049 | void * bsdinfo = NULL; | |
3050 | ||
3051 | task_lock(task); | |
3052 | if (task != current_task()) { | |
3053 | task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT; | |
3054 | } else { | |
3055 | task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT; | |
3056 | } | |
3057 | ||
3058 | error = task_clear_cpuusage_locked(task, cpumon_entitled); | |
3059 | if (error != 0) | |
3060 | goto out; | |
3061 | ||
3062 | action = task->applied_ru_cpu; | |
3063 | if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { | |
3064 | /* reset action */ | |
3065 | task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; | |
3066 | } | |
3067 | if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { | |
3068 | bsdinfo = task->bsd_info; | |
3069 | task_unlock(task); | |
3070 | proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action); | |
3071 | goto out1; | |
3072 | } | |
3073 | ||
3074 | out: | |
3075 | task_unlock(task); | |
3076 | out1: | |
3077 | return(error); | |
3078 | ||
3079 | } | |
3080 | ||
3081 | /* used to apply resource limit related actions */ | |
3082 | static int | |
3083 | task_apply_resource_actions(task_t task, int type) | |
3084 | { | |
3085 | int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE; | |
3086 | void * bsdinfo = NULL; | |
3087 | ||
3088 | switch (type) { | |
3089 | case TASK_POLICY_CPU_RESOURCE_USAGE: | |
3090 | break; | |
3091 | case TASK_POLICY_WIREDMEM_RESOURCE_USAGE: | |
3092 | case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE: | |
3093 | case TASK_POLICY_DISK_RESOURCE_USAGE: | |
3094 | case TASK_POLICY_NETWORK_RESOURCE_USAGE: | |
3095 | case TASK_POLICY_POWER_RESOURCE_USAGE: | |
3096 | return(0); | |
3097 | ||
3098 | default: | |
3099 | return(1); | |
3100 | }; | |
3101 | ||
3102 | /* only cpu actions for now */ | |
3103 | task_lock(task); | |
3104 | ||
3105 | if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { | |
3106 | /* apply action */ | |
3107 | task->applied_ru_cpu_ext = task->policy_ru_cpu_ext; | |
3108 | action = task->applied_ru_cpu_ext; | |
3109 | } else { | |
3110 | action = task->applied_ru_cpu_ext; | |
3111 | } | |
3112 | ||
3113 | if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) { | |
3114 | bsdinfo = task->bsd_info; | |
3115 | task_unlock(task); | |
3116 | proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action); | |
3117 | } else | |
3118 | task_unlock(task); | |
3119 | ||
3120 | return(0); | |
3121 | } | |
3122 | ||
3123 | /* | |
3124 | * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API | |
3125 | * only allows for one at a time. This means that if there is a per-thread limit active, the other | |
3126 | * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest | |
3127 | * to the caller, and prefer that, but there's no need for that at the moment. | |
3128 | */ | |
3129 | int | |
3130 | task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope) | |
3131 | { | |
3132 | *percentagep = 0; | |
3133 | *intervalp = 0; | |
3134 | *deadlinep = 0; | |
3135 | ||
3136 | if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) { | |
3137 | *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT; | |
3138 | *percentagep = task->rusage_cpu_perthr_percentage; | |
3139 | *intervalp = task->rusage_cpu_perthr_interval; | |
3140 | } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) { | |
3141 | *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT; | |
3142 | *percentagep = task->rusage_cpu_percentage; | |
3143 | *intervalp = task->rusage_cpu_interval; | |
3144 | } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) { | |
3145 | *scope = TASK_RUSECPU_FLAGS_DEADLINE; | |
3146 | *deadlinep = task->rusage_cpu_deadline; | |
3147 | } else { | |
3148 | *scope = 0; | |
3149 | } | |
3150 | ||
3151 | return(0); | |
3152 | } | |
3153 | ||
3154 | /* | |
3155 | * Disable the CPU usage monitor for the task. Return value indicates | |
3156 | * if the mechanism was actually enabled. | |
3157 | */ | |
3158 | int | |
3159 | task_disable_cpumon(task_t task) { | |
3160 | thread_t thread; | |
3161 | ||
3162 | task_lock_assert_owned(task); | |
3163 | ||
3164 | if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) { | |
3165 | return (KERN_INVALID_ARGUMENT); | |
3166 | } | |
3167 | ||
3168 | #if CONFIG_TELEMETRY | |
3169 | /* | |
3170 | * Disable task-wide telemetry if it was ever enabled by the CPU usage | |
3171 | * monitor's warning zone. | |
3172 | */ | |
3173 | telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0); | |
3174 | #endif | |
3175 | ||
3176 | /* | |
3177 | * Disable the monitor for the task, and propagate that change to each thread. | |
3178 | */ | |
3179 | task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON); | |
3180 | queue_iterate(&task->threads, thread, thread_t, task_threads) { | |
3181 | set_astledger(thread); | |
3182 | } | |
3183 | task->rusage_cpu_perthr_percentage = 0; | |
3184 | task->rusage_cpu_perthr_interval = 0; | |
3185 | ||
3186 | return (KERN_SUCCESS); | |
3187 | } | |
3188 | ||
3189 | int | |
3190 | task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled) | |
3191 | { | |
3192 | thread_t thread; | |
3193 | uint64_t abstime = 0; | |
3194 | uint64_t limittime = 0; | |
3195 | ||
3196 | lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED); | |
3197 | ||
3198 | /* By default, refill once per second */ | |
3199 | if (interval == 0) | |
3200 | interval = NSEC_PER_SEC; | |
3201 | ||
3202 | if (percentage != 0) { | |
3203 | if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { | |
3204 | boolean_t warn = FALSE; | |
3205 | ||
3206 | /* | |
3207 | * A per-thread CPU limit on a task generates an exception | |
3208 | * (LEDGER_ACTION_EXCEPTION) if any one thread in the task | |
3209 | * exceeds the limit. | |
3210 | */ | |
3211 | ||
3212 | if (percentage == TASK_POLICY_CPUMON_DISABLE) { | |
3213 | if (cpumon_entitled) { | |
3214 | task_disable_cpumon(task); | |
3215 | return (0); | |
3216 | } | |
3217 | ||
3218 | /* | |
3219 | * This task wishes to disable the CPU usage monitor, but it's | |
3220 | * missing the required entitlement: | |
3221 | * com.apple.private.kernel.override-cpumon | |
3222 | * | |
3223 | * Instead, treat this as a request to reset its params | |
3224 | * back to the defaults. | |
3225 | */ | |
3226 | warn = TRUE; | |
3227 | percentage = TASK_POLICY_CPUMON_DEFAULTS; | |
3228 | } | |
3229 | ||
3230 | if (percentage == TASK_POLICY_CPUMON_DEFAULTS) { | |
3231 | percentage = proc_max_cpumon_percentage; | |
3232 | interval = proc_max_cpumon_interval; | |
3233 | } | |
3234 | ||
3235 | if (percentage > 100) { | |
3236 | percentage = 100; | |
3237 | } | |
3238 | ||
3239 | /* | |
3240 | * Passing in an interval of -1 means either: | |
3241 | * - Leave the interval as-is, if there's already a per-thread | |
3242 | * limit configured | |
3243 | * - Use the system default. | |
3244 | */ | |
3245 | if (interval == -1ULL) { | |
3246 | if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) { | |
3247 | interval = task->rusage_cpu_perthr_interval; | |
3248 | } else { | |
3249 | interval = proc_max_cpumon_interval; | |
3250 | } | |
3251 | } | |
3252 | ||
3253 | /* | |
3254 | * Enforce global caps on CPU usage monitor here if the process is not | |
3255 | * entitled to escape the global caps. | |
3256 | */ | |
3257 | if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) { | |
3258 | warn = TRUE; | |
3259 | percentage = proc_max_cpumon_percentage; | |
3260 | } | |
3261 | ||
3262 | if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) { | |
3263 | warn = TRUE; | |
3264 | interval = proc_max_cpumon_interval; | |
3265 | } | |
3266 | ||
3267 | if (warn) { | |
3268 | int pid = 0; | |
3269 | char *procname = (char *)"unknown"; | |
3270 | ||
3271 | #ifdef MACH_BSD | |
3272 | pid = proc_selfpid(); | |
3273 | if (current_task()->bsd_info != NULL) { | |
3274 | procname = proc_name_address(current_task()->bsd_info); | |
3275 | } | |
3276 | #endif | |
3277 | ||
3278 | printf("process %s[%d] denied attempt to escape CPU monitor" | |
3279 | " (missing required entitlement).\n", procname, pid); | |
3280 | } | |
3281 | ||
3282 | task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT; | |
3283 | task->rusage_cpu_perthr_percentage = percentage; | |
3284 | task->rusage_cpu_perthr_interval = interval; | |
3285 | queue_iterate(&task->threads, thread, thread_t, task_threads) { | |
3286 | set_astledger(thread); | |
3287 | } | |
3288 | } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) { | |
3289 | /* | |
3290 | * Currently, a proc-wide CPU limit always blocks if the limit is | |
3291 | * exceeded (LEDGER_ACTION_BLOCK). | |
3292 | */ | |
3293 | task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT; | |
3294 | task->rusage_cpu_percentage = percentage; | |
3295 | task->rusage_cpu_interval = interval; | |
3296 | ||
3297 | limittime = (interval * percentage) / 100; | |
3298 | nanoseconds_to_absolutetime(limittime, &abstime); | |
3299 | ||
3300 | ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0); | |
3301 | ledger_set_period(task->ledger, task_ledgers.cpu_time, interval); | |
3302 | ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK); | |
3303 | } | |
3304 | } | |
3305 | ||
3306 | if (deadline != 0) { | |
3307 | assert(scope == TASK_RUSECPU_FLAGS_DEADLINE); | |
3308 | ||
3309 | /* if already in use, cancel and wait for it to cleanout */ | |
3310 | if (task->rusage_cpu_callt != NULL) { | |
3311 | task_unlock(task); | |
3312 | thread_call_cancel_wait(task->rusage_cpu_callt); | |
3313 | task_lock(task); | |
3314 | } | |
3315 | if (task->rusage_cpu_callt == NULL) { | |
3316 | task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL); | |
3317 | } | |
3318 | /* setup callout */ | |
3319 | if (task->rusage_cpu_callt != 0) { | |
3320 | uint64_t save_abstime = 0; | |
3321 | ||
3322 | task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE; | |
3323 | task->rusage_cpu_deadline = deadline; | |
3324 | ||
3325 | nanoseconds_to_absolutetime(deadline, &abstime); | |
3326 | save_abstime = abstime; | |
3327 | clock_absolutetime_interval_to_deadline(save_abstime, &abstime); | |
3328 | thread_call_enter_delayed(task->rusage_cpu_callt, abstime); | |
3329 | } | |
3330 | } | |
3331 | ||
3332 | return(0); | |
3333 | } | |
3334 | ||
3335 | int | |
3336 | task_clear_cpuusage(task_t task, int cpumon_entitled) | |
3337 | { | |
3338 | int retval = 0; | |
3339 | ||
3340 | task_lock(task); | |
3341 | retval = task_clear_cpuusage_locked(task, cpumon_entitled); | |
3342 | task_unlock(task); | |
3343 | ||
3344 | return(retval); | |
3345 | } | |
3346 | ||
3347 | int | |
3348 | task_clear_cpuusage_locked(task_t task, int cpumon_entitled) | |
3349 | { | |
3350 | thread_call_t savecallt; | |
3351 | ||
3352 | /* cancel percentage handling if set */ | |
3353 | if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) { | |
3354 | task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT; | |
3355 | ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0); | |
3356 | task->rusage_cpu_percentage = 0; | |
3357 | task->rusage_cpu_interval = 0; | |
3358 | } | |
3359 | ||
3360 | /* | |
3361 | * Disable the CPU usage monitor. | |
3362 | */ | |
3363 | if (cpumon_entitled) { | |
3364 | task_disable_cpumon(task); | |
3365 | } | |
3366 | ||
3367 | /* cancel deadline handling if set */ | |
3368 | if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) { | |
3369 | task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE; | |
3370 | if (task->rusage_cpu_callt != 0) { | |
3371 | savecallt = task->rusage_cpu_callt; | |
3372 | task->rusage_cpu_callt = NULL; | |
3373 | task->rusage_cpu_deadline = 0; | |
3374 | task_unlock(task); | |
3375 | thread_call_cancel_wait(savecallt); | |
3376 | thread_call_free(savecallt); | |
3377 | task_lock(task); | |
3378 | } | |
3379 | } | |
3380 | return(0); | |
3381 | } | |
3382 | ||
3383 | /* called by ledger unit to enforce action due to resource usage criteria being met */ | |
3384 | void | |
3385 | task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1) | |
3386 | { | |
3387 | task_t task = (task_t)param0; | |
3388 | (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE); | |
3389 | return; | |
3390 | } | |
3391 | ||
3392 | ||
3393 | /* | |
3394 | * Routines for taskwatch and pidbind | |
3395 | */ | |
3396 | ||
3397 | ||
3398 | /* | |
3399 | * Routines for importance donation/inheritance/boosting | |
3400 | */ | |
3401 | ||
3402 | static void | |
3403 | task_importance_update_live_donor(task_t target_task) | |
3404 | { | |
3405 | #if IMPORTANCE_INHERITANCE | |
3406 | ||
3407 | ipc_importance_task_t task_imp; | |
3408 | ||
3409 | task_imp = ipc_importance_for_task(target_task, FALSE); | |
3410 | if (IIT_NULL != task_imp) { | |
3411 | ipc_importance_task_update_live_donor(task_imp); | |
3412 | ipc_importance_task_release(task_imp); | |
3413 | } | |
3414 | #endif /* IMPORTANCE_INHERITANCE */ | |
3415 | } | |
3416 | ||
3417 | void | |
3418 | task_importance_mark_donor(task_t task, boolean_t donating) | |
3419 | { | |
3420 | #if IMPORTANCE_INHERITANCE | |
3421 | ipc_importance_task_t task_imp; | |
3422 | ||
3423 | task_imp = ipc_importance_for_task(task, FALSE); | |
3424 | if (IIT_NULL != task_imp) { | |
3425 | ipc_importance_task_mark_donor(task_imp, donating); | |
3426 | ipc_importance_task_release(task_imp); | |
3427 | } | |
3428 | #endif /* IMPORTANCE_INHERITANCE */ | |
3429 | } | |
3430 | ||
3431 | void | |
3432 | task_importance_mark_live_donor(task_t task, boolean_t live_donating) | |
3433 | { | |
3434 | #if IMPORTANCE_INHERITANCE | |
3435 | ipc_importance_task_t task_imp; | |
3436 | ||
3437 | task_imp = ipc_importance_for_task(task, FALSE); | |
3438 | if (IIT_NULL != task_imp) { | |
3439 | ipc_importance_task_mark_live_donor(task_imp, live_donating); | |
3440 | ipc_importance_task_release(task_imp); | |
3441 | } | |
3442 | #endif /* IMPORTANCE_INHERITANCE */ | |
3443 | } | |
3444 | ||
3445 | void | |
3446 | task_importance_mark_receiver(task_t task, boolean_t receiving) | |
3447 | { | |
3448 | #if IMPORTANCE_INHERITANCE | |
3449 | ipc_importance_task_t task_imp; | |
3450 | ||
3451 | task_imp = ipc_importance_for_task(task, FALSE); | |
3452 | if (IIT_NULL != task_imp) { | |
3453 | ipc_importance_task_mark_receiver(task_imp, receiving); | |
3454 | ipc_importance_task_release(task_imp); | |
3455 | } | |
3456 | #endif /* IMPORTANCE_INHERITANCE */ | |
3457 | } | |
3458 | ||
3459 | void | |
3460 | task_importance_mark_denap_receiver(task_t task, boolean_t denap) | |
3461 | { | |
3462 | #if IMPORTANCE_INHERITANCE | |
3463 | ipc_importance_task_t task_imp; | |
3464 | ||
3465 | task_imp = ipc_importance_for_task(task, FALSE); | |
3466 | if (IIT_NULL != task_imp) { | |
3467 | ipc_importance_task_mark_denap_receiver(task_imp, denap); | |
3468 | ipc_importance_task_release(task_imp); | |
3469 | } | |
3470 | #endif /* IMPORTANCE_INHERITANCE */ | |
3471 | } | |
3472 | ||
3473 | void | |
3474 | task_importance_reset(__imp_only task_t task) | |
3475 | { | |
3476 | #if IMPORTANCE_INHERITANCE | |
3477 | ipc_importance_task_t task_imp; | |
3478 | ||
3479 | /* TODO: Lower importance downstream before disconnect */ | |
3480 | task_imp = task->task_imp_base; | |
3481 | ipc_importance_reset(task_imp, FALSE); | |
3482 | task_importance_update_live_donor(task); | |
3483 | #endif /* IMPORTANCE_INHERITANCE */ | |
3484 | } | |
3485 | ||
3486 | #if IMPORTANCE_INHERITANCE | |
3487 | ||
3488 | /* | |
3489 | * Sets the task boost bit to the provided value. Does NOT run the update function. | |
3490 | * | |
3491 | * Task lock must be held. | |
3492 | */ | |
3493 | void | |
3494 | task_set_boost_locked(task_t task, boolean_t boost_active) | |
3495 | { | |
3496 | #if IMPORTANCE_DEBUG | |
3497 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START), | |
3498 | proc_selfpid(), audit_token_pid_from_task(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0); | |
3499 | #endif | |
3500 | ||
3501 | task->requested_policy.t_boosted = boost_active; | |
3502 | ||
3503 | #if IMPORTANCE_DEBUG | |
3504 | if (boost_active == TRUE){ | |
3505 | DTRACE_BOOST2(boost, task_t, task, int, audit_token_pid_from_task(task)); | |
3506 | } else { | |
3507 | DTRACE_BOOST2(unboost, task_t, task, int, audit_token_pid_from_task(task)); | |
3508 | } | |
3509 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END), | |
3510 | proc_selfpid(), audit_token_pid_from_task(task), | |
3511 | trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0); | |
3512 | #endif | |
3513 | } | |
3514 | ||
3515 | /* | |
3516 | * Sets the task boost bit to the provided value and applies the update. | |
3517 | * | |
3518 | * Task lock must be held. Must call update complete after unlocking the task. | |
3519 | */ | |
3520 | void | |
3521 | task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token) | |
3522 | { | |
3523 | task_set_boost_locked(task, boost_active); | |
3524 | ||
3525 | task_policy_update_locked(task, THREAD_NULL, pend_token); | |
3526 | } | |
3527 | ||
3528 | /* | |
3529 | * Check if this task should donate importance. | |
3530 | * | |
3531 | * May be called without taking the task lock. In that case, donor status can change | |
3532 | * so you must check only once for each donation event. | |
3533 | */ | |
3534 | boolean_t | |
3535 | task_is_importance_donor(task_t task) | |
3536 | { | |
3537 | if (task->task_imp_base == IIT_NULL) | |
3538 | return FALSE; | |
3539 | return ipc_importance_task_is_donor(task->task_imp_base); | |
3540 | } | |
3541 | ||
3542 | /* | |
3543 | * Query the status of the task's donor mark. | |
3544 | */ | |
3545 | boolean_t | |
3546 | task_is_marked_importance_donor(task_t task) | |
3547 | { | |
3548 | if (task->task_imp_base == IIT_NULL) | |
3549 | return FALSE; | |
3550 | return ipc_importance_task_is_marked_donor(task->task_imp_base); | |
3551 | } | |
3552 | ||
3553 | /* | |
3554 | * Query the status of the task's live donor and donor mark. | |
3555 | */ | |
3556 | boolean_t | |
3557 | task_is_marked_live_importance_donor(task_t task) | |
3558 | { | |
3559 | if (task->task_imp_base == IIT_NULL) | |
3560 | return FALSE; | |
3561 | return ipc_importance_task_is_marked_live_donor(task->task_imp_base); | |
3562 | } | |
3563 | ||
3564 | ||
3565 | /* | |
3566 | * This routine may be called without holding task lock | |
3567 | * since the value of imp_receiver can never be unset. | |
3568 | */ | |
3569 | boolean_t | |
3570 | task_is_importance_receiver(task_t task) | |
3571 | { | |
3572 | if (task->task_imp_base == IIT_NULL) | |
3573 | return FALSE; | |
3574 | return ipc_importance_task_is_marked_receiver(task->task_imp_base); | |
3575 | } | |
3576 | ||
3577 | /* | |
3578 | * Query the task's receiver mark. | |
3579 | */ | |
3580 | boolean_t | |
3581 | task_is_marked_importance_receiver(task_t task) | |
3582 | { | |
3583 | if (task->task_imp_base == IIT_NULL) | |
3584 | return FALSE; | |
3585 | return ipc_importance_task_is_marked_receiver(task->task_imp_base); | |
3586 | } | |
3587 | ||
3588 | /* | |
3589 | * This routine may be called without holding task lock | |
3590 | * since the value of de-nap receiver can never be unset. | |
3591 | */ | |
3592 | boolean_t | |
3593 | task_is_importance_denap_receiver(task_t task) | |
3594 | { | |
3595 | if (task->task_imp_base == IIT_NULL) | |
3596 | return FALSE; | |
3597 | return ipc_importance_task_is_denap_receiver(task->task_imp_base); | |
3598 | } | |
3599 | ||
3600 | /* | |
3601 | * Query the task's de-nap receiver mark. | |
3602 | */ | |
3603 | boolean_t | |
3604 | task_is_marked_importance_denap_receiver(task_t task) | |
3605 | { | |
3606 | if (task->task_imp_base == IIT_NULL) | |
3607 | return FALSE; | |
3608 | return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base); | |
3609 | } | |
3610 | ||
3611 | /* | |
3612 | * This routine may be called without holding task lock | |
3613 | * since the value of imp_receiver can never be unset. | |
3614 | */ | |
3615 | boolean_t | |
3616 | task_is_importance_receiver_type(task_t task) | |
3617 | { | |
3618 | if (task->task_imp_base == IIT_NULL) | |
3619 | return FALSE; | |
3620 | return (task_is_importance_receiver(task) || | |
3621 | task_is_importance_denap_receiver(task)); | |
3622 | } | |
3623 | ||
/*
 * External importance assertions are managed by the process in userspace.
 * Internal importance assertions are the responsibility of the kernel.
 * Assertions are changed from internal to external via
 * task_importance_externalize_assertion.
 */
3629 | ||
3630 | int | |
3631 | task_importance_hold_watchport_assertion(task_t target_task, uint32_t count) | |
3632 | { | |
3633 | ipc_importance_task_t task_imp; | |
3634 | kern_return_t ret; | |
3635 | ||
3636 | /* must already have set up an importance */ | |
3637 | task_imp = target_task->task_imp_base; | |
3638 | assert(IIT_NULL != task_imp); | |
3639 | ||
3640 | ret = ipc_importance_task_hold_internal_assertion(task_imp, count); | |
3641 | return (KERN_SUCCESS != ret) ? ENOTSUP : 0; | |
3642 | } | |
3643 | ||
3644 | int | |
3645 | task_importance_hold_internal_assertion(task_t target_task, uint32_t count) | |
3646 | { | |
3647 | ipc_importance_task_t task_imp; | |
3648 | kern_return_t ret; | |
3649 | ||
3650 | /* may be first time, so allow for possible importance setup */ | |
3651 | task_imp = ipc_importance_for_task(target_task, FALSE); | |
3652 | if (IIT_NULL == task_imp) { | |
3653 | return EOVERFLOW; | |
3654 | } | |
3655 | ret = ipc_importance_task_hold_internal_assertion(task_imp, count); | |
3656 | ipc_importance_task_release(task_imp); | |
3657 | ||
3658 | return (KERN_SUCCESS != ret) ? ENOTSUP : 0; | |
3659 | } | |
3660 | ||
3661 | int | |
3662 | task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count) | |
3663 | { | |
3664 | ipc_importance_task_t task_imp; | |
3665 | kern_return_t ret; | |
3666 | ||
3667 | /* may be first time, so allow for possible importance setup */ | |
3668 | task_imp = ipc_importance_for_task(target_task, FALSE); | |
3669 | if (IIT_NULL == task_imp) { | |
3670 | return EOVERFLOW; | |
3671 | } | |
3672 | ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count); | |
3673 | ipc_importance_task_release(task_imp); | |
3674 | ||
3675 | return (KERN_SUCCESS != ret) ? ENOTSUP : 0; | |
3676 | } | |
3677 | ||
3678 | int | |
3679 | task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count) | |
3680 | { | |
3681 | ipc_importance_task_t task_imp; | |
3682 | kern_return_t ret; | |
3683 | ||
3684 | /* must already have set up an importance */ | |
3685 | task_imp = target_task->task_imp_base; | |
3686 | if (IIT_NULL == task_imp) { | |
3687 | return EOVERFLOW; | |
3688 | } | |
3689 | ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count); | |
3690 | return (KERN_SUCCESS != ret) ? ENOTSUP : 0; | |
3691 | } | |
3692 | ||
3693 | int | |
3694 | task_importance_drop_internal_assertion(task_t target_task, uint32_t count) | |
3695 | { | |
3696 | ipc_importance_task_t task_imp; | |
3697 | kern_return_t ret; | |
3698 | ||
3699 | /* must already have set up an importance */ | |
3700 | task_imp = target_task->task_imp_base; | |
3701 | if (IIT_NULL == task_imp) { | |
3702 | return EOVERFLOW; | |
3703 | } | |
3704 | ret = ipc_importance_task_drop_internal_assertion(target_task->task_imp_base, count); | |
3705 | return (KERN_SUCCESS != ret) ? ENOTSUP : 0; | |
3706 | } | |
3707 | ||
3708 | int | |
3709 | task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count) | |
3710 | { | |
3711 | ipc_importance_task_t task_imp; | |
3712 | kern_return_t ret; | |
3713 | ||
3714 | /* must already have set up an importance */ | |
3715 | task_imp = target_task->task_imp_base; | |
3716 | if (IIT_NULL == task_imp) { | |
3717 | return EOVERFLOW; | |
3718 | } | |
3719 | ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count); | |
3720 | return (KERN_SUCCESS != ret) ? EOVERFLOW : 0; | |
3721 | } | |
3722 | ||
3723 | int | |
3724 | task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count) | |
3725 | { | |
3726 | ipc_importance_task_t task_imp; | |
3727 | kern_return_t ret; | |
3728 | ||
3729 | /* must already have set up an importance */ | |
3730 | task_imp = target_task->task_imp_base; | |
3731 | if (IIT_NULL == task_imp) { | |
3732 | return EOVERFLOW; | |
3733 | } | |
3734 | ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count); | |
3735 | return (KERN_SUCCESS != ret) ? EOVERFLOW : 0; | |
3736 | } | |
3737 | ||
3738 | static void | |
3739 | task_add_importance_watchport(task_t task, mach_port_t port, int *boostp) | |
3740 | { | |
3741 | int boost = 0; | |
3742 | ||
3743 | __impdebug_only int released_pid = 0; | |
3744 | __impdebug_only int pid = audit_token_pid_from_task(task); | |
3745 | ||
3746 | ipc_importance_task_t release_imp_task = IIT_NULL; | |
3747 | ||
3748 | if (IP_VALID(port) != 0) { | |
3749 | ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE); | |
3750 | ||
3751 | ip_lock(port); | |
3752 | ||
3753 | /* | |
3754 | * The port must have been marked tempowner already. | |
3755 | * This also filters out ports whose receive rights | |
3756 | * are already enqueued in a message, as you can't | |
3757 | * change the right's destination once it's already | |
3758 | * on its way. | |
3759 | */ | |
3760 | if (port->ip_tempowner != 0) { | |
3761 | assert(port->ip_impdonation != 0); | |
3762 | ||
3763 | boost = port->ip_impcount; | |
3764 | if (IIT_NULL != port->ip_imp_task) { | |
3765 | /* | |
3766 | * if this port is already bound to a task, | |
3767 | * release the task reference and drop any | |
3768 | * watchport-forwarded boosts | |
3769 | */ | |
3770 | release_imp_task = port->ip_imp_task; | |
3771 | port->ip_imp_task = IIT_NULL; | |
3772 | } | |
3773 | ||
3774 | /* mark the port is watching another task (reference held in port->ip_imp_task) */ | |
3775 | if (ipc_importance_task_is_marked_receiver(new_imp_task)) { | |
3776 | port->ip_imp_task = new_imp_task; | |
3777 | new_imp_task = IIT_NULL; | |
3778 | } | |
3779 | } | |
3780 | ip_unlock(port); | |
3781 | ||
3782 | if (IIT_NULL != new_imp_task) { | |
3783 | ipc_importance_task_release(new_imp_task); | |
3784 | } | |
3785 | ||
3786 | if (IIT_NULL != release_imp_task) { | |
3787 | if (boost > 0) | |
3788 | ipc_importance_task_drop_internal_assertion(release_imp_task, boost); | |
3789 | ||
3790 | // released_pid = audit_token_pid_from_task(release_imp_task); /* TODO: Need ref-safe way to get pid */ | |
3791 | ipc_importance_task_release(release_imp_task); | |
3792 | } | |
3793 | #if IMPORTANCE_DEBUG | |
3794 | KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE, | |
3795 | proc_selfpid(), pid, boost, released_pid, 0); | |
3796 | #endif /* IMPORTANCE_DEBUG */ | |
3797 | } | |
3798 | ||
3799 | *boostp = boost; | |
3800 | return; | |
3801 | } | |
3802 | ||
3803 | #endif /* IMPORTANCE_INHERITANCE */ | |
3804 | ||
3805 | /* | |
3806 | * Routines for VM to query task importance | |
3807 | */ | |
3808 | ||
3809 | ||
/*
 * Weights summed by task_importance_estimate() when ranking tasks for
 * low memory notification and for purging purgeable memory.
 */
#define TASK_IMPORTANCE_FOREGROUND	4
#define TASK_IMPORTANCE_NOTDARWINBG	1
3816 | ||
3817 | ||
3818 | /* | |
3819 | * Checks if the task is already notified. | |
3820 | * | |
3821 | * Condition: task lock should be held while calling this function. | |
3822 | */ | |
3823 | boolean_t | |
3824 | task_has_been_notified(task_t task, int pressurelevel) | |
3825 | { | |
3826 | if (task == NULL) { | |
3827 | return FALSE; | |
3828 | } | |
3829 | ||
3830 | if (pressurelevel == kVMPressureWarning) | |
3831 | return (task->low_mem_notified_warn ? TRUE : FALSE); | |
3832 | else if (pressurelevel == kVMPressureCritical) | |
3833 | return (task->low_mem_notified_critical ? TRUE : FALSE); | |
3834 | else | |
3835 | return TRUE; | |
3836 | } | |
3837 | ||
3838 | ||
3839 | /* | |
3840 | * Checks if the task is used for purging. | |
3841 | * | |
3842 | * Condition: task lock should be held while calling this function. | |
3843 | */ | |
3844 | boolean_t | |
3845 | task_used_for_purging(task_t task, int pressurelevel) | |
3846 | { | |
3847 | if (task == NULL) { | |
3848 | return FALSE; | |
3849 | } | |
3850 | ||
3851 | if (pressurelevel == kVMPressureWarning) | |
3852 | return (task->purged_memory_warn ? TRUE : FALSE); | |
3853 | else if (pressurelevel == kVMPressureCritical) | |
3854 | return (task->purged_memory_critical ? TRUE : FALSE); | |
3855 | else | |
3856 | return TRUE; | |
3857 | } | |
3858 | ||
3859 | ||
3860 | /* | |
3861 | * Mark the task as notified with memory notification. | |
3862 | * | |
3863 | * Condition: task lock should be held while calling this function. | |
3864 | */ | |
3865 | void | |
3866 | task_mark_has_been_notified(task_t task, int pressurelevel) | |
3867 | { | |
3868 | if (task == NULL) { | |
3869 | return; | |
3870 | } | |
3871 | ||
3872 | if (pressurelevel == kVMPressureWarning) | |
3873 | task->low_mem_notified_warn = 1; | |
3874 | else if (pressurelevel == kVMPressureCritical) | |
3875 | task->low_mem_notified_critical = 1; | |
3876 | } | |
3877 | ||
3878 | ||
3879 | /* | |
3880 | * Mark the task as purged. | |
3881 | * | |
3882 | * Condition: task lock should be held while calling this function. | |
3883 | */ | |
3884 | void | |
3885 | task_mark_used_for_purging(task_t task, int pressurelevel) | |
3886 | { | |
3887 | if (task == NULL) { | |
3888 | return; | |
3889 | } | |
3890 | ||
3891 | if (pressurelevel == kVMPressureWarning) | |
3892 | task->purged_memory_warn = 1; | |
3893 | else if (pressurelevel == kVMPressureCritical) | |
3894 | task->purged_memory_critical = 1; | |
3895 | } | |
3896 | ||
3897 | ||
3898 | /* | |
3899 | * Mark the task eligible for low memory notification. | |
3900 | * | |
3901 | * Condition: task lock should be held while calling this function. | |
3902 | */ | |
3903 | void | |
3904 | task_clear_has_been_notified(task_t task, int pressurelevel) | |
3905 | { | |
3906 | if (task == NULL) { | |
3907 | return; | |
3908 | } | |
3909 | ||
3910 | if (pressurelevel == kVMPressureWarning) | |
3911 | task->low_mem_notified_warn = 0; | |
3912 | else if (pressurelevel == kVMPressureCritical) | |
3913 | task->low_mem_notified_critical = 0; | |
3914 | } | |
3915 | ||
3916 | ||
3917 | /* | |
3918 | * Mark the task eligible for purging its purgeable memory. | |
3919 | * | |
3920 | * Condition: task lock should be held while calling this function. | |
3921 | */ | |
3922 | void | |
3923 | task_clear_used_for_purging(task_t task) | |
3924 | { | |
3925 | if (task == NULL) { | |
3926 | return; | |
3927 | } | |
3928 | ||
3929 | task->purged_memory_warn = 0; | |
3930 | task->purged_memory_critical = 0; | |
3931 | } | |
3932 | ||
3933 | ||
3934 | /* | |
3935 | * Estimate task importance for purging its purgeable memory | |
3936 | * and low memory notification. | |
3937 | * | |
3938 | * Importance is calculated in the following order of criteria: | |
3939 | * -Task role : Background vs Foreground | |
3940 | * -Boost status: Not boosted vs Boosted | |
3941 | * -Darwin BG status. | |
3942 | * | |
3943 | * Returns: Estimated task importance. Less important task will have lower | |
3944 | * estimated importance. | |
3945 | */ | |
3946 | int | |
3947 | task_importance_estimate(task_t task) | |
3948 | { | |
3949 | int task_importance = 0; | |
3950 | ||
3951 | if (task == NULL) { | |
3952 | return 0; | |
3953 | } | |
3954 | ||
3955 | if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION) | |
3956 | task_importance += TASK_IMPORTANCE_FOREGROUND; | |
3957 | ||
3958 | if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0) | |
3959 | task_importance += TASK_IMPORTANCE_NOTDARWINBG; | |
3960 | ||
3961 | return task_importance; | |
3962 | } | |
3963 |