apple/xnu xnu-3248.40.184: osfmk/kern/task_policy.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/task_server.h>
31
32 #include <kern/sched.h>
33 #include <kern/task.h>
34 #include <mach/thread_policy.h>
35 #include <sys/errno.h>
36 #include <sys/resource.h>
37 #include <machine/limits.h>
38 #include <kern/ledger.h>
39 #include <kern/thread_call.h>
40 #include <kern/sfi.h>
41 #include <kern/coalition.h>
42 #if CONFIG_TELEMETRY
43 #include <kern/telemetry.h>
44 #endif
45
46 #if IMPORTANCE_INHERITANCE
47 #include <ipc/ipc_importance.h>
48 #if IMPORTANCE_DEBUG
49 #include <mach/machine/sdt.h>
50 #endif /* IMPORTANCE_DEBUG */
51 #endif /* IMPORTANCE_INHERITANCE */
52
53 #include <sys/kdebug.h>
54
55 /*
56 * Task Policy
57 *
58 * This subsystem manages task and thread IO priority and backgrounding,
59 * as well as importance inheritance, process suppression, task QoS, and apptype.
60 * These properties have a surprising number of complex interactions, so they are
61 * centralized here in one state machine to simplify the implementation of those interactions.
62 *
63 * Architecture:
64 * Threads and tasks have three policy fields: requested, effective, and pending.
65 * Requested represents the wishes of each interface that influences task policy.
66 * Effective represents the distillation of that policy into a set of behaviors.
67 * Pending represents updates that haven't been applied yet.
68 *
69 * Each interface that has an input into the task policy state machine controls a field in requested.
70 * If the interface has a getter, it returns what is in the field in requested, but that is
71 * not necessarily what is actually in effect.
72 *
73 * All kernel subsystems that behave differently based on task policy call into
74 * the get_effective_policy function, which returns the decision of the task policy state machine
75 * for that subsystem by querying only the 'effective' field.
76 *
77 * Policy change operations:
78 * Here are the steps to change a policy on a task or thread:
79 * 1) Lock task
80 * 2) Change requested field for the relevant policy
81 * 3) Run a task policy update, which recalculates effective based on requested,
83 * then takes a diff between the old and new versions of effective and calls the relevant
83 * other subsystems to apply these changes, and updates the pending field.
84 * 4) Unlock task
85 * 5) Run task policy update complete, which looks at the pending field to update
86 * subsystems which cannot be touched while holding the task lock.
87 *
88 * To add a new requested policy, add the field in the requested struct, the flavor in task.h,
89 * the setter and getter in proc_(set|get)_task_policy*, and dump the state in task_requested_bitfield,
90 * then set up the effects of that behavior in task_policy_update*. If the policy manifests
91 * itself as a distinct effective policy, add it to the effective struct and add it to the
92 * proc_get_effective_policy accessor.
93 *
94 * Most policies are set via proc_set_task_policy, but policies that don't fit that interface
95 * roll their own lock/set/update/unlock/complete code inside this file.
96 *
97 *
98 * Suppression policy
99 *
100 * These are a set of behaviors that can be requested for a task. They currently have specific
101 * implied actions when they're enabled, but they may be made customizable in the future.
102 *
103 * When the affected task is boosted, we temporarily disable the suppression behaviors
104 * so that the affected process has a chance to run so it can call the API to permanently
105 * disable the suppression behaviors.
106 *
107 * Locking
108 *
109 * Changing task policy on a task or thread takes the task lock, and not the thread lock.
110 * TODO: Should changing policy on a thread take the thread lock instead?
111 *
112 * Querying the effective policy does not take the task lock, to prevent deadlocks or slowdown in sensitive code.
113 * This means that any notification of state change needs to be externally synchronized.
114 *
115 */
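/*
 * Illustrative sketch (not an actual call site): most writers go through
 * proc_set_task_policy(), which performs the five-step sequence above on
 * the caller's behalf.  Marking a task DARWIN_BG on behalf of an external
 * client would look roughly like:
 *
 *	proc_set_task_policy(task, THREAD_NULL,
 *	                     TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG, 1);
 *
 * A subsystem that can't use that interface rolls the same steps by hand:
 *
 *	struct task_pend_token pend_token = {};
 *	task_lock(task);
 *	... change the relevant task->requested_policy field (step 2) ...
 *	task_policy_update_locked(task, THREAD_NULL, &pend_token);
 *	task_unlock(task);
 *	task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
 *
 * Readers never take the task lock; they query only the effective policy,
 * e.g. the low-priority CPU behavior that DARWIN_BG implies:
 *
 *	if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU))
 *		... treat the task as throttled ...
 */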
116
117 extern const qos_policy_params_t thread_qos_policy_params;
118
119 /* for task holds without dropping the lock */
120 extern void task_hold_locked(task_t task);
121 extern void task_release_locked(task_t task);
122 extern void task_wait_locked(task_t task, boolean_t until_not_runnable);
123
124 extern void thread_recompute_qos(thread_t thread);
125
126 /* Task policy related helper functions */
127 static void proc_set_task_policy_locked(task_t task, thread_t thread, int category, int flavor, int value);
128 static void proc_set_task_policy2_locked(task_t task, thread_t thread, int category, int flavor, int value1, int value2);
129
130 static void task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token);
131 static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token);
132 static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle, boolean_t update_sfi);
133 static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle, boolean_t update_sfi, boolean_t update_qos);
134
135 #if CONFIG_SCHED_SFI
136 static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role);
137 #endif
138
139 static int proc_get_effective_policy(task_t task, thread_t thread, int policy);
140
141 static void proc_iopol_to_tier(int iopolicy, int *tier, int *passive);
142 static int proc_tier_to_iopol(int tier, int passive);
143
144 static uintptr_t trequested_0(task_t task, thread_t thread);
145 static uintptr_t trequested_1(task_t task, thread_t thread);
146 static uintptr_t teffective_0(task_t task, thread_t thread);
147 static uintptr_t teffective_1(task_t task, thread_t thread);
148 static uint32_t tpending(task_pend_token_t pend_token);
149 static uint64_t task_requested_bitfield(task_t task, thread_t thread);
150 static uint64_t task_effective_bitfield(task_t task, thread_t thread);
151
152 void proc_get_thread_policy(thread_t thread, thread_policy_state_t info);
153
154 /* CPU Limits related helper functions */
155 static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope);
156 int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
157 static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
158 int task_disable_cpumon(task_t task);
159 static int task_apply_resource_actions(task_t task, int type);
160 void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
161 void proc_init_cpumon_params(void);
162
163 #ifdef MACH_BSD
164 int proc_pid(void *proc);
165 extern int proc_selfpid(void);
166 extern char * proc_name_address(void *p);
167 extern void rethrottle_thread(void * uthread);
168 extern void proc_apply_task_networkbg(void * bsd_info, thread_t thread);
169 #endif /* MACH_BSD */
170
171 extern zone_t thread_qos_override_zone;
172 static boolean_t _proc_thread_qos_remove_override_internal(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type, boolean_t reset);
173
174
175 /* Importance Inheritance related helper functions */
176
177 #if IMPORTANCE_INHERITANCE
178
179 static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp);
180 static void task_importance_update_live_donor(task_t target_task);
181
182 #endif /* IMPORTANCE_INHERITANCE */
183
184 #if IMPORTANCE_DEBUG
185 #define __impdebug_only
186 #else
187 #define __impdebug_only __unused
188 #endif
189
190 #if IMPORTANCE_INHERITANCE
191 #define __imp_only
192 #else
193 #define __imp_only __unused
194 #endif
195
196 #define TASK_LOCKED 1
197 #define TASK_UNLOCKED 0
198
199 #define DO_LOWPRI_CPU 1
200 #define UNDO_LOWPRI_CPU 2
201
202 /* Macros for making tracing simpler */
203
204 #define tpriority(task, thread) ((uintptr_t)(thread == THREAD_NULL ? (task->priority) : (thread->base_pri)))
205 #define tisthread(thread) (thread == THREAD_NULL ? TASK_POLICY_TASK : TASK_POLICY_THREAD)
206 #define targetid(task, thread) ((uintptr_t)(thread == THREAD_NULL ? (task_pid(task)) : (thread->thread_id)))
207
208 /*
209 * Default parameters for certain policies
210 */
211
212 int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
213 int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
214 int proc_tal_disk_tier = THROTTLE_LEVEL_TIER1;
215
216 int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & 0xFF);
217
218 const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2;
219
220 /* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */
221 const struct task_requested_policy default_task_requested_policy = {
222 .bg_iotier = proc_default_bg_iotier
223 };
224 const struct task_effective_policy default_task_effective_policy = {};
225 const struct task_pended_policy default_task_pended_policy = {};
226
227 /*
228 * Default parameters for CPU usage monitor.
229 *
230 * Default setting is 50% over 3 minutes.
231 */
232 #define DEFAULT_CPUMON_PERCENTAGE 50
233 #define DEFAULT_CPUMON_INTERVAL (3 * 60)
234
235 uint8_t proc_max_cpumon_percentage;
236 uint64_t proc_max_cpumon_interval;
237
238 kern_return_t
239 qos_latency_policy_validate(task_latency_qos_t ltier) {
240 if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
241 ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0)))
242 return KERN_INVALID_ARGUMENT;
243
244 return KERN_SUCCESS;
245 }
246
247 kern_return_t
248 qos_throughput_policy_validate(task_throughput_qos_t ttier) {
249 if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
250 ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0)))
251 return KERN_INVALID_ARGUMENT;
252
253 return KERN_SUCCESS;
254 }
255
256 static kern_return_t
257 task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) {
258 if (count < TASK_QOS_POLICY_COUNT)
259 return KERN_INVALID_ARGUMENT;
260
261 task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
262 task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;
263
264 kern_return_t kr = qos_latency_policy_validate(ltier);
265
266 if (kr != KERN_SUCCESS)
267 return kr;
268
269 kr = qos_throughput_policy_validate(ttier);
270
271 return kr;
272 }
273
274 uint32_t
275 qos_extract(uint32_t qv) {
276 return (qv & 0xFF);
277 }
278
279 uint32_t
280 qos_latency_policy_package(uint32_t qv) {
281 return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
282 }
283
284 uint32_t
285 qos_throughput_policy_package(uint32_t qv) {
286 return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv);
287 }
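/*
 * Worked example (assuming the tier encodings from mach/thread_policy.h):
 * the user-visible tier constants carry a type tag in the upper bytes
 * (0xFF for latency, 0xFE for throughput) plus a small tier index in the
 * low byte.  qos_extract() keeps only the index for compact storage in the
 * policy bitfields, and the package routines restore the tag, so for any
 * specified tier
 *
 *	qos_latency_policy_package(qos_extract(LATENCY_QOS_TIER_3)) == LATENCY_QOS_TIER_3
 *
 * while the UNSPECIFIED value (0) round-trips unchanged.
 */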
288
289 /* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */
290 static boolean_t task_policy_suppression_disable = FALSE;
291
292 kern_return_t
293 task_policy_set(
294 task_t task,
295 task_policy_flavor_t flavor,
296 task_policy_t policy_info,
297 mach_msg_type_number_t count)
298 {
299 kern_return_t result = KERN_SUCCESS;
300
301 if (task == TASK_NULL || task == kernel_task)
302 return (KERN_INVALID_ARGUMENT);
303
304 switch (flavor) {
305
306 case TASK_CATEGORY_POLICY: {
307 task_category_policy_t info = (task_category_policy_t)policy_info;
308
309 if (count < TASK_CATEGORY_POLICY_COUNT)
310 return (KERN_INVALID_ARGUMENT);
311
312
313 switch(info->role) {
314 case TASK_FOREGROUND_APPLICATION:
315 case TASK_BACKGROUND_APPLICATION:
316 case TASK_DEFAULT_APPLICATION:
317 proc_set_task_policy(task, THREAD_NULL,
318 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
319 info->role);
320 break;
321
322 case TASK_CONTROL_APPLICATION:
323 if (task != current_task() || task->sec_token.val[0] != 0)
324 result = KERN_INVALID_ARGUMENT;
325 else
326 proc_set_task_policy(task, THREAD_NULL,
327 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
328 info->role);
329 break;
330
331 case TASK_GRAPHICS_SERVER:
332 /* TODO: Restrict this role to FCFS <rdar://problem/12552788> */
333 if (task != current_task() || task->sec_token.val[0] != 0)
334 result = KERN_INVALID_ARGUMENT;
335 else
336 proc_set_task_policy(task, THREAD_NULL,
337 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
338 info->role);
339 break;
340 default:
341 result = KERN_INVALID_ARGUMENT;
342 break;
343 } /* switch (info->role) */
344
345 break;
346 }
347
348 /* Desired energy-efficiency/performance "quality-of-service" */
349 case TASK_BASE_QOS_POLICY:
350 case TASK_OVERRIDE_QOS_POLICY:
351 {
352 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
353 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
354
355 if (kr != KERN_SUCCESS)
356 return kr;
357
358
359 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
360 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
361
362 proc_set_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
363 flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS,
364 lqos, tqos);
365 }
366 break;
367
368 case TASK_BASE_LATENCY_QOS_POLICY:
369 {
370 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
371 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
372
373 if (kr != KERN_SUCCESS)
374 return kr;
375
376 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
377
378 proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos);
379 }
380 break;
381
382 case TASK_BASE_THROUGHPUT_QOS_POLICY:
383 {
384 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
385 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
386
387 if (kr != KERN_SUCCESS)
388 return kr;
389
390 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
391
392 proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos);
393 }
394 break;
395
396 case TASK_SUPPRESSION_POLICY:
397 {
398
399 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
400
401 if (count < TASK_SUPPRESSION_POLICY_COUNT)
402 return (KERN_INVALID_ARGUMENT);
403
404 struct task_qos_policy qosinfo;
405
406 qosinfo.task_latency_qos_tier = info->timer_throttle;
407 qosinfo.task_throughput_qos_tier = info->throughput_qos;
408
409 kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT);
410
411 if (kr != KERN_SUCCESS)
412 return kr;
413
414 /* TEMPORARY disablement of task suppression */
415 if (task_policy_suppression_disable && info->active)
416 return KERN_SUCCESS;
417
418 struct task_pend_token pend_token = {};
419
420 task_lock(task);
421
422 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
423 (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
424 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL),
425 trequested_1(task, THREAD_NULL), 0);
426
427 task->requested_policy.t_sup_active = (info->active) ? 1 : 0;
428 task->requested_policy.t_sup_lowpri_cpu = (info->lowpri_cpu) ? 1 : 0;
429 task->requested_policy.t_sup_timer = qos_extract(info->timer_throttle);
430 task->requested_policy.t_sup_disk = (info->disk_throttle) ? 1 : 0;
431 task->requested_policy.t_sup_cpu_limit = (info->cpu_limit) ? 1 : 0;
432 task->requested_policy.t_sup_suspend = (info->suspend) ? 1 : 0;
433 task->requested_policy.t_sup_throughput = qos_extract(info->throughput_qos);
434 task->requested_policy.t_sup_cpu = (info->suppressed_cpu) ? 1 : 0;
435 task->requested_policy.t_sup_bg_sockets = (info->background_sockets) ? 1 : 0;
436
437 task_policy_update_locked(task, THREAD_NULL, &pend_token);
438
439 task_unlock(task);
440
441 task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
442
443 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
444 (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
445 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL),
446 trequested_1(task, THREAD_NULL), 0);
447
448 break;
449
450 }
451
452 default:
453 result = KERN_INVALID_ARGUMENT;
454 break;
455 }
456
457 return (result);
458 }
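/*
 * Sketch of a typical caller (the MIG-generated user-space interface, not
 * code from this file; names assume the definitions in mach/task_policy.h):
 * a runtime marking itself as the foreground application might issue
 *
 *	task_category_policy_data_t category = { .role = TASK_FOREGROUND_APPLICATION };
 *	kern_return_t kr = task_policy_set(mach_task_self(), TASK_CATEGORY_POLICY,
 *	                                   (task_policy_t)&category,
 *	                                   TASK_CATEGORY_POLICY_COUNT);
 *
 * which arrives at the TASK_CATEGORY_POLICY case above and is recorded as
 * the TASK_POLICY_ROLE attribute of the requested policy.
 */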
459
460 /* Sets BSD 'nice' value on the task */
461 kern_return_t
462 task_importance(
463 task_t task,
464 integer_t importance)
465 {
466 if (task == TASK_NULL || task == kernel_task)
467 return (KERN_INVALID_ARGUMENT);
468
469 task_lock(task);
470
471 if (!task->active) {
472 task_unlock(task);
473
474 return (KERN_TERMINATED);
475 }
476
477 if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
478 task_unlock(task);
479
480 return (KERN_INVALID_ARGUMENT);
481 }
482
483 task->importance = importance;
484
485 /* TODO: tracepoint? */
486
487 /* Redrive only the task priority calculation */
488 task_policy_update_task_locked(task, FALSE, FALSE, FALSE);
489
490 task_unlock(task);
491
492 return (KERN_SUCCESS);
493 }
494
495 kern_return_t
496 task_policy_get(
497 task_t task,
498 task_policy_flavor_t flavor,
499 task_policy_t policy_info,
500 mach_msg_type_number_t *count,
501 boolean_t *get_default)
502 {
503 if (task == TASK_NULL || task == kernel_task)
504 return (KERN_INVALID_ARGUMENT);
505
506 switch (flavor) {
507
508 case TASK_CATEGORY_POLICY:
509 {
510 task_category_policy_t info = (task_category_policy_t)policy_info;
511
512 if (*count < TASK_CATEGORY_POLICY_COUNT)
513 return (KERN_INVALID_ARGUMENT);
514
515 if (*get_default)
516 info->role = TASK_UNSPECIFIED;
517 else
518 info->role = proc_get_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
519 break;
520 }
521
522 case TASK_BASE_QOS_POLICY: /* FALLTHRU */
523 case TASK_OVERRIDE_QOS_POLICY:
524 {
525 task_qos_policy_t info = (task_qos_policy_t)policy_info;
526
527 if (*count < TASK_QOS_POLICY_COUNT)
528 return (KERN_INVALID_ARGUMENT);
529
530 if (*get_default) {
531 info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
532 info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
533 } else if (flavor == TASK_BASE_QOS_POLICY) {
534 int value1, value2;
535
536 proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
537
538 info->task_latency_qos_tier = qos_latency_policy_package(value1);
539 info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
540
541 } else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
542 int value1, value2;
543
544 proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
545
546 info->task_latency_qos_tier = qos_latency_policy_package(value1);
547 info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
548 }
549
550 break;
551 }
552
553 case TASK_POLICY_STATE:
554 {
555 task_policy_state_t info = (task_policy_state_t)policy_info;
556
557 if (*count < TASK_POLICY_STATE_COUNT)
558 return (KERN_INVALID_ARGUMENT);
559
560 /* Only root can get this info */
561 if (current_task()->sec_token.val[0] != 0)
562 return KERN_PROTECTION_FAILURE;
563
564 if (*get_default) {
565 info->requested = 0;
566 info->effective = 0;
567 info->pending = 0;
568 info->imp_assertcnt = 0;
569 info->imp_externcnt = 0;
570 info->flags = 0;
571 info->imp_transitions = 0;
572 } else {
573 task_lock(task);
574
575 info->requested = task_requested_bitfield(task, THREAD_NULL);
576 info->effective = task_effective_bitfield(task, THREAD_NULL);
577 info->pending = 0;
578
579 info->flags = 0;
580 if (task->task_imp_base != NULL) {
581 info->imp_assertcnt = task->task_imp_base->iit_assertcnt;
582 info->imp_externcnt = IIT_EXTERN(task->task_imp_base);
583 info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0);
584 info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0);
585 info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0);
586 info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0);
587 info->imp_transitions = task->task_imp_base->iit_transitions;
588 } else {
589 info->imp_assertcnt = 0;
590 info->imp_externcnt = 0;
591 info->imp_transitions = 0;
592 }
593 task_unlock(task);
594 }
595
596 info->reserved[0] = 0;
597 info->reserved[1] = 0;
598
599 break;
600 }
601
602 case TASK_SUPPRESSION_POLICY:
603 {
604 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
605
606 if (*count < TASK_SUPPRESSION_POLICY_COUNT)
607 return (KERN_INVALID_ARGUMENT);
608
609 task_lock(task);
610
611 if (*get_default) {
612 info->active = 0;
613 info->lowpri_cpu = 0;
614 info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED;
615 info->disk_throttle = 0;
616 info->cpu_limit = 0;
617 info->suspend = 0;
618 info->throughput_qos = 0;
619 info->suppressed_cpu = 0;
620 } else {
621 info->active = task->requested_policy.t_sup_active;
622 info->lowpri_cpu = task->requested_policy.t_sup_lowpri_cpu;
623 info->timer_throttle = qos_latency_policy_package(task->requested_policy.t_sup_timer);
624 info->disk_throttle = task->requested_policy.t_sup_disk;
625 info->cpu_limit = task->requested_policy.t_sup_cpu_limit;
626 info->suspend = task->requested_policy.t_sup_suspend;
627 info->throughput_qos = qos_throughput_policy_package(task->requested_policy.t_sup_throughput);
628 info->suppressed_cpu = task->requested_policy.t_sup_cpu;
629 info->background_sockets = task->requested_policy.t_sup_bg_sockets;
630 }
631
632 task_unlock(task);
633 break;
634 }
635
636 default:
637 return (KERN_INVALID_ARGUMENT);
638 }
639
640 return (KERN_SUCCESS);
641 }
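/*
 * Sketch of the corresponding read path (user-space interface, not code
 * from this file): callers ask either for the current setting or, by
 * pre-setting get_default, for the flavor's default values:
 *
 *	task_category_policy_data_t category;
 *	mach_msg_type_number_t count = TASK_CATEGORY_POLICY_COUNT;
 *	boolean_t get_default = FALSE;
 *	kern_return_t kr = task_policy_get(mach_task_self(), TASK_CATEGORY_POLICY,
 *	                                   (task_policy_t)&category, &count, &get_default);
 *
 * Note that this reports the requested policy, which is not necessarily
 * what is currently in effect (see the architecture comment at the top of
 * this file).
 */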
642
643 /*
644 * Called at task creation
645 * We calculate the correct effective but don't apply it to anything yet.
646 * The threads, etc will inherit from the task as they get created.
647 */
648 void
649 task_policy_create(task_t task, int parent_boosted)
650 {
651 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
652 if (parent_boosted) {
653 task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
654 task_importance_mark_donor(task, TRUE);
655 } else {
656 task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
657 task_importance_mark_receiver(task, FALSE);
658 }
659 }
660
661 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
662 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
663 task_pid(task), teffective_0(task, THREAD_NULL),
664 teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
665
666 task_policy_update_internal_locked(task, THREAD_NULL, TRUE, NULL);
667
668 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
669 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
670 task_pid(task), teffective_0(task, THREAD_NULL),
671 teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
672
673 task_importance_update_live_donor(task);
674 task_policy_update_task_locked(task, FALSE, FALSE, FALSE);
675 }
676
677 void
678 thread_policy_create(thread_t thread)
679 {
680 task_t task = thread->task;
681
682 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
683 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
684 targetid(task, thread), teffective_0(task, thread),
685 teffective_1(task, thread), tpriority(task, thread), 0);
686
687 task_policy_update_internal_locked(task, thread, TRUE, NULL);
688
689 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
690 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
691 targetid(task, thread), teffective_0(task, thread),
692 teffective_1(task, thread), tpriority(task, thread), 0);
693 }
694
695 static void
696 task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token)
697 {
698 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
699 (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread)) | DBG_FUNC_START),
700 targetid(task, thread), teffective_0(task, thread),
701 teffective_1(task, thread), tpriority(task, thread), 0);
702
703 task_policy_update_internal_locked(task, thread, FALSE, pend_token);
704
705 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
706 (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END,
707 targetid(task, thread), teffective_0(task, thread),
708 teffective_1(task, thread), tpriority(task, thread), 0);
709 }
710
711 /*
712 * One state update function TO RULE THEM ALL
713 *
714 * This function updates the task or thread effective policy fields
715 * and pushes the results to the relevant subsystems.
716 *
717 * Must call update_complete after unlocking the task,
718 * as some subsystems cannot be updated while holding the task lock.
719 *
720 * Called with task locked, not thread
721 */
722
723 static void
724 task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token)
725 {
726 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
727
728 /*
729 * Step 1:
730 * Gather requested policy
731 */
732
733 struct task_requested_policy requested =
734 (on_task) ? task->requested_policy : thread->requested_policy;
735
736
737 /*
738 * Step 2:
739 * Calculate new effective policies from requested policy and task state
740 * Rules:
741 * If in an 'on_task' block, must only look at and set fields starting with t_
742 * If operating on a task, don't touch anything starting with th_
743 * If operating on a thread, don't touch anything starting with t_
744 * Don't change requested, it won't take effect
745 */
746
747 struct task_effective_policy next = {};
748 struct task_effective_policy task_effective;
749
750 /* Calculate QoS policies */
751
752 if (on_task) {
753 /* Update task role */
754 next.t_role = requested.t_role;
755
756 /* Set task qos clamp and ceiling */
757 next.t_qos_clamp = requested.t_qos_clamp;
758
759 if (requested.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
760 requested.t_apptype == TASK_APPTYPE_APP_TAL) {
761
762 switch (next.t_role) {
763 case TASK_FOREGROUND_APPLICATION:
764 /* Foreground apps get urgent scheduler priority */
765 next.qos_ui_is_urgent = 1;
766 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
767 break;
768
769 case TASK_BACKGROUND_APPLICATION:
770 /* This is really 'non-focal but on-screen' */
771 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
772 break;
773
774 case TASK_DEFAULT_APPLICATION:
775 /* This is 'may render UI but we don't know if it's focal/nonfocal' */
776 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
777 break;
778
779 case TASK_NONUI_APPLICATION:
780 /* i.e. 'off-screen' */
781 next.t_qos_ceiling = THREAD_QOS_LEGACY;
782 break;
783
784 case TASK_CONTROL_APPLICATION:
785 case TASK_GRAPHICS_SERVER:
786 next.qos_ui_is_urgent = 1;
787 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
788 break;
789
790 case TASK_THROTTLE_APPLICATION:
791 /* i.e. 'TAL launch' */
792 next.t_qos_ceiling = THREAD_QOS_UTILITY;
793 break;
794
795 case TASK_UNSPECIFIED:
796 default:
797 /* Apps that don't have an application role get
798 * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */
799 next.t_qos_ceiling = THREAD_QOS_LEGACY;
800 break;
801 }
802 } else {
803 /* Daemons get USER_INTERACTIVE squashed to USER_INITIATED */
804 next.t_qos_ceiling = THREAD_QOS_USER_INITIATED;
805 }
806 } else {
807 /*
808 * Set thread qos tier
809 * Note that an override only overrides the QoS field, not other policy settings.
810 * A thread must already be participating in QoS for override to take effect
811 */
812
813 /* Snapshot the task's effective policy */
814 task_effective = task->effective_policy;
815
816 next.qos_ui_is_urgent = task_effective.qos_ui_is_urgent;
817
818 if ((requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED) && (requested.thrp_qos != THREAD_QOS_UNSPECIFIED))
819 next.thep_qos = MAX(requested.thrp_qos_override, requested.thrp_qos);
820 else
821 next.thep_qos = requested.thrp_qos;
822
823 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
824 if (task_effective.t_qos_clamp != THREAD_QOS_UNSPECIFIED) {
825 if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
826 next.thep_qos = MIN(task_effective.t_qos_clamp, next.thep_qos);
827 else
828 next.thep_qos = task_effective.t_qos_clamp;
829 }
830
831 /* The ceiling only applies to threads that are in the QoS world */
832 if (task_effective.t_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
833 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
834 next.thep_qos = MIN(task_effective.t_qos_ceiling, next.thep_qos);
835 }
836
837 /*
838 * The QoS relative priority is only applicable when the original programmer's
839 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
840 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
841 * since otherwise it would be lower than unclamped threads. Similarly, in the
842 * presence of boosting, the programmer doesn't know what other actors
843 * are boosting the thread.
844 */
845 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
846 (requested.thrp_qos == next.thep_qos) &&
847 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
848 next.thep_qos_relprio = requested.thrp_qos_relprio;
849 } else {
850 next.thep_qos_relprio = 0;
851 }
852 }
853
854 /* Calculate DARWIN_BG */
855 boolean_t wants_darwinbg = FALSE;
856 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
857 boolean_t wants_watchersbg = FALSE; /* Do I want my pidbound threads to be bg */
858
859 /*
860 * If DARWIN_BG has been requested at either level, it's engaged.
861 * Only true DARWIN_BG changes cause watchers to transition.
862 *
863 * Backgrounding due to apptype engages DARWIN_BG, but does not transition watchers.
864 */
865 if (requested.int_darwinbg || requested.ext_darwinbg)
866 wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE;
867
868 if (on_task) {
869 /* Background TAL apps are throttled when TAL is enabled */
870 if (requested.t_apptype == TASK_APPTYPE_APP_TAL &&
871 requested.t_role == TASK_BACKGROUND_APPLICATION &&
872 requested.t_tal_enabled == 1) {
873 next.t_tal_engaged = 1;
874 }
875
876 if ((requested.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
877 requested.t_apptype == TASK_APPTYPE_APP_TAL) &&
878 requested.t_role == TASK_THROTTLE_APPLICATION) {
879 next.t_tal_engaged = 1;
880 }
881
882 /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */
883 if (requested.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
884 requested.t_boosted == 0)
885 wants_darwinbg = TRUE;
886
887 /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */
888 if (requested.t_apptype == TASK_APPTYPE_DAEMON_BACKGROUND)
889 wants_darwinbg = TRUE;
890
891 if (next.t_qos_clamp == THREAD_QOS_BACKGROUND || next.t_qos_clamp == THREAD_QOS_MAINTENANCE)
892 wants_darwinbg = TRUE;
893 } else {
894 if (requested.th_pidbind_bg)
895 wants_all_sockets_bg = wants_darwinbg = TRUE;
896
897 if (requested.th_workq_bg)
898 wants_darwinbg = TRUE;
899
900 if (next.thep_qos == THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_MAINTENANCE)
901 wants_darwinbg = TRUE;
902 }
903
904 /* Calculate side effects of DARWIN_BG */
905
906 if (wants_darwinbg) {
907 next.darwinbg = 1;
908 /* darwinbg threads/tasks always create bg sockets, but we don't always loop over all sockets */
909 next.new_sockets_bg = 1;
910 next.lowpri_cpu = 1;
911 }
912
913 if (wants_all_sockets_bg)
914 next.all_sockets_bg = 1;
915
916 if (on_task && wants_watchersbg)
917 next.t_watchers_bg = 1;
918
919 /* darwinbg on either task or thread implies background QOS (or lower) */
920 if (!on_task &&
921 (wants_darwinbg || task_effective.darwinbg) &&
922 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)){
923 next.thep_qos = THREAD_QOS_BACKGROUND;
924 next.thep_qos_relprio = 0;
925 }
926
927 /* Calculate low CPU priority */
928
929 boolean_t wants_lowpri_cpu = FALSE;
930
931 if (wants_darwinbg)
932 wants_lowpri_cpu = TRUE;
933
934 if (next.t_tal_engaged)
935 wants_lowpri_cpu = TRUE;
936
937 if (on_task && requested.t_sup_lowpri_cpu && requested.t_boosted == 0)
938 wants_lowpri_cpu = TRUE;
939
940 if (wants_lowpri_cpu)
941 next.lowpri_cpu = 1;
942
943 /* Calculate IO policy */
944
945 /* Update BG IO policy (so we can see if it has changed) */
946 next.bg_iotier = requested.bg_iotier;
947
948 int iopol = THROTTLE_LEVEL_TIER0;
949
950 if (wants_darwinbg)
951 iopol = MAX(iopol, requested.bg_iotier);
952
953 if (on_task) {
954 if (requested.t_apptype == TASK_APPTYPE_DAEMON_STANDARD)
955 iopol = MAX(iopol, proc_standard_daemon_tier);
956
957 if (requested.t_sup_disk && requested.t_boosted == 0)
958 iopol = MAX(iopol, proc_suppressed_disk_tier);
959
960 if (next.t_tal_engaged)
961 iopol = MAX(iopol, proc_tal_disk_tier);
962
963 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
964 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.t_qos_clamp]);
965
966 } else {
967 /* Look up the associated IO tier value for the QoS class */
968 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
969 }
970
971 iopol = MAX(iopol, requested.int_iotier);
972 iopol = MAX(iopol, requested.ext_iotier);
973
974 next.io_tier = iopol;
975
976 /* Calculate Passive IO policy */
977
978 if (requested.ext_iopassive || requested.int_iopassive)
979 next.io_passive = 1;
980
981 /* Calculate miscellaneous policy */
982
983 if (on_task) {
984 /* Calculate suppression-active flag */
985 if (requested.t_sup_active && requested.t_boosted == 0)
986 next.t_sup_active = 1;
987
988 /* Calculate suspend policy */
989 if (requested.t_sup_suspend && requested.t_boosted == 0)
990 next.t_suspended = 1;
991
992 /* Calculate timer QOS */
993 int latency_qos = requested.t_base_latency_qos;
994
995 if (requested.t_sup_timer && requested.t_boosted == 0)
996 latency_qos = requested.t_sup_timer;
997
998 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
999 latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.t_qos_clamp]);
1000
1001 if (requested.t_over_latency_qos != 0)
1002 latency_qos = requested.t_over_latency_qos;
1003
1004 /* Treat the windowserver specially */
1005 if (requested.t_role == TASK_GRAPHICS_SERVER)
1006 latency_qos = proc_graphics_timer_qos;
1007
1008 next.t_latency_qos = latency_qos;
1009
1010 /* Calculate throughput QOS */
1011 int through_qos = requested.t_base_through_qos;
1012
1013 if (requested.t_sup_throughput && requested.t_boosted == 0)
1014 through_qos = requested.t_sup_throughput;
1015
1016 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
1017 through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.t_qos_clamp]);
1018
1019 if (requested.t_over_through_qos != 0)
1020 through_qos = requested.t_over_through_qos;
1021
1022 next.t_through_qos = through_qos;
1023
1024 /* Calculate suppressed CPU priority */
1025 if (requested.t_sup_cpu && requested.t_boosted == 0)
1026 next.t_suppressed_cpu = 1;
1027
1028 /*
1029 * Calculate background sockets
1030 * Don't take into account boosting to limit transition frequency.
1031 */
1032 if (requested.t_sup_bg_sockets){
1033 next.all_sockets_bg = 1;
1034 next.new_sockets_bg = 1;
1035 }
1036
1037 /* Apply SFI Managed class bit */
1038 next.t_sfi_managed = requested.t_sfi_managed;
1039
1040 /* Calculate 'live donor' status for live importance */
1041 switch (requested.t_apptype) {
1042 case TASK_APPTYPE_APP_TAL:
1043 case TASK_APPTYPE_APP_DEFAULT:
1044 if (requested.ext_darwinbg == 0)
1045 next.t_live_donor = 1;
1046 else
1047 next.t_live_donor = 0;
1048 break;
1049
1050 case TASK_APPTYPE_DAEMON_INTERACTIVE:
1051 case TASK_APPTYPE_DAEMON_STANDARD:
1052 case TASK_APPTYPE_DAEMON_ADAPTIVE:
1053 case TASK_APPTYPE_DAEMON_BACKGROUND:
1054 default:
1055 next.t_live_donor = 0;
1056 break;
1057 }
1058 }
1059
1060 if (requested.terminated) {
1061 /*
1062 * Shoot down the throttles that slow down exit or response to SIGTERM
1063 * We don't need to shoot down:
1064 * passive (don't want to cause others to throttle)
1065 * all_sockets_bg (don't need to iterate FDs on every exit)
1066 * new_sockets_bg (doesn't matter for exiting process)
1067 * pidsuspend (jetsam-ed BG process shouldn't run again)
1068 * watchers_bg (watcher threads don't need to be unthrottled)
1069 * t_latency_qos (affects userspace timers only)
1070 */
1071
1072 next.terminated = 1;
1073 next.darwinbg = 0;
1074 next.lowpri_cpu = 0;
1075 next.io_tier = THROTTLE_LEVEL_TIER0;
1076 if (on_task) {
1077 next.t_tal_engaged = 0;
1078 next.t_role = TASK_UNSPECIFIED;
1079 next.t_suppressed_cpu = 0;
1080
1081 /* TODO: This should only be shot down on SIGTERM, not exit */
1082 next.t_suspended = 0;
1083 } else {
1084 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1085 }
1086 }
1087
1088 /*
1089 * Step 3:
1090 * Swap out old policy for new policy
1091 */
1092
1093 if (!on_task) {
1094 /* Acquire thread mutex to synchronize against
1095 * thread_policy_set(). Consider reworking to separate qos
1096 * fields, or locking the task in thread_policy_set.
1097 * A more efficient model would be to make the thread bits
1098 * authoritative.
1099 */
1100 thread_mtx_lock(thread);
1101 }
1102
1103 struct task_effective_policy prev =
1104 (on_task) ? task->effective_policy : thread->effective_policy;
1105
1106 /*
1107 * Check for invalid transitions here for easier debugging
1108 * TODO: dump the structs as hex in the panic string
1109 */
1110 if (task == kernel_task && prev.all_sockets_bg != next.all_sockets_bg)
1111 panic("unexpected network change for kernel task");
1112
1113 /* This is the point where the new values become visible to other threads */
1114 if (on_task)
1115 task->effective_policy = next;
1116 else {
1117 /* Preserve thread specific latency/throughput QoS modified via
1118 * thread_policy_set(). Inelegant in the extreme, to be reworked.
1119 *
1120 * If thread QoS class is set, we don't need to preserve the previously set values.
1121 * We should ensure to not accidentally preserve previous thread QoS values if you set a thread
1122 * back to default QoS.
1123 */
1124 uint32_t lqos = thread->effective_policy.t_latency_qos, tqos = thread->effective_policy.t_through_qos;
1125
1126 if (prev.thep_qos == THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) {
1127 next.t_latency_qos = lqos;
1128 next.t_through_qos = tqos;
1129 } else if (prev.thep_qos != THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) {
1130 next.t_latency_qos = 0;
1131 next.t_through_qos = 0;
1132 } else {
1133 next.t_latency_qos = thread_qos_policy_params.qos_latency_qos[next.thep_qos];
1134 next.t_through_qos = thread_qos_policy_params.qos_through_qos[next.thep_qos];
1135 }
1136
1137 thread_update_qos_cpu_time(thread, TRUE);
1138 thread->effective_policy = next;
1139 thread_mtx_unlock(thread);
1140 }
1141
1142 /* Don't do anything further to a half-formed task or thread */
1143 if (in_create)
1144 return;
1145
1146 /*
1147 * Step 4:
1148 * Pend updates that can't be done while holding the task lock
1149 */
1150
1151 if (prev.all_sockets_bg != next.all_sockets_bg)
1152 pend_token->tpt_update_sockets = 1;
1153
1154 if (on_task) {
1155 /* Only re-scan the timer list if the qos level is getting less strong */
1156 if (prev.t_latency_qos > next.t_latency_qos)
1157 pend_token->tpt_update_timers = 1;
1158
1159
1160 if (prev.t_live_donor != next.t_live_donor)
1161 pend_token->tpt_update_live_donor = 1;
1162 }
1163
1164 /*
1165 * Step 5:
1166 * Update other subsystems as necessary if something has changed
1167 */
1168
1169 boolean_t update_throttle = (prev.io_tier != next.io_tier) ? TRUE : FALSE;
1170
1171 if (on_task) {
1172 if (prev.t_suspended == 0 && next.t_suspended == 1 && task->active) {
1173 task_hold_locked(task);
1174 task_wait_locked(task, FALSE);
1175 }
1176 if (prev.t_suspended == 1 && next.t_suspended == 0 && task->active) {
1177 task_release_locked(task);
1178 }
1179
1180 boolean_t update_threads = FALSE;
1181 boolean_t update_sfi = FALSE;
1182
1183 if (prev.bg_iotier != next.bg_iotier ||
1184 prev.terminated != next.terminated ||
1185 prev.t_qos_clamp != next.t_qos_clamp ||
1186 prev.t_qos_ceiling != next.t_qos_ceiling ||
1187 prev.qos_ui_is_urgent != next.qos_ui_is_urgent ||
1188 prev.darwinbg != next.darwinbg)
1189 update_threads = TRUE;
1190
1191 /*
1192 * A bit of a layering violation. We know what task policy attributes
1193 * sfi_thread_classify() consults, so if they change, trigger SFI
1194 * re-evaluation.
1195 */
1196 if ((prev.t_latency_qos != next.t_latency_qos) ||
1197 (prev.t_role != next.t_role) ||
1198 (prev.darwinbg != next.darwinbg) ||
1199 (prev.t_sfi_managed != next.t_sfi_managed))
1200 update_sfi = TRUE;
1201
1202 #if CONFIG_SCHED_SFI
1203 if (prev.t_role != next.t_role && task_policy_update_coalition_focal_tasks(task, prev.t_role, next.t_role)) {
1204 update_sfi = TRUE;
1205 pend_token->tpt_update_coal_sfi = 1;
1206 }
1207 #endif /* CONFIG_SCHED_SFI */
1208
1209 task_policy_update_task_locked(task, update_throttle, update_threads, update_sfi);
1210 } else {
1211 int update_cpu = 0;
1212 boolean_t update_sfi = FALSE;
1213 boolean_t update_qos = FALSE;
1214
1215 if (prev.lowpri_cpu != next.lowpri_cpu)
1216 update_cpu = (next.lowpri_cpu ? DO_LOWPRI_CPU : UNDO_LOWPRI_CPU);
1217
1218 if (prev.darwinbg != next.darwinbg ||
1219 prev.thep_qos != next.thep_qos)
1220 update_sfi = TRUE;
1221
1222 if (prev.thep_qos != next.thep_qos ||
1223 prev.thep_qos_relprio != next.thep_qos_relprio ||
1224 prev.qos_ui_is_urgent != next.qos_ui_is_urgent ||
1225 prev.terminated != next.terminated) {
1226 update_qos = TRUE;
1227 }
1228
1229 task_policy_update_thread_locked(thread, update_cpu, update_throttle, update_sfi, update_qos);
1230 }
1231 }
1232
1233
1234 #if CONFIG_SCHED_SFI
1235 /*
1236 * Yet another layering violation. We reach out and bang on the coalition directly.
1237 */
1238 static boolean_t
1239 task_policy_update_coalition_focal_tasks(task_t task,
1240 int prev_role,
1241 int next_role)
1242 {
1243 boolean_t sfi_transition = FALSE;
1244
1245 /* task moving into/out-of the foreground */
1246 if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) {
1247 if (task_coalition_adjust_focal_count(task, 1) == 1)
1248 sfi_transition = TRUE;
1249 } else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) {
1250 if (task_coalition_adjust_focal_count(task, -1) == 0)
1251 sfi_transition = TRUE;
1252 }
1253
1254 /* task moving into/out-of background */
1255 if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) {
1256 if (task_coalition_adjust_nonfocal_count(task, 1) == 1)
1257 sfi_transition = TRUE;
1258 } else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) {
1259 if (task_coalition_adjust_nonfocal_count(task, -1) == 0)
1260 sfi_transition = TRUE;
1261 }
1262
1263 return sfi_transition;
1264 }
1265 #endif /* CONFIG_SCHED_SFI */
1266
1267 /* Despite the name, the thread's task is locked, the thread is not */
1268 void
1269 task_policy_update_thread_locked(thread_t thread,
1270 int update_cpu,
1271 boolean_t update_throttle,
1272 boolean_t update_sfi,
1273 boolean_t update_qos)
1274 {
1275 thread_precedence_policy_data_t policy;
1276
1277 if (update_throttle) {
1278 rethrottle_thread(thread->uthread);
1279 }
1280
1281 if (update_sfi) {
1282 sfi_reevaluate(thread);
1283 }
1284
1285 /*
1286 * TODO: pidbind needs to stuff remembered importance into saved_importance
1287 * properly deal with bg'ed threads being pidbound and unbging while pidbound
1288 *
1289 * TODO: A BG thread's priority is 0 on desktop and 4 on embedded. Need to reconcile this.
1290 * */
1291 if (update_cpu == DO_LOWPRI_CPU) {
1292 thread->saved_importance = thread->importance;
1293 policy.importance = INT_MIN;
1294 } else if (update_cpu == UNDO_LOWPRI_CPU) {
1295 policy.importance = thread->saved_importance;
1296 thread->saved_importance = 0;
1297 }
1298
1299 /* Takes thread lock and thread mtx lock */
1300 if (update_cpu)
1301 thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
1302 (thread_policy_t)&policy,
1303 THREAD_PRECEDENCE_POLICY_COUNT);
1304
1305 if (update_qos)
1306 thread_recompute_qos(thread);
1307 }
1308
1309 /*
1310 * Calculate priority on a task, loop through its threads, and tell them about
1311 * priority changes and throttle changes.
1312 */
1313 void
1314 task_policy_update_task_locked(task_t task,
1315 boolean_t update_throttle,
1316 boolean_t update_threads,
1317 boolean_t update_sfi)
1318 {
1319 boolean_t update_priority = FALSE;
1320
1321 if (task == kernel_task)
1322 panic("Attempting to set task policy on kernel_task");
1323
1324 int priority = BASEPRI_DEFAULT;
1325 int max_priority = MAXPRI_USER;
1326
1327 if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU)) {
1328 priority = MAXPRI_THROTTLE;
1329 max_priority = MAXPRI_THROTTLE;
1330 } else if (proc_get_effective_task_policy(task, TASK_POLICY_SUPPRESSED_CPU)) {
1331 priority = MAXPRI_SUPPRESSED;
1332 max_priority = MAXPRI_SUPPRESSED;
1333 } else {
1334 switch (proc_get_effective_task_policy(task, TASK_POLICY_ROLE)) {
1335 case TASK_CONTROL_APPLICATION:
1336 priority = BASEPRI_CONTROL;
1337 break;
1338 case TASK_GRAPHICS_SERVER:
1339 priority = BASEPRI_GRAPHICS;
1340 max_priority = MAXPRI_RESERVED;
1341 break;
1342 default:
1343 break;
1344 }
1345
1346 /* factor in 'nice' value */
1347 priority += task->importance;
1348
1349 if (task->effective_policy.t_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1350 int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.t_qos_clamp];
1351
1352 priority = MIN(priority, qos_clamp_priority);
1353 max_priority = MIN(max_priority, qos_clamp_priority);
1354 }
1355 }
1356
1357 /* avoid extra work if priority isn't changing */
1358 if (task->priority != priority || task->max_priority != max_priority) {
1359 update_priority = TRUE;
1360
1361 /* update the scheduling priority for the task */
1362 task->max_priority = max_priority;
1363
1364 if (priority > task->max_priority)
1365 priority = task->max_priority;
1366 else if (priority < MINPRI)
1367 priority = MINPRI;
1368
1369 task->priority = priority;
1370 }
1371
1372 /* Loop over the threads in the task only once, and only if necessary */
1373 if (update_threads || update_throttle || update_priority || update_sfi ) {
1374 thread_t thread;
1375
1376 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1377 if (update_priority) {
1378 thread_mtx_lock(thread);
1379
1380 thread_task_priority(thread, priority, max_priority);
1381
1382 thread_mtx_unlock(thread);
1383 }
1384
1385 if (update_throttle) {
1386 rethrottle_thread(thread->uthread);
1387 }
1388
1389 if (update_sfi) {
1390 sfi_reevaluate(thread);
1391 }
1392
1393 if (update_threads) {
1394 thread->requested_policy.bg_iotier = task->effective_policy.bg_iotier;
1395 thread->requested_policy.terminated = task->effective_policy.terminated;
1396
1397 task_policy_update_internal_locked(task, thread, FALSE, NULL);
1398 /* The thread policy must not emit any completion actions due to this change. */
1399 }
1400 }
1401 }
1402 }
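/*
 * Worked example of the calculation above (illustrative, in terms of the
 * constants used in this function): an unclamped, unsuppressed task with a
 * default role and a 'nice'-derived importance of -5 ends up with
 * priority = BASEPRI_DEFAULT - 5 and max_priority = MAXPRI_USER, whereas
 * the same task with an effective lowpri_cpu (DARWIN_BG) policy is pinned
 * to priority = max_priority = MAXPRI_THROTTLE regardless of importance;
 * in either case each thread is told about the new values via
 * thread_task_priority() in the loop above.
 */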
1403
1404 #if CONFIG_SCHED_SFI
1405 /* coalition object is locked */
1406 static void
1407 task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task)
1408 {
1409 thread_t thread;
1410
1411 /* unused for now */
1412 (void)coal;
1413
1414 /* skip the task we're re-evaluating on behalf of: it's already updated */
1415 if (task == (task_t)ctx)
1416 return;
1417
1418 task_lock(task);
1419
1420 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1421 sfi_reevaluate(thread);
1422 }
1423
1424 task_unlock(task);
1425 }
1426 #endif /* CONFIG_SCHED_SFI */
1427
1428 /*
1429 * Called with task unlocked to do things that can't be done while holding the task lock
1430 */
1431 void
1432 task_policy_update_complete_unlocked(task_t task, thread_t thread, task_pend_token_t pend_token)
1433 {
1434 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1435
1436 #ifdef MACH_BSD
1437 if (pend_token->tpt_update_sockets)
1438 proc_apply_task_networkbg(task->bsd_info, thread);
1439 #endif /* MACH_BSD */
1440
1441 if (on_task) {
1442 /* The timer throttle has been removed or reduced, so we need to look for expired timers and fire them */
1443 if (pend_token->tpt_update_timers)
1444 ml_timer_evaluate();
1445
1446
1447 if (pend_token->tpt_update_live_donor)
1448 task_importance_update_live_donor(task);
1449
1450 #if CONFIG_SCHED_SFI
1451 /* use the resource coalition for SFI re-evaluation */
1452 if (pend_token->tpt_update_coal_sfi)
1453 coalition_for_each_task(task->coalition[COALITION_TYPE_RESOURCE],
1454 (void *)task, task_sfi_reevaluate_cb);
1455 #endif /* CONFIG_SCHED_SFI */
1456 }
1457 }
1458
1459 /*
1460 * Initiate a task policy state transition
1461 *
1462 * Everything that modifies requested except functions that need to hold the task lock
1463 * should use this function
1464 *
1465 * Argument validation should be performed before reaching this point.
1466 *
1467 * TODO: Do we need to check task->active or thread->active?
1468 */
1469 void
1470 proc_set_task_policy(task_t task,
1471 thread_t thread,
1472 int category,
1473 int flavor,
1474 int value)
1475 {
1476 struct task_pend_token pend_token = {};
1477
1478 task_lock(task);
1479
1480 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1481 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
1482 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0);
1483
1484 proc_set_task_policy_locked(task, thread, category, flavor, value);
1485
1486 task_policy_update_locked(task, thread, &pend_token);
1487
1488 task_unlock(task);
1489
1490 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1491 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
1492 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1493
1494 task_policy_update_complete_unlocked(task, thread, &pend_token);
1495 }
1496
1497 /*
1498 * Initiate a task policy state transition on a thread with its TID
1499 * Useful if you cannot guarantee the thread won't get terminated
1500 */
1501 void
1502 proc_set_task_policy_thread(task_t task,
1503 uint64_t tid,
1504 int category,
1505 int flavor,
1506 int value)
1507 {
1508 thread_t thread;
1509 thread_t self = current_thread();
1510 struct task_pend_token pend_token = {};
1511
1512 task_lock(task);
1513
1514 if (tid == TID_NULL || tid == self->thread_id)
1515 thread = self;
1516 else
1517 thread = task_findtid(task, tid);
1518
1519 if (thread == THREAD_NULL) {
1520 task_unlock(task);
1521 return;
1522 }
1523
1524 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1525 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1526 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0);
1527
1528 proc_set_task_policy_locked(task, thread, category, flavor, value);
1529
1530 task_policy_update_locked(task, thread, &pend_token);
1531
1532 task_unlock(task);
1533
1534 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1535 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1536 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1537
1538 task_policy_update_complete_unlocked(task, thread, &pend_token);
1539 }
1540
1541 /*
1542 * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure.
1543 * Same locking rules apply.
1544 */
1545 void
1546 proc_set_task_policy2(task_t task, thread_t thread, int category, int flavor, int value1, int value2)
1547 {
1548 struct task_pend_token pend_token = {};
1549
1550 task_lock(task);
1551
1552 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1553 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
1554 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value1, 0);
1555
1556 proc_set_task_policy2_locked(task, thread, category, flavor, value1, value2);
1557
1558 task_policy_update_locked(task, thread, &pend_token);
1559
1560 task_unlock(task);
1561
1562 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1563 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
1564 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
1565
1566 task_policy_update_complete_unlocked(task, thread, &pend_token);
1567 }
1568
1569 /*
1570 * Set the requested state for a specific flavor to a specific value.
1571 *
1572 * TODO:
1573 * Verify that arguments to non iopol things are 1 or 0
1574 */
1575 static void
1576 proc_set_task_policy_locked(task_t task,
1577 thread_t thread,
1578 int category,
1579 int flavor,
1580 int value)
1581 {
1582 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1583
1584 int tier, passive;
1585
1586 struct task_requested_policy requested =
1587 (on_task) ? task->requested_policy : thread->requested_policy;
1588
1589 switch (flavor) {
1590
1591 /* Category: EXTERNAL and INTERNAL, thread and task */
1592
1593 case TASK_POLICY_DARWIN_BG:
1594 if (category == TASK_POLICY_EXTERNAL)
1595 requested.ext_darwinbg = value;
1596 else
1597 requested.int_darwinbg = value;
1598 break;
1599
1600 case TASK_POLICY_IOPOL:
1601 proc_iopol_to_tier(value, &tier, &passive);
1602 if (category == TASK_POLICY_EXTERNAL) {
1603 requested.ext_iotier = tier;
1604 requested.ext_iopassive = passive;
1605 } else {
1606 requested.int_iotier = tier;
1607 requested.int_iopassive = passive;
1608 }
1609 break;
1610
1611 case TASK_POLICY_IO:
1612 if (category == TASK_POLICY_EXTERNAL)
1613 requested.ext_iotier = value;
1614 else
1615 requested.int_iotier = value;
1616 break;
1617
1618 case TASK_POLICY_PASSIVE_IO:
1619 if (category == TASK_POLICY_EXTERNAL)
1620 requested.ext_iopassive = value;
1621 else
1622 requested.int_iopassive = value;
1623 break;
1624
1625 /* Category: INTERNAL, task only */
1626
1627 case TASK_POLICY_DARWIN_BG_IOPOL:
1628 assert(on_task && category == TASK_POLICY_INTERNAL);
1629 proc_iopol_to_tier(value, &tier, &passive);
1630 requested.bg_iotier = tier;
1631 break;
1632
1633 /* Category: ATTRIBUTE, task only */
1634
1635 case TASK_POLICY_TAL:
1636 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1637 requested.t_tal_enabled = value;
1638 break;
1639
1640 case TASK_POLICY_BOOST:
1641 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1642 requested.t_boosted = value;
1643 break;
1644
1645 case TASK_POLICY_ROLE:
1646 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1647 requested.t_role = value;
1648 break;
1649
1650 case TASK_POLICY_TERMINATED:
1651 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1652 requested.terminated = value;
1653 break;
1654 case TASK_BASE_LATENCY_QOS_POLICY:
1655 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1656 requested.t_base_latency_qos = value;
1657 break;
1658 case TASK_BASE_THROUGHPUT_QOS_POLICY:
1659 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1660 requested.t_base_through_qos = value;
1661 break;
1662 case TASK_POLICY_SFI_MANAGED:
1663 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1664 requested.t_sfi_managed = value;
1665 break;
1666
1667 /* Category: ATTRIBUTE, thread only */
1668
1669 case TASK_POLICY_PIDBIND_BG:
1670 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1671 requested.th_pidbind_bg = value;
1672 break;
1673
1674 case TASK_POLICY_WORKQ_BG:
1675 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1676 requested.th_workq_bg = value;
1677 break;
1678
1679 case TASK_POLICY_QOS:
1680 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1681 requested.thrp_qos = value;
1682 break;
1683
1684 case TASK_POLICY_QOS_OVERRIDE:
1685 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1686 requested.thrp_qos_override = value;
1687 break;
1688
1689 default:
1690 panic("unknown task policy: %d %d %d", category, flavor, value);
1691 break;
1692 }
1693
1694 if (on_task)
1695 task->requested_policy = requested;
1696 else
1697 thread->requested_policy = requested;
1698 }
1699
1700 /*
1701 * Variant of proc_set_task_policy_locked() that sets two scalars in the requested policy structure.
1702 */
1703 static void
1704 proc_set_task_policy2_locked(task_t task,
1705 thread_t thread,
1706 int category,
1707 int flavor,
1708 int value1,
1709 int value2)
1710 {
1711 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1712
1713 struct task_requested_policy requested =
1714 (on_task) ? task->requested_policy : thread->requested_policy;
1715
1716 switch (flavor) {
1717
1718 /* Category: ATTRIBUTE, task only */
1719
1720 case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1721 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1722 requested.t_base_latency_qos = value1;
1723 requested.t_base_through_qos = value2;
1724 break;
1725
1726 case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1727 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1728 requested.t_over_latency_qos = value1;
1729 requested.t_over_through_qos = value2;
1730 break;
1731
1732 /* Category: ATTRIBUTE, thread only */
1733
1734 case TASK_POLICY_QOS_AND_RELPRIO:
1735
1736 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1737 requested.thrp_qos = value1;
1738 requested.thrp_qos_relprio = value2;
1739 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1740 break;
1741
1742 default:
1743 panic("unknown task policy: %d %d %d %d", category, flavor, value1, value2);
1744 break;
1745 }
1746
1747 if (on_task)
1748 task->requested_policy = requested;
1749 else
1750 thread->requested_policy = requested;
1751 }
1752
1753
1754 /*
1755 * Gets what you set. Effective values may be different.
1756 */
1757 int
1758 proc_get_task_policy(task_t task,
1759 thread_t thread,
1760 int category,
1761 int flavor)
1762 {
1763 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1764
1765 int value = 0;
1766
1767 task_lock(task);
1768
1769 struct task_requested_policy requested =
1770 (on_task) ? task->requested_policy : thread->requested_policy;
1771
1772 switch (flavor) {
1773 case TASK_POLICY_DARWIN_BG:
1774 if (category == TASK_POLICY_EXTERNAL)
1775 value = requested.ext_darwinbg;
1776 else
1777 value = requested.int_darwinbg;
1778 break;
1779 case TASK_POLICY_IOPOL:
1780 if (category == TASK_POLICY_EXTERNAL)
1781 value = proc_tier_to_iopol(requested.ext_iotier,
1782 requested.ext_iopassive);
1783 else
1784 value = proc_tier_to_iopol(requested.int_iotier,
1785 requested.int_iopassive);
1786 break;
1787 case TASK_POLICY_IO:
1788 if (category == TASK_POLICY_EXTERNAL)
1789 value = requested.ext_iotier;
1790 else
1791 value = requested.int_iotier;
1792 break;
1793 case TASK_POLICY_PASSIVE_IO:
1794 if (category == TASK_POLICY_EXTERNAL)
1795 value = requested.ext_iopassive;
1796 else
1797 value = requested.int_iopassive;
1798 break;
1799 case TASK_POLICY_DARWIN_BG_IOPOL:
1800 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1801 value = proc_tier_to_iopol(requested.bg_iotier, 0);
1802 break;
1803 case TASK_POLICY_ROLE:
1804 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1805 value = requested.t_role;
1806 break;
1807 case TASK_POLICY_SFI_MANAGED:
1808 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1809 value = requested.t_sfi_managed;
1810 break;
1811 case TASK_POLICY_QOS:
1812 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1813 value = requested.thrp_qos;
1814 break;
1815 case TASK_POLICY_QOS_OVERRIDE:
1816 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1817 value = requested.thrp_qos_override;
1818 break;
1819 default:
1820 panic("unknown policy_flavor %d", flavor);
1821 break;
1822 }
1823
1824 task_unlock(task);
1825
1826 return value;
1827 }
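
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): the getter above reports the *requested* value for a flavor, which
 * may differ from the distilled decision. A caller comparing the two might do:
 *
 *	int req = proc_get_task_policy(task, THREAD_NULL,
 *	                               TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG);
 *	int eff = proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG);
 *
 * 'req' reflects only the external DARWIN_BG request, while 'eff' also folds
 * in internal requests and the other inputs evaluated by the state machine.
 */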
1828
1829 /*
1830 * Variant of proc_get_task_policy() that returns two scalar outputs.
1831 */
1832 void
1833 proc_get_task_policy2(task_t task, thread_t thread, int category __unused, int flavor, int *value1, int *value2)
1834 {
1835 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1836
1837 task_lock(task);
1838
1839 struct task_requested_policy requested =
1840 (on_task) ? task->requested_policy : thread->requested_policy;
1841
1842 switch (flavor) {
1843 /* TASK attributes */
1844 case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1845 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1846 *value1 = requested.t_base_latency_qos;
1847 *value2 = requested.t_base_through_qos;
1848 break;
1849
1850 case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1851 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1852 *value1 = requested.t_over_latency_qos;
1853 *value2 = requested.t_over_through_qos;
1854 break;
1855
1856 /* THREAD attributes */
1857 case TASK_POLICY_QOS_AND_RELPRIO:
1858 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1859 *value1 = requested.thrp_qos;
1860 *value2 = requested.thrp_qos_relprio;
1861 break;
1862
1863 default:
1864 panic("unknown policy_flavor %d", flavor);
1865 break;
1866 }
1867
1868 task_unlock(task);
1869 }
1870
1871
1872 /*
1873 * Functions for querying effective state for relevant subsystems
1874 * ONLY the relevant subsystem should query these.
1875 * NEVER take a value from one of the 'effective' functions and stuff it into a setter.
1876 */
1877
1878 int
1879 proc_get_effective_task_policy(task_t task, int flavor)
1880 {
1881 return proc_get_effective_policy(task, THREAD_NULL, flavor);
1882 }
1883
1884 int
1885 proc_get_effective_thread_policy(thread_t thread, int flavor)
1886 {
1887 return proc_get_effective_policy(thread->task, thread, flavor);
1888 }
1889
1890 /*
1891 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1892 *
1893 * NOTE: This accessor does not take the task lock.
1894 * Notifications of state updates need to be externally synchronized with state queries.
1895 * This routine *MUST* remain interrupt safe, as it is potentially invoked
1896 * within the context of a timer interrupt. It is also called in KDP context for stackshot.
1897 */
1898 static int
1899 proc_get_effective_policy(task_t task,
1900 thread_t thread,
1901 int flavor)
1902 {
1903 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1904 int value = 0;
1905
1906 switch (flavor) {
1907 case TASK_POLICY_DARWIN_BG:
1908 /*
1909 * This backs the KPI call proc_pidbackgrounded to find
1910 * out if a pid is backgrounded,
1911 * as well as proc_get_effective_thread_policy.
1912 * Its main use is within the timer layer, as well as
1913 * prioritizing requests to the graphics system.
1914 * Returns 1 for background mode, 0 for normal mode
1915 */
1916 if (on_task)
1917 value = task->effective_policy.darwinbg;
1918 else
1919 value = (task->effective_policy.darwinbg ||
1920 thread->effective_policy.darwinbg) ? 1 : 0;
1921 break;
1922 case TASK_POLICY_IO:
1923 /*
1924 * The I/O system calls here to find out what throttling tier to apply to an operation.
1925 * Returns THROTTLE_LEVEL_* values. Some userspace spinlock operations can apply
1926 * a temporary iotier override to make the I/O more aggressive to get the lock
1927 * owner to release the spinlock.
1928 */
1929 if (on_task)
1930 value = task->effective_policy.io_tier;
1931 else {
1932 value = MAX(task->effective_policy.io_tier,
1933 thread->effective_policy.io_tier);
1934 if (thread->iotier_override != THROTTLE_LEVEL_NONE)
1935 value = MIN(value, thread->iotier_override);
1936 }
1937 break;
1938 case TASK_POLICY_PASSIVE_IO:
1939 /*
1940 * The I/O system calls here to find out whether an operation should be passive.
1941 * (i.e. not cause operations with lower throttle tiers to be throttled)
1942 * Returns 1 for passive mode, 0 for normal mode.
1943 * If a userspace spinlock has applied an override, that I/O should always
1944 * be passive to avoid self-throttling when the override is removed and lower
1945 * iotier I/Os are issued.
1946 */
1947 if (on_task)
1948 value = task->effective_policy.io_passive;
1949 else {
1950 int io_tier = MAX(task->effective_policy.io_tier, thread->effective_policy.io_tier);
1951 boolean_t override_in_effect = (thread->iotier_override != THROTTLE_LEVEL_NONE) && (thread->iotier_override < io_tier);
1952
1953 value = (task->effective_policy.io_passive ||
1954 thread->effective_policy.io_passive || override_in_effect) ? 1 : 0;
1955 }
1956 break;
1957 case TASK_POLICY_ALL_SOCKETS_BG:
1958 /*
1959 * do_background_socket() calls this to determine what it should do to the proc's sockets
1960 * Returns 1 for background mode, 0 for normal mode
1961 *
1962 * This consults both thread and task so un-DBGing a thread while the task is BG
1963 * doesn't get you out of the network throttle.
1964 */
1965 if (on_task)
1966 value = task->effective_policy.all_sockets_bg;
1967 else
1968 value = (task->effective_policy.all_sockets_bg ||
1969 thread->effective_policy.all_sockets_bg) ? 1 : 0;
1970 break;
1971 case TASK_POLICY_NEW_SOCKETS_BG:
1972 /*
1973 * socreate() calls this to determine if it should mark a new socket as background
1974 * Returns 1 for background mode, 0 for normal mode
1975 */
1976 if (on_task)
1977 value = task->effective_policy.new_sockets_bg;
1978 else
1979 value = (task->effective_policy.new_sockets_bg ||
1980 thread->effective_policy.new_sockets_bg) ? 1 : 0;
1981 break;
1982 case TASK_POLICY_LOWPRI_CPU:
1983 /*
1984 * Returns 1 for low priority cpu mode, 0 for normal mode
1985 */
1986 if (on_task)
1987 value = task->effective_policy.lowpri_cpu;
1988 else
1989 value = (task->effective_policy.lowpri_cpu ||
1990 thread->effective_policy.lowpri_cpu) ? 1 : 0;
1991 break;
1992 case TASK_POLICY_SUPPRESSED_CPU:
1993 /*
1994 * Returns 1 for suppressed cpu mode, 0 for normal mode
1995 */
1996 assert(on_task);
1997 value = task->effective_policy.t_suppressed_cpu;
1998 break;
1999 case TASK_POLICY_LATENCY_QOS:
2000 /*
2001 * timer arming calls into here to find out the timer coalescing level
2002 * Returns a QoS tier (0-6)
2003 */
2004 if (on_task) {
2005 value = task->effective_policy.t_latency_qos;
2006 } else {
2007 value = MAX(task->effective_policy.t_latency_qos, thread->effective_policy.t_latency_qos);
2008 }
2009 break;
2010 case TASK_POLICY_THROUGH_QOS:
2011 /*
2012 * Returns a QoS tier (0-6)
2013 */
2014 assert(on_task);
2015 value = task->effective_policy.t_through_qos;
2016 break;
2017 case TASK_POLICY_ROLE:
2018 assert(on_task);
2019 value = task->effective_policy.t_role;
2020 break;
2021 case TASK_POLICY_WATCHERS_BG:
2022 assert(on_task);
2023 value = task->effective_policy.t_watchers_bg;
2024 break;
2025 case TASK_POLICY_SFI_MANAGED:
2026 assert(on_task);
2027 value = task->effective_policy.t_sfi_managed;
2028 break;
2029 case TASK_POLICY_QOS:
2030 assert(!on_task);
2031 value = thread->effective_policy.thep_qos;
2032 break;
2033 default:
2034 panic("unknown policy_flavor %d", flavor);
2035 break;
2036 }
2037
2038 return value;
2039 }
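
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): a subsystem that owns a flavor pulls the distilled decision from the
 * accessor above, e.g. an I/O path choosing a throttle tier for itself:
 *
 *	int tier    = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
 *	int passive = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_PASSIVE_IO);
 *
 * Per the warning above, these values are consumed only by the owning
 * subsystem and are never fed back into a setter.
 */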
2040
2041 /*
2042 * Convert from IOPOL_* values to throttle tiers.
2043 *
2044 * TODO: Can this be made more compact, like an array lookup
2045 * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
2046 */
2047
2048 static void
2049 proc_iopol_to_tier(int iopolicy, int *tier, int *passive)
2050 {
2051 *passive = 0;
2052 *tier = 0;
2053 switch (iopolicy) {
2054 case IOPOL_IMPORTANT:
2055 *tier = THROTTLE_LEVEL_TIER0;
2056 break;
2057 case IOPOL_PASSIVE:
2058 *tier = THROTTLE_LEVEL_TIER0;
2059 *passive = 1;
2060 break;
2061 case IOPOL_STANDARD:
2062 *tier = THROTTLE_LEVEL_TIER1;
2063 break;
2064 case IOPOL_UTILITY:
2065 *tier = THROTTLE_LEVEL_TIER2;
2066 break;
2067 case IOPOL_THROTTLE:
2068 *tier = THROTTLE_LEVEL_TIER3;
2069 break;
2070 default:
2071 panic("unknown I/O policy %d", iopolicy);
2072 break;
2073 }
2074 }
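
/*
 * Sketch for the TODO above (illustrative only, not compiled as part of this
 * file; assumes the IOPOL_* constants remain small consecutive integers):
 *
 *	static const struct { int tier; int passive; } iopol_map[] = {
 *		[IOPOL_IMPORTANT] = { THROTTLE_LEVEL_TIER0, 0 },
 *		[IOPOL_PASSIVE]   = { THROTTLE_LEVEL_TIER0, 1 },
 *		[IOPOL_STANDARD]  = { THROTTLE_LEVEL_TIER1, 0 },
 *		[IOPOL_UTILITY]   = { THROTTLE_LEVEL_TIER2, 0 },
 *		[IOPOL_THROTTLE]  = { THROTTLE_LEVEL_TIER3, 0 },
 *	};
 *
 * proc_iopol_to_tier() would then reduce to a bounds check plus one indexed
 * read, at the cost of a table that must stay in sync with sys/resource.h.
 */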
2075
2076 static int
2077 proc_tier_to_iopol(int tier, int passive)
2078 {
2079 if (passive == 1) {
2080 switch (tier) {
2081 case THROTTLE_LEVEL_TIER0:
2082 return IOPOL_PASSIVE;
2083 break;
2084 default:
2085 panic("unknown passive tier %d", tier);
2086 return IOPOL_DEFAULT;
2087 break;
2088 }
2089 } else {
2090 switch (tier) {
2091 case THROTTLE_LEVEL_NONE:
2092 case THROTTLE_LEVEL_TIER0:
2093 return IOPOL_DEFAULT;
2094 break;
2095 case THROTTLE_LEVEL_TIER1:
2096 return IOPOL_STANDARD;
2097 break;
2098 case THROTTLE_LEVEL_TIER2:
2099 return IOPOL_UTILITY;
2100 break;
2101 case THROTTLE_LEVEL_TIER3:
2102 return IOPOL_THROTTLE;
2103 break;
2104 default:
2105 panic("unknown tier %d", tier);
2106 return IOPOL_DEFAULT;
2107 break;
2108 }
2109 }
2110 }
2111
2112 int
2113 proc_darwin_role_to_task_role(int darwin_role, int* task_role)
2114 {
2115 integer_t role = TASK_UNSPECIFIED;
2116
2117 switch (darwin_role) {
2118 case PRIO_DARWIN_ROLE_DEFAULT:
2119 role = TASK_UNSPECIFIED;
2120 break;
2121 case PRIO_DARWIN_ROLE_UI_FOCAL:
2122 role = TASK_FOREGROUND_APPLICATION;
2123 break;
2124 case PRIO_DARWIN_ROLE_UI:
2125 role = TASK_DEFAULT_APPLICATION;
2126 break;
2127 case PRIO_DARWIN_ROLE_NON_UI:
2128 role = TASK_NONUI_APPLICATION;
2129 break;
2130 case PRIO_DARWIN_ROLE_UI_NON_FOCAL:
2131 role = TASK_BACKGROUND_APPLICATION;
2132 break;
2133 case PRIO_DARWIN_ROLE_TAL_LAUNCH:
2134 role = TASK_THROTTLE_APPLICATION;
2135 break;
2136 default:
2137 return EINVAL;
2138 }
2139
2140 *task_role = role;
2141
2142 return 0;
2143 }
2144
2145 int
2146 proc_task_role_to_darwin_role(int task_role)
2147 {
2148 switch (task_role) {
2149 case TASK_FOREGROUND_APPLICATION:
2150 return PRIO_DARWIN_ROLE_UI_FOCAL;
2151 case TASK_BACKGROUND_APPLICATION:
2152 return PRIO_DARWIN_ROLE_UI;
2153 case TASK_NONUI_APPLICATION:
2154 return PRIO_DARWIN_ROLE_NON_UI;
2155 case TASK_DEFAULT_APPLICATION:
2156 return PRIO_DARWIN_ROLE_UI_NON_FOCAL;
2157 case TASK_THROTTLE_APPLICATION:
2158 return PRIO_DARWIN_ROLE_TAL_LAUNCH;
2159 case TASK_UNSPECIFIED:
2160 default:
2161 return PRIO_DARWIN_ROLE_DEFAULT;
2162 }
2163 }
2164
2165
2166 /* apply internal backgrounding for workqueue threads */
2167 int
2168 proc_apply_workq_bgthreadpolicy(thread_t thread)
2169 {
2170 if (thread == THREAD_NULL)
2171 return ESRCH;
2172
2173 proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
2174 TASK_POLICY_WORKQ_BG, TASK_POLICY_ENABLE);
2175
2176 return(0);
2177 }
2178
2179 /*
2180 * remove internal backgrounding for workqueue threads
2181 * does NOT go find sockets created while BG and unbackground them
2182 */
2183 int
2184 proc_restore_workq_bgthreadpolicy(thread_t thread)
2185 {
2186 if (thread == THREAD_NULL)
2187 return ESRCH;
2188
2189 proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
2190 TASK_POLICY_WORKQ_BG, TASK_POLICY_DISABLE);
2191
2192 return(0);
2193 }
2194
2195 /* here for temporary compatibility */
2196 int
2197 proc_setthread_saved_importance(__unused thread_t thread, __unused int importance)
2198 {
2199 return(0);
2200 }
2201
2202 /*
2203 * Set an override on the thread which is consulted with a
2204 * higher priority than the task/thread policy. This should
2205 * only be set for temporary grants until the thread
2206 * returns to the userspace boundary
2207 *
2208 * We use atomic operations to swap in the override, with
2209 * the assumption that the thread itself can
2210 * read the override and clear it on return to userspace.
2211 *
2212 * No locking is performed, since it is acceptable to see
2213 * a stale override for one loop through throttle_lowpri_io().
2214 * However a thread reference must be held on the thread.
2215 */
2216
2217 void set_thread_iotier_override(thread_t thread, int policy)
2218 {
2219 int current_override;
2220
2221 /* Let most aggressive I/O policy win until user boundary */
2222 do {
2223 current_override = thread->iotier_override;
2224
2225 if (current_override != THROTTLE_LEVEL_NONE)
2226 policy = MIN(current_override, policy);
2227
2228 if (current_override == policy) {
2229 /* no effective change */
2230 return;
2231 }
2232 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2233
2234 /*
2235 * Since the thread may be currently throttled,
2236 * re-evaluate tiers and potentially break out
2237 * of an msleep
2238 */
2239 rethrottle_thread(thread->uthread);
2240 }
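
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): the userspace-spinlock path described above could boost a lock owner
 * that is holding up a higher-tier thread, relying on the owner to clear the
 * override when it returns to userspace:
 *
 *	set_thread_iotier_override(owner_thread, THROTTLE_LEVEL_TIER0);
 *
 * 'owner_thread' is a placeholder name. Because only the most aggressive
 * (numerically lowest) tier wins, repeated calls from racing waiters are
 * harmless.
 */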
2241
2242 /*
2243 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2244 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2245 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2246 * priority thread. In these cases, we attempt to propagate the priority token, as long
2247 * as the subsystem informs us of the relationships between the threads. The userspace
2248 * synchronization subsystem should maintain the information of owner->resource and
2249 * resource->waiters itself.
2250 */
2251
2252 /*
2253 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2254 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2255 * to be handled specially in the future, but for now it's fine to slam
2256 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2257 */
2258 static void _canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
2259 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2260 /* Map all input resource/type to a single one */
2261 *resource = USER_ADDR_NULL;
2262 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2263 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2264 /* no transform */
2265 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH) {
2266 /* Map all dispatch overrides to a single one, to avoid memory overhead */
2267 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
2268 *resource = USER_ADDR_NULL;
2269 }
2270 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2271 /* Map all mutex overrides to a single one, to avoid memory overhead */
2272 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2273 *resource = USER_ADDR_NULL;
2274 }
2275 }
2276 }
2277
2278 /* This helper routine finds an existing override if known. Locking should be done by caller */
2279 static struct thread_qos_override *_find_qos_override(thread_t thread, user_addr_t resource, int resource_type) {
2280 struct thread_qos_override *override;
2281
2282 override = thread->overrides;
2283 while (override) {
2284 if (override->override_resource == resource &&
2285 override->override_resource_type == resource_type) {
2286 return override;
2287 }
2288
2289 override = override->override_next;
2290 }
2291
2292 return NULL;
2293 }
2294
2295 static void _find_and_decrement_qos_override(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, struct thread_qos_override **free_override_list) {
2296 struct thread_qos_override *override, *override_prev;
2297
2298 override_prev = NULL;
2299 override = thread->overrides;
2300 while (override) {
2301 struct thread_qos_override *override_next = override->override_next;
2302
2303 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2304 override->override_resource_type == resource_type) {
2305 if (reset) {
2306 override->override_contended_resource_count = 0;
2307 } else {
2308 override->override_contended_resource_count--;
2309 }
2310
2311 if (override->override_contended_resource_count == 0) {
2312 if (override_prev == NULL) {
2313 thread->overrides = override_next;
2314 } else {
2315 override_prev->override_next = override_next;
2316 }
2317
2318 /* Add to out-param for later zfree */
2319 override->override_next = *free_override_list;
2320 *free_override_list = override;
2321 } else {
2322 override_prev = override;
2323 }
2324
2325 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2326 return;
2327 }
2328 } else {
2329 override_prev = override;
2330 }
2331
2332 override = override_next;
2333 }
2334 }
2335
2336 /* This helper recalculates the current requested override using the policy selected at boot */
2337 static int _calculate_requested_qos_override(thread_t thread)
2338 {
2339 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2340 return THREAD_QOS_UNSPECIFIED;
2341 }
2342
2343 /* iterate over all overrides and calculate MAX */
2344 struct thread_qos_override *override;
2345 int qos_override = THREAD_QOS_UNSPECIFIED;
2346
2347 override = thread->overrides;
2348 while (override) {
2349 if (qos_override_mode != QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH ||
2350 override->override_resource_type != THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
2351 qos_override = MAX(qos_override, override->override_qos);
2352 }
2353
2354 override = override->override_next;
2355 }
2356
2357 return qos_override;
2358 }
2359
2360 boolean_t proc_thread_qos_add_override(task_t task, thread_t thread, uint64_t tid, int override_qos, boolean_t first_override_for_resource, user_addr_t resource, int resource_type)
2361 {
2362 thread_t self = current_thread();
2363 struct task_pend_token pend_token = {};
2364
2365 /* XXX move to thread mutex when thread policy does */
2366 task_lock(task);
2367
2368 /*
2369 * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference
2370 * to the thread
2371 */
2372
2373 if (thread != THREAD_NULL) {
2374 assert(task == thread->task);
2375 } else {
2376 if (tid == self->thread_id) {
2377 thread = self;
2378 } else {
2379 thread = task_findtid(task, tid);
2380
2381 if (thread == THREAD_NULL) {
2382 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2383 tid, 0, 0xdead, 0, 0);
2384 task_unlock(task);
2385 return FALSE;
2386 }
2387 }
2388 }
2389
2390 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2391 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2392
2393 DTRACE_BOOST5(qos_add_override_pre, uint64_t, tid, uint64_t, thread->requested_policy.thrp_qos,
2394 uint64_t, thread->effective_policy.thep_qos, int, override_qos, boolean_t, first_override_for_resource);
2395
2396 struct task_requested_policy requested = thread->requested_policy;
2397 struct thread_qos_override *override;
2398 struct thread_qos_override *deferred_free_override = NULL;
2399 int new_qos_override, prev_qos_override;
2400 int new_effective_qos;
2401 boolean_t has_thread_reference = FALSE;
2402
2403 _canonicalize_resource_and_type(&resource, &resource_type);
2404
2405 if (first_override_for_resource) {
2406 override = _find_qos_override(thread, resource, resource_type);
2407 if (override) {
2408 override->override_contended_resource_count++;
2409 } else {
2410 struct thread_qos_override *override_new;
2411
2412 /* We need to allocate a new object. Drop the task lock and recheck afterwards in case someone else added the override */
2413 thread_reference(thread);
2414 has_thread_reference = TRUE;
2415 task_unlock(task);
2416 override_new = zalloc(thread_qos_override_zone);
2417 task_lock(task);
2418
2419 override = _find_qos_override(thread, resource, resource_type);
2420 if (override) {
2421 /* Someone else already allocated while the task lock was dropped */
2422 deferred_free_override = override_new;
2423 override->override_contended_resource_count++;
2424 } else {
2425 override = override_new;
2426 override->override_next = thread->overrides;
2427 override->override_contended_resource_count = 1 /* since first_override_for_resource was TRUE */;
2428 override->override_resource = resource;
2429 override->override_resource_type = resource_type;
2430 override->override_qos = THREAD_QOS_UNSPECIFIED;
2431 thread->overrides = override;
2432 }
2433 }
2434 } else {
2435 override = _find_qos_override(thread, resource, resource_type);
2436 }
2437
2438 if (override) {
2439 if (override->override_qos == THREAD_QOS_UNSPECIFIED)
2440 override->override_qos = override_qos;
2441 else
2442 override->override_qos = MAX(override->override_qos, override_qos);
2443 }
2444
2445 /* Determine how to combine the various overrides into a single current requested override */
2446 prev_qos_override = requested.thrp_qos_override;
2447 new_qos_override = _calculate_requested_qos_override(thread);
2448
2449 if (new_qos_override != prev_qos_override) {
2450 requested.thrp_qos_override = new_qos_override;
2451
2452 thread->requested_policy = requested;
2453
2454 task_policy_update_locked(task, thread, &pend_token);
2455
2456 if (!has_thread_reference) {
2457 thread_reference(thread);
2458 }
2459
2460 task_unlock(task);
2461
2462 task_policy_update_complete_unlocked(task, thread, &pend_token);
2463
2464 new_effective_qos = thread->effective_policy.thep_qos;
2465
2466 thread_deallocate(thread);
2467 } else {
2468 new_effective_qos = thread->effective_policy.thep_qos;
2469
2470 task_unlock(task);
2471
2472 if (has_thread_reference) {
2473 thread_deallocate(thread);
2474 }
2475 }
2476
2477 if (deferred_free_override) {
2478 zfree(thread_qos_override_zone, deferred_free_override);
2479 }
2480
2481 DTRACE_BOOST3(qos_add_override_post, int, prev_qos_override, int, new_qos_override,
2482 int, new_effective_qos);
2483
2484 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2485 new_qos_override, resource, resource_type, 0, 0);
2486
2487 return TRUE;
2488 }
2489
2490
2491 static boolean_t _proc_thread_qos_remove_override_internal(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type, boolean_t reset)
2492 {
2493 thread_t self = current_thread();
2494 struct task_pend_token pend_token = {};
2495
2496 /* XXX move to thread mutex when thread policy does */
2497 task_lock(task);
2498
2499 /*
2500 * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference
2501 * to the thread
2502 */
2503 if (thread != THREAD_NULL) {
2504 assert(task == thread->task);
2505 } else {
2506 if (tid == self->thread_id) {
2507 thread = self;
2508 } else {
2509 thread = task_findtid(task, tid);
2510
2511 if (thread == THREAD_NULL) {
2512 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2513 tid, 0, 0xdead, 0, 0);
2514 task_unlock(task);
2515 return FALSE;
2516 }
2517 }
2518 }
2519
2520 struct task_requested_policy requested = thread->requested_policy;
2521 struct thread_qos_override *deferred_free_override_list = NULL;
2522 int new_qos_override, prev_qos_override;
2523
2524 _canonicalize_resource_and_type(&resource, &resource_type);
2525
2526 _find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2527
2528 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2529 thread_tid(thread), resource, reset, 0, 0);
2530
2531 /* Determine how to combine the various overrides into a single current requested override */
2532 prev_qos_override = requested.thrp_qos_override;
2533 new_qos_override = _calculate_requested_qos_override(thread);
2534
2535 if (new_qos_override != prev_qos_override) {
2536 requested.thrp_qos_override = new_qos_override;
2537
2538 thread->requested_policy = requested;
2539
2540 task_policy_update_locked(task, thread, &pend_token);
2541
2542 thread_reference(thread);
2543
2544 task_unlock(task);
2545
2546 task_policy_update_complete_unlocked(task, thread, &pend_token);
2547
2548 thread_deallocate(thread);
2549 } else {
2550 task_unlock(task);
2551 }
2552
2553 while (deferred_free_override_list) {
2554 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2555
2556 zfree(thread_qos_override_zone, deferred_free_override_list);
2557 deferred_free_override_list = override_next;
2558 }
2559
2560 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2561 0, 0, 0, 0, 0);
2562
2563 return TRUE;
2564 }
2565
2566 boolean_t proc_thread_qos_remove_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type)
2567 {
2568 return _proc_thread_qos_remove_override_internal(task, thread, tid, resource, resource_type, FALSE);
2569
2570 }
2571
2572 boolean_t proc_thread_qos_reset_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type)
2573 {
2574 return _proc_thread_qos_remove_override_internal(task, thread, tid, resource, resource_type, TRUE);
2575 }
2576
2577 /* Deallocate before thread termination */
2578 void proc_thread_qos_deallocate(thread_t thread)
2579 {
2580 task_t task = thread->task;
2581 struct thread_qos_override *override;
2582
2583 /* XXX move to thread mutex when thread policy does */
2584 task_lock(task);
2585 override = thread->overrides;
2586 thread->overrides = NULL; /* task policy re-evaluation needed? */
2587 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2588 task_unlock(task);
2589
2590 while (override) {
2591 struct thread_qos_override *override_next = override->override_next;
2592
2593 zfree(thread_qos_override_zone, override);
2594 override = override_next;
2595 }
2596 }
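
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): a userspace synchronization subsystem that tracks owner->resource
 * and resource->waiters relationships would bracket a contended wait roughly
 * like this:
 *
 *	// waiter at 'waiter_qos' blocks on 'mutex_addr' owned by 'owner_tid'
 *	proc_thread_qos_add_override(task, THREAD_NULL, owner_tid, waiter_qos,
 *	        first_waiter, mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 *	...
 *	// owner unlocks; drop one contention count for that resource
 *	proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid,
 *	        mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 *
 * 'owner_tid', 'waiter_qos', 'first_waiter', and 'mutex_addr' are placeholder
 * names; the real bookkeeping is maintained by the userspace synchronization
 * subsystem, as noted in the comment block above these routines.
 */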
2597
2598 /* TODO: remove this variable when interactive daemon audit period is over */
2599 extern boolean_t ipc_importance_interactive_receiver;
2600
2601 /*
2602 * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
2603 *
2604 * TODO: Make this function more table-driven instead of ad-hoc
2605 */
2606 void
2607 proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, int role,
2608 ipc_port_t * portwatch_ports, int portwatch_count)
2609 {
2610 struct task_pend_token pend_token = {};
2611
2612 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2613 (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
2614 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2615 apptype, 0);
2616
2617 switch (apptype) {
2618 case TASK_APPTYPE_APP_TAL:
2619 case TASK_APPTYPE_APP_DEFAULT:
2620 /* Apps become donors via the 'live-donor' flag instead of the static donor flag */
2621 task_importance_mark_donor(task, FALSE);
2622 task_importance_mark_live_donor(task, TRUE);
2623 task_importance_mark_receiver(task, FALSE);
2624 /* Apps are de-nap receivers on desktop for suppression behaviors */
2625 task_importance_mark_denap_receiver(task, TRUE);
2626 break;
2627
2628 case TASK_APPTYPE_DAEMON_INTERACTIVE:
2629 task_importance_mark_donor(task, TRUE);
2630 task_importance_mark_live_donor(task, FALSE);
2631
2632 /*
2633 * A boot arg controls whether interactive daemons are importance receivers.
2634 * Normally, they are not. But for testing their behavior as an adaptive
2635 * daemon, the boot-arg can be set.
2636 *
2637 * TODO: remove this when the interactive daemon audit period is over.
2638 */
2639 task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver);
2640 task_importance_mark_denap_receiver(task, FALSE);
2641 break;
2642
2643 case TASK_APPTYPE_DAEMON_STANDARD:
2644 task_importance_mark_donor(task, TRUE);
2645 task_importance_mark_live_donor(task, FALSE);
2646 task_importance_mark_receiver(task, FALSE);
2647 task_importance_mark_denap_receiver(task, FALSE);
2648 break;
2649
2650 case TASK_APPTYPE_DAEMON_ADAPTIVE:
2651 task_importance_mark_donor(task, FALSE);
2652 task_importance_mark_live_donor(task, FALSE);
2653 task_importance_mark_receiver(task, TRUE);
2654 task_importance_mark_denap_receiver(task, FALSE);
2655 break;
2656
2657 case TASK_APPTYPE_DAEMON_BACKGROUND:
2658 task_importance_mark_donor(task, FALSE);
2659 task_importance_mark_live_donor(task, FALSE);
2660 task_importance_mark_receiver(task, FALSE);
2661 task_importance_mark_denap_receiver(task, FALSE);
2662 break;
2663
2664 case TASK_APPTYPE_NONE:
2665 break;
2666 }
2667
2668 if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2669 int portwatch_boosts = 0;
2670
2671 for (int i = 0; i < portwatch_count; i++) {
2672 ipc_port_t port = NULL;
2673
2674 if ((port = portwatch_ports[i]) != NULL) {
2675 int boost = 0;
2676 task_add_importance_watchport(task, port, &boost);
2677 portwatch_boosts += boost;
2678 }
2679 }
2680
2681 if (portwatch_boosts > 0) {
2682 task_importance_hold_internal_assertion(task, portwatch_boosts);
2683 }
2684 }
2685
2686 task_lock(task);
2687
2688 if (apptype == TASK_APPTYPE_APP_TAL) {
2689 /* TAL starts off enabled by default */
2690 task->requested_policy.t_tal_enabled = 1;
2691 }
2692
2693 if (apptype != TASK_APPTYPE_NONE) {
2694 task->requested_policy.t_apptype = apptype;
2695 }
2696
2697 if (role != TASK_UNSPECIFIED) {
2698 task->requested_policy.t_role = role;
2699 }
2700
2701 if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2702 task->requested_policy.t_qos_clamp = qos_clamp;
2703 }
2704
2705 task_policy_update_locked(task, THREAD_NULL, &pend_token);
2706
2707 task_unlock(task);
2708
2709 /* Ensure the donor bit is updated to be in sync with the new live donor status */
2710 pend_token.tpt_update_live_donor = 1;
2711
2712 task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
2713
2714 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2715 (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
2716 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2717 task_is_importance_receiver(task), 0);
2718 }
2719
2720 extern task_t bsd_init_task;
2721
2722 /* Set up the primordial thread's QoS */
2723 void
2724 task_set_main_thread_qos(task_t task, thread_t main_thread) {
2725 struct task_pend_token pend_token = {};
2726
2727 assert(main_thread->task == task);
2728
2729 task_lock(task);
2730
2731 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2732 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2733 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2734 main_thread->requested_policy.thrp_qos, 0);
2735
2736 int primordial_qos = THREAD_QOS_UNSPECIFIED;
2737
2738 int qos_clamp = task->requested_policy.t_qos_clamp;
2739
2740 if (task == bsd_init_task) {
2741 /* PID 1 gets a special case */
2742 primordial_qos = THREAD_QOS_USER_INITIATED;
2743 }
2744
2745 switch (task->requested_policy.t_apptype) {
2746 case TASK_APPTYPE_APP_TAL:
2747 case TASK_APPTYPE_APP_DEFAULT:
2748 primordial_qos = THREAD_QOS_USER_INTERACTIVE;
2749 break;
2750
2751 case TASK_APPTYPE_DAEMON_INTERACTIVE:
2752 case TASK_APPTYPE_DAEMON_STANDARD:
2753 case TASK_APPTYPE_DAEMON_ADAPTIVE:
2754 primordial_qos = THREAD_QOS_LEGACY;
2755 break;
2756
2757 case TASK_APPTYPE_DAEMON_BACKGROUND:
2758 primordial_qos = THREAD_QOS_BACKGROUND;
2759 break;
2760 }
2761
2762 if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2763 if (primordial_qos != THREAD_QOS_UNSPECIFIED) {
2764 primordial_qos = MIN(qos_clamp, primordial_qos);
2765 } else {
2766 primordial_qos = qos_clamp;
2767 }
2768 }
2769
2770 main_thread->requested_policy.thrp_qos = primordial_qos;
2771
2772 task_policy_update_locked(task, main_thread, &pend_token);
2773
2774 task_unlock(task);
2775
2776 task_policy_update_complete_unlocked(task, main_thread, &pend_token);
2777
2778 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2779 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2780 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
2781 primordial_qos, 0);
2782 }
2783
2784 /* for process_policy to check before attempting to set */
2785 boolean_t
2786 proc_task_is_tal(task_t task)
2787 {
2788 return (task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
2789 }
2790
2791 int
2792 task_get_apptype(task_t task)
2793 {
2794 return task->requested_policy.t_apptype;
2795 }
2796
2797 /* for telemetry */
2798 integer_t
2799 task_grab_latency_qos(task_t task)
2800 {
2801 return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
2802 }
2803
2804 /* update the darwin background action state in the flags field for libproc */
2805 int
2806 proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
2807 {
2808 if (task->requested_policy.ext_darwinbg)
2809 *flagsp |= PROC_FLAG_EXT_DARWINBG;
2810
2811 if (task->requested_policy.int_darwinbg)
2812 *flagsp |= PROC_FLAG_DARWINBG;
2813
2814
2815 if (task->requested_policy.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
2816 task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL)
2817 *flagsp |= PROC_FLAG_APPLICATION;
2818
2819 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE)
2820 *flagsp |= PROC_FLAG_ADAPTIVE;
2821
2822 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && task->requested_policy.t_boosted == 1)
2823 *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;
2824
2825 if (task_is_importance_donor(task))
2826 *flagsp |= PROC_FLAG_IMPORTANCE_DONOR;
2827
2828 if (task->effective_policy.t_sup_active)
2829 *flagsp |= PROC_FLAG_SUPPRESSED;
2830
2831 return(0);
2832 }
2833
2834 /* All per-thread state is in the first 32-bits of the bitfield */
2835 void
2836 proc_get_thread_policy(thread_t thread, thread_policy_state_t info)
2837 {
2838 task_t task = thread->task;
2839 task_lock(task);
2840 info->requested = (integer_t)task_requested_bitfield(task, thread);
2841 info->effective = (integer_t)task_effective_bitfield(task, thread);
2842 info->pending = 0;
2843 task_unlock(task);
2844 }
2845
2846 /*
2847 * Tracepoint data... Reading the tracepoint data can be somewhat complicated.
2848 * The current scheme packs as much data into a single tracepoint as it can.
2849 *
2850 * Each task/thread requested/effective structure is 64 bits in size. Any
2851 * given tracepoint will emit either requested or effective data, but not both.
2852 *
2853 * A tracepoint may emit any of task, thread, or task & thread data.
2854 *
2855 * The type of data emitted varies with pointer size. Where possible, both
2856 * task and thread data are emitted. In LP32 systems, the first and second
2857 * halves of either the task or thread data are emitted.
2858 *
2859 * The code uses uintptr_t array indexes instead of high/low to avoid
2860 * confusion WRT big vs little endian.
2861 *
2862 * The truth table for the tracepoint data functions is below, and has the
2863 * following invariants:
2864 *
2865 * 1) task and thread are uintptr_t*
2866 * 2) task may never be NULL
2867 *
2868 *
2869 * LP32 LP64
2870 * trequested_0(task, NULL) task[0] task[0]
2871 * trequested_1(task, NULL) task[1] NULL
2872 * trequested_0(task, thread) thread[0] task[0]
2873 * trequested_1(task, thread) thread[1] thread[0]
2874 *
2875 * Basically, you get a full task or thread on LP32, and both on LP64.
2876 *
2877 * The uintptr_t munging here is squicky enough to deserve a comment.
2878 *
2879 * The variables we are accessing are laid out in memory like this:
2880 *
2881 * [ LP64 uintptr_t 0 ]
2882 * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
2883 *
2884 * 1 2 3 4 5 6 7 8
2885 *
2886 */
2887
2888 static uintptr_t
2889 trequested_0(task_t task, thread_t thread)
2890 {
2891 assert(task);
2892 _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2893 _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated");
2894
2895 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy);
2896 return raw[0];
2897 }
2898
2899 static uintptr_t
2900 trequested_1(task_t task, thread_t thread)
2901 {
2902 assert(task);
2903 _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2904 _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated");
2905
2906 #if defined __LP64__
2907 return (thread == NULL) ? 0 : *(uintptr_t*)&thread->requested_policy;
2908 #else
2909 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy);
2910 return raw[1];
2911 #endif
2912 }
2913
2914 static uintptr_t
2915 teffective_0(task_t task, thread_t thread)
2916 {
2917 assert(task);
2918 _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated");
2919 _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated");
2920
2921 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy);
2922 return raw[0];
2923 }
2924
2925 static uintptr_t
2926 teffective_1(task_t task, thread_t thread)
2927 {
2928 assert(task);
2929 _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated");
2930 _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated");
2931
2932 #if defined __LP64__
2933 return (thread == NULL) ? 0 : *(uintptr_t*)&thread->effective_policy;
2934 #else
2935 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy);
2936 return raw[1];
2937 #endif
2938 }
2939
2940 /* dump pending for tracepoint */
2941 static uint32_t tpending(task_pend_token_t pend_token) { return *(uint32_t*)(void*)(pend_token); }
2942
2943 uint64_t
2944 task_requested_bitfield(task_t task, thread_t thread)
2945 {
2946 uint64_t bits = 0;
2947 struct task_requested_policy requested =
2948 (thread == THREAD_NULL) ? task->requested_policy : thread->requested_policy;
2949
2950 bits |= (requested.int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2951 bits |= (requested.ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2952 bits |= (requested.int_iotier ? (((uint64_t)requested.int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2953 bits |= (requested.ext_iotier ? (((uint64_t)requested.ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2954 bits |= (requested.int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2955 bits |= (requested.ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2956 bits |= (requested.bg_iotier ? (((uint64_t)requested.bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0);
2957 bits |= (requested.terminated ? POLICY_REQ_TERMINATED : 0);
2958
2959 bits |= (requested.th_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2960 bits |= (requested.th_workq_bg ? POLICY_REQ_WORKQ_BG : 0);
2961
2962 if (thread != THREAD_NULL) {
2963 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2964 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2965 }
2966
2967 bits |= (requested.t_boosted ? POLICY_REQ_BOOSTED : 0);
2968 bits |= (requested.t_tal_enabled ? POLICY_REQ_TAL_ENABLED : 0);
2969 bits |= (requested.t_apptype ? (((uint64_t)requested.t_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0);
2970 bits |= (requested.t_role ? (((uint64_t)requested.t_role) << POLICY_REQ_ROLE_SHIFT) : 0);
2971
2972 bits |= (requested.t_sup_active ? POLICY_REQ_SUP_ACTIVE : 0);
2973 bits |= (requested.t_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0);
2974 bits |= (requested.t_sup_cpu ? POLICY_REQ_SUP_CPU : 0);
2975 bits |= (requested.t_sup_timer ? (((uint64_t)requested.t_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
2976 bits |= (requested.t_sup_throughput ? (((uint64_t)requested.t_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0);
2977 bits |= (requested.t_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : 0);
2978 bits |= (requested.t_sup_cpu_limit ? POLICY_REQ_SUP_CPU_LIMIT : 0);
2979 bits |= (requested.t_sup_suspend ? POLICY_REQ_SUP_SUSPEND : 0);
2980 bits |= (requested.t_sup_bg_sockets ? POLICY_REQ_SUP_BG_SOCKETS : 0);
2981 bits |= (requested.t_base_latency_qos ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2982 bits |= (requested.t_over_latency_qos ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
2983 bits |= (requested.t_base_through_qos ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2984 bits |= (requested.t_over_through_qos ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);
2985 bits |= (requested.t_sfi_managed ? POLICY_REQ_SFI_MANAGED : 0);
2986 bits |= (requested.t_qos_clamp ? (((uint64_t)requested.t_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT) : 0);
2987
2988 return bits;
2989 }
2990
2991 uint64_t
2992 task_effective_bitfield(task_t task, thread_t thread)
2993 {
2994 uint64_t bits = 0;
2995 struct task_effective_policy effective =
2996 (thread == THREAD_NULL) ? task->effective_policy : thread->effective_policy;
2997
2998 bits |= (effective.io_tier ? (((uint64_t)effective.io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2999 bits |= (effective.io_passive ? POLICY_EFF_IO_PASSIVE : 0);
3000 bits |= (effective.darwinbg ? POLICY_EFF_DARWIN_BG : 0);
3001 bits |= (effective.lowpri_cpu ? POLICY_EFF_LOWPRI_CPU : 0);
3002 bits |= (effective.terminated ? POLICY_EFF_TERMINATED : 0);
3003 bits |= (effective.all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
3004 bits |= (effective.new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
3005 bits |= (effective.bg_iotier ? (((uint64_t)effective.bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0);
3006 bits |= (effective.qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0);
3007
3008 if (thread != THREAD_NULL)
3009 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
3010
3011 bits |= (effective.t_tal_engaged ? POLICY_EFF_TAL_ENGAGED : 0);
3012 bits |= (effective.t_suspended ? POLICY_EFF_SUSPENDED : 0);
3013 bits |= (effective.t_watchers_bg ? POLICY_EFF_WATCHERS_BG : 0);
3014 bits |= (effective.t_sup_active ? POLICY_EFF_SUP_ACTIVE : 0);
3015 bits |= (effective.t_suppressed_cpu ? POLICY_EFF_SUP_CPU : 0);
3016 bits |= (effective.t_role ? (((uint64_t)effective.t_role) << POLICY_EFF_ROLE_SHIFT) : 0);
3017 bits |= (effective.t_latency_qos ? (((uint64_t)effective.t_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
3018 bits |= (effective.t_through_qos ? (((uint64_t)effective.t_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
3019 bits |= (effective.t_sfi_managed ? POLICY_EFF_SFI_MANAGED : 0);
3020 bits |= (effective.t_qos_ceiling ? (((uint64_t)effective.t_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0);
3021
3022 return bits;
3023 }
3024
3025
3026 /*
3027 * Resource usage and CPU related routines
3028 */
3029
3030 int
3031 proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
3032 {
3033
3034 int error = 0;
3035 int scope;
3036
3037 task_lock(task);
3038
3039
3040 error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope);
3041 task_unlock(task);
3042
3043 /*
3044 * Reverse-map from CPU resource limit scopes back to policies (see comment below).
3045 */
3046 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3047 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
3048 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3049 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
3050 } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
3051 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
3052 }
3053
3054 return(error);
3055 }
3056
3057 /*
3058 * Configure the default CPU usage monitor parameters.
3059 *
3060 * For tasks which have this mechanism activated: if any thread in the
3061 * process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
3062 */
3063 void
3064 proc_init_cpumon_params(void)
3065 {
3066 /*
3067 * The max CPU percentage can be configured via the boot-args and
3068 * a key in the device tree. The boot-args are honored first, then the
3069 * device tree.
3070 */
3071 if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
3072 sizeof (proc_max_cpumon_percentage)))
3073 {
3074 uint64_t max_percentage = 0ULL;
3075
3076 if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage,
3077 sizeof(max_percentage)))
3078 {
3079 max_percentage = DEFAULT_CPUMON_PERCENTAGE;
3080 }
3081
3082 assert(max_percentage <= UINT8_MAX);
3083 proc_max_cpumon_percentage = (uint8_t) max_percentage;
3084 }
3085
3086 if (proc_max_cpumon_percentage > 100) {
3087 proc_max_cpumon_percentage = 100;
3088 }
3089
3090 /*
3091 * The interval should be specified in seconds.
3092 *
3093 * Like the max CPU percentage, the max CPU interval can be configured
3094 * via boot-args and the device tree.
3095 */
3096 if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
3097 sizeof (proc_max_cpumon_interval)))
3098 {
3099 if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval,
3100 sizeof(proc_max_cpumon_interval)))
3101 {
3102 proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
3103 }
3104 }
3105
3106 proc_max_cpumon_interval *= NSEC_PER_SEC;
3107
3108 /* TEMPORARY boot arg to control App suppression */
3109 PE_parse_boot_argn("task_policy_suppression_disable",
3110 &task_policy_suppression_disable,
3111 sizeof(task_policy_suppression_disable));
3112 }
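
/*
 * Illustrative configuration sketch (values are hypothetical): the defaults
 * established above can be overridden at boot, e.g. with boot-args such as
 *
 *	max_cpumon_percentage=50 max_cpumon_interval=180
 *
 * which would cap the per-thread CPU monitor at 50% of a CPU over a
 * 180-second interval for tasks that request TASK_POLICY_CPUMON_DEFAULTS.
 */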
3113
3114 /*
3115 * Currently supported configurations for CPU limits.
3116 *
3117 * Policy | Deadline-based CPU limit | Percentage-based CPU limit
3118 * -------------------------------------+--------------------------+------------------------------
3119 * PROC_POLICY_RSRCACT_THROTTLE | ENOTSUP | Task-wide scope only
3120 * PROC_POLICY_RSRCACT_SUSPEND | Task-wide scope only | ENOTSUP
3121 * PROC_POLICY_RSRCACT_TERMINATE | Task-wide scope only | ENOTSUP
3122 * PROC_POLICY_RSRCACT_NOTIFY_KQ | Task-wide scope only | ENOTSUP
3123 * PROC_POLICY_RSRCACT_NOTIFY_EXC | ENOTSUP | Per-thread scope only
3124 *
3125 * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
3126 * after the specified amount of wallclock time has elapsed.
3127 *
3128 * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
3129 * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
3130 * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
3131 * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
3132 *
3133 * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
3134 * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
3135 * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
3136 * but the potential consumer of the API at the time was insisting on wallclock time instead.
3137 *
3138 * Currently, requesting notification via an exception is the only way to get per-thread scope for a
3139 * CPU limit. All other types of notifications force task-wide scope for the limit.
3140 */
3141 int
3142 proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
3143 int cpumon_entitled)
3144 {
3145 int error = 0;
3146 int scope;
3147
3148 /*
3149 * Enforce the matrix of supported configurations for policy, percentage, and deadline.
3150 */
3151 switch (policy) {
3152 // If no policy is explicitly given, the default is to throttle.
3153 case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
3154 case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
3155 if (deadline != 0)
3156 return (ENOTSUP);
3157 scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
3158 break;
3159 case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
3160 case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
3161 case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
3162 if (percentage != 0)
3163 return (ENOTSUP);
3164 scope = TASK_RUSECPU_FLAGS_DEADLINE;
3165 break;
3166 case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
3167 if (deadline != 0)
3168 return (ENOTSUP);
3169 scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3170 #ifdef CONFIG_NOMONITORS
3171 return (error);
3172 #endif /* CONFIG_NOMONITORS */
3173 break;
3174 default:
3175 return (EINVAL);
3176 }
3177
3178 task_lock(task);
3179 if (task != current_task()) {
3180 task->policy_ru_cpu_ext = policy;
3181 } else {
3182 task->policy_ru_cpu = policy;
3183 }
3184 error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
3185 task_unlock(task);
3186 return(error);
3187 }
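
/*
 * Illustrative usage sketch (hypothetical caller, not part of the original
 * file): following the configuration matrix above, a percentage-based
 * proc-wide throttle and an exception-based per-thread limit would look like:
 *
 *	// throttle the whole task once it uses 50% CPU per 2-second refill
 *	proc_set_task_ruse_cpu(task, TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE,
 *	        50, 2 * NSEC_PER_SEC, 0, 0);
 *
 *	// raise EXC_RESOURCE if any one thread exceeds the system default limit
 *	proc_set_task_ruse_cpu(task, TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC,
 *	        TASK_POLICY_CPUMON_DEFAULTS, 0, 0, 0);
 *
 * Passing a non-zero deadline with either policy returns ENOTSUP, as the
 * matrix indicates.
 */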
3188
3189 int
3190 proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
3191 {
3192 int error = 0;
3193 int action;
3194 void * bsdinfo = NULL;
3195
3196 task_lock(task);
3197 if (task != current_task()) {
3198 task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
3199 } else {
3200 task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
3201 }
3202
3203 error = task_clear_cpuusage_locked(task, cpumon_entitled);
3204 if (error != 0)
3205 goto out;
3206
3207 action = task->applied_ru_cpu;
3208 if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3209 /* reset action */
3210 task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
3211 }
3212 if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3213 bsdinfo = task->bsd_info;
3214 task_unlock(task);
3215 proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
3216 goto out1;
3217 }
3218
3219 out:
3220 task_unlock(task);
3221 out1:
3222 return(error);
3223
3224 }
3225
3226 /* used to apply resource limit related actions */
3227 static int
3228 task_apply_resource_actions(task_t task, int type)
3229 {
3230 int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
3231 void * bsdinfo = NULL;
3232
3233 switch (type) {
3234 case TASK_POLICY_CPU_RESOURCE_USAGE:
3235 break;
3236 case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
3237 case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
3238 case TASK_POLICY_DISK_RESOURCE_USAGE:
3239 case TASK_POLICY_NETWORK_RESOURCE_USAGE:
3240 case TASK_POLICY_POWER_RESOURCE_USAGE:
3241 return(0);
3242
3243 default:
3244 return(1);
3245 };
3246
3247 /* only cpu actions for now */
3248 task_lock(task);
3249
3250 if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3251 /* apply action */
3252 task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
3253 action = task->applied_ru_cpu_ext;
3254 } else {
3255 action = task->applied_ru_cpu_ext;
3256 }
3257
3258 if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3259 bsdinfo = task->bsd_info;
3260 task_unlock(task);
3261 proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
3262 } else
3263 task_unlock(task);
3264
3265 return(0);
3266 }
3267
3268 /*
3269 * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
3270 * only allows for one at a time. This means that if there is a per-thread limit active, the other
3271 * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
3272 * to the caller, and prefer that, but there's no need for that at the moment.
3273 */
3274 int
3275 task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
3276 {
3277 *percentagep = 0;
3278 *intervalp = 0;
3279 *deadlinep = 0;
3280
3281 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
3282 *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3283 *percentagep = task->rusage_cpu_perthr_percentage;
3284 *intervalp = task->rusage_cpu_perthr_interval;
3285 } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
3286 *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
3287 *percentagep = task->rusage_cpu_percentage;
3288 *intervalp = task->rusage_cpu_interval;
3289 } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
3290 *scope = TASK_RUSECPU_FLAGS_DEADLINE;
3291 *deadlinep = task->rusage_cpu_deadline;
3292 } else {
3293 *scope = 0;
3294 }
3295
3296 return(0);
3297 }
3298
3299 /*
3300 * Disable the CPU usage monitor for the task. Return value indicates
3301 * if the mechanism was actually enabled.
3302 */
3303 int
3304 task_disable_cpumon(task_t task) {
3305 thread_t thread;
3306
3307 task_lock_assert_owned(task);
3308
3309 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
3310 return (KERN_INVALID_ARGUMENT);
3311 }
3312
3313 #if CONFIG_TELEMETRY
3314 /*
3315 * Disable task-wide telemetry if it was ever enabled by the CPU usage
3316 * monitor's warning zone.
3317 */
3318 telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0);
3319 #endif
3320
3321 /*
3322 * Disable the monitor for the task, and propagate that change to each thread.
3323 */
3324 task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
3325 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3326 set_astledger(thread);
3327 }
3328 task->rusage_cpu_perthr_percentage = 0;
3329 task->rusage_cpu_perthr_interval = 0;
3330
3331 return (KERN_SUCCESS);
3332 }
3333
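/*
 * Editor's sketch (hypothetical): task_disable_cpumon() must be called with the
 * task lock held, so an unlocked convenience wrapper would look like this.
 */
static int
example_disable_cpumon(task_t task)
{
	int ret;

	task_lock(task);
	ret = task_disable_cpumon(task);	/* KERN_INVALID_ARGUMENT if it was never armed */
	task_unlock(task);

	return ret;
}
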
3334 int
3335 task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
3336 {
3337 thread_t thread;
3338 uint64_t abstime = 0;
3339 uint64_t limittime = 0;
3340
3341 lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);
3342
3343 /* By default, refill once per second */
3344 if (interval == 0)
3345 interval = NSEC_PER_SEC;
3346
3347 if (percentage != 0) {
3348 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3349 boolean_t warn = FALSE;
3350
3351 /*
3352 * A per-thread CPU limit on a task generates an exception
3353 * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
3354 * exceeds the limit.
3355 */
3356
3357 if (percentage == TASK_POLICY_CPUMON_DISABLE) {
3358 if (cpumon_entitled) {
3359 task_disable_cpumon(task);
3360 return (0);
3361 }
3362
3363 /*
3364 * This task wishes to disable the CPU usage monitor, but it's
3365 * missing the required entitlement:
3366 * com.apple.private.kernel.override-cpumon
3367 *
3368 * Instead, treat this as a request to reset its params
3369 * back to the defaults.
3370 */
3371 warn = TRUE;
3372 percentage = TASK_POLICY_CPUMON_DEFAULTS;
3373 }
3374
3375 if (percentage == TASK_POLICY_CPUMON_DEFAULTS) {
3376 percentage = proc_max_cpumon_percentage;
3377 interval = proc_max_cpumon_interval;
3378 }
3379
3380 if (percentage > 100) {
3381 percentage = 100;
3382 }
3383
3384 /*
3385 * Passing in an interval of -1 means either:
3386 * - Leave the interval as-is, if there's already a per-thread
3387 * limit configured
3388 * - Use the system default.
3389 */
3390 if (interval == -1ULL) {
3391 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3392 interval = task->rusage_cpu_perthr_interval;
3393 } else {
3394 interval = proc_max_cpumon_interval;
3395 }
3396 }
3397
3398 /*
3399 * Enforce global caps on CPU usage monitor here if the process is not
3400 * entitled to escape the global caps.
3401 */
3402 if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
3403 warn = TRUE;
3404 percentage = proc_max_cpumon_percentage;
3405 }
3406
3407 if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
3408 warn = TRUE;
3409 interval = proc_max_cpumon_interval;
3410 }
3411
3412 if (warn) {
3413 int pid = 0;
3414 const char *procname = "unknown";
3415
3416 #ifdef MACH_BSD
3417 pid = proc_selfpid();
3418 if (current_task()->bsd_info != NULL) {
3419 procname = proc_name_address(current_task()->bsd_info);
3420 }
3421 #endif
3422
3423 printf("process %s[%d] denied attempt to escape CPU monitor"
3424 " (missing required entitlement).\n", procname, pid);
3425 }
3426
3427 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3428 task->rusage_cpu_perthr_percentage = percentage;
3429 task->rusage_cpu_perthr_interval = interval;
3430 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3431 set_astledger(thread);
3432 }
3433 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3434 /*
3435 * Currently, a proc-wide CPU limit always blocks if the limit is
3436 * exceeded (LEDGER_ACTION_BLOCK).
3437 */
3438 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
3439 task->rusage_cpu_percentage = percentage;
3440 task->rusage_cpu_interval = interval;
3441
3442 limittime = (interval * percentage) / 100;
3443 nanoseconds_to_absolutetime(limittime, &abstime);
3444
3445 ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
3446 ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
3447 ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
3448 }
3449 }
3450
3451 if (deadline != 0) {
3452 assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
3453
3454 /* if already in use, cancel and wait for it to clean out */
3455 if (task->rusage_cpu_callt != NULL) {
3456 task_unlock(task);
3457 thread_call_cancel_wait(task->rusage_cpu_callt);
3458 task_lock(task);
3459 }
3460 if (task->rusage_cpu_callt == NULL) {
3461 task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
3462 }
3463 /* setup callout */
3464 if (task->rusage_cpu_callt != 0) {
3465 uint64_t save_abstime = 0;
3466
3467 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
3468 task->rusage_cpu_deadline = deadline;
3469
3470 nanoseconds_to_absolutetime(deadline, &abstime);
3471 save_abstime = abstime;
3472 clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
3473 thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
3474 }
3475 }
3476
3477 return(0);
3478 }
3479
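/*
 * Editor's sketch (hypothetical): arming the per-thread CPU usage monitor with
 * the system defaults.  TASK_POLICY_CPUMON_DEFAULTS makes the routine substitute
 * proc_max_cpumon_percentage/proc_max_cpumon_interval, so the interval argument
 * is irrelevant here.  'entitled' is nonzero when the caller holds
 * com.apple.private.kernel.override-cpumon.
 */
static int
example_arm_perthread_cpumon(task_t task, int entitled)
{
	int err;

	task_lock(task);
	err = task_set_cpuusage(task, TASK_POLICY_CPUMON_DEFAULTS, 0, 0,
	        TASK_RUSECPU_FLAGS_PERTHR_LIMIT, entitled);
	task_unlock(task);

	return err;
}
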
3480 int
3481 task_clear_cpuusage(task_t task, int cpumon_entitled)
3482 {
3483 int retval = 0;
3484
3485 task_lock(task);
3486 retval = task_clear_cpuusage_locked(task, cpumon_entitled);
3487 task_unlock(task);
3488
3489 return(retval);
3490 }
3491
3492 int
3493 task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
3494 {
3495 thread_call_t savecallt;
3496
3497 /* cancel percentage handling if set */
3498 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3499 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
3500 ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
3501 task->rusage_cpu_percentage = 0;
3502 task->rusage_cpu_interval = 0;
3503 }
3504
3505 /*
3506 * Disable the CPU usage monitor.
3507 */
3508 if (cpumon_entitled) {
3509 task_disable_cpumon(task);
3510 }
3511
3512 /* cancel deadline handling if set */
3513 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
3514 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
3515 if (task->rusage_cpu_callt != 0) {
3516 savecallt = task->rusage_cpu_callt;
3517 task->rusage_cpu_callt = NULL;
3518 task->rusage_cpu_deadline = 0;
3519 task_unlock(task);
3520 thread_call_cancel_wait(savecallt);
3521 thread_call_free(savecallt);
3522 task_lock(task);
3523 }
3524 }
3525 return(0);
3526 }
3527
3528 /* called by ledger unit to enforce action due to resource usage criteria being met */
3529 void
3530 task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
3531 {
3532 task_t task = (task_t)param0;
3533 (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
3534 return;
3535 }
3536
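/*
 * Editor's sketch (hypothetical): the deadline flow end to end.  Arming a
 * deadline in task_set_cpuusage() schedules task->rusage_cpu_callt; when the
 * callout fires, task_action_cpuusage() above runs and task_apply_resource_actions()
 * hands the previously configured CPU action to the BSD side via
 * proc_apply_resource_actions().
 */
static int
example_arm_cpu_deadline(task_t task, uint64_t deadline_ns)
{
	int err;

	task_lock(task);
	err = task_set_cpuusage(task, 0, 0, deadline_ns,
	        TASK_RUSECPU_FLAGS_DEADLINE, 0);
	task_unlock(task);

	return err;
}
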
3537
3538 /*
3539 * Routines for taskwatch and pidbind
3540 */
3541
3542
3543 /*
3544 * Routines for importance donation/inheritance/boosting
3545 */
3546
3547 static void
3548 task_importance_update_live_donor(task_t target_task)
3549 {
3550 #if IMPORTANCE_INHERITANCE
3551
3552 ipc_importance_task_t task_imp;
3553
3554 task_imp = ipc_importance_for_task(target_task, FALSE);
3555 if (IIT_NULL != task_imp) {
3556 ipc_importance_task_update_live_donor(task_imp);
3557 ipc_importance_task_release(task_imp);
3558 }
3559 #endif /* IMPORTANCE_INHERITANCE */
3560 }
3561
3562 void
3563 task_importance_mark_donor(task_t task, boolean_t donating)
3564 {
3565 #if IMPORTANCE_INHERITANCE
3566 ipc_importance_task_t task_imp;
3567
3568 task_imp = ipc_importance_for_task(task, FALSE);
3569 if (IIT_NULL != task_imp) {
3570 ipc_importance_task_mark_donor(task_imp, donating);
3571 ipc_importance_task_release(task_imp);
3572 }
3573 #endif /* IMPORTANCE_INHERITANCE */
3574 }
3575
3576 void
3577 task_importance_mark_live_donor(task_t task, boolean_t live_donating)
3578 {
3579 #if IMPORTANCE_INHERITANCE
3580 ipc_importance_task_t task_imp;
3581
3582 task_imp = ipc_importance_for_task(task, FALSE);
3583 if (IIT_NULL != task_imp) {
3584 ipc_importance_task_mark_live_donor(task_imp, live_donating);
3585 ipc_importance_task_release(task_imp);
3586 }
3587 #endif /* IMPORTANCE_INHERITANCE */
3588 }
3589
3590 void
3591 task_importance_mark_receiver(task_t task, boolean_t receiving)
3592 {
3593 #if IMPORTANCE_INHERITANCE
3594 ipc_importance_task_t task_imp;
3595
3596 task_imp = ipc_importance_for_task(task, FALSE);
3597 if (IIT_NULL != task_imp) {
3598 ipc_importance_task_mark_receiver(task_imp, receiving);
3599 ipc_importance_task_release(task_imp);
3600 }
3601 #endif /* IMPORTANCE_INHERITANCE */
3602 }
3603
3604 void
3605 task_importance_mark_denap_receiver(task_t task, boolean_t denap)
3606 {
3607 #if IMPORTANCE_INHERITANCE
3608 ipc_importance_task_t task_imp;
3609
3610 task_imp = ipc_importance_for_task(task, FALSE);
3611 if (IIT_NULL != task_imp) {
3612 ipc_importance_task_mark_denap_receiver(task_imp, denap);
3613 ipc_importance_task_release(task_imp);
3614 }
3615 #endif /* IMPORTANCE_INHERITANCE */
3616 }
3617
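/*
 * Editor's sketch (hypothetical, illustrative only): one plausible pairing of
 * the mark routines above at task configuration time -- a task that adopts
 * boosts is marked as a receiver, while a task that originates them is marked
 * as a donor.
 */
static void
example_mark_importance_roles(task_t receiver_task, task_t donor_task)
{
	/* the receiver will accept watchport/message boosts */
	task_importance_mark_receiver(receiver_task, TRUE);

	/* the donor's messages will carry importance to receivers */
	task_importance_mark_donor(donor_task, TRUE);
	task_importance_mark_live_donor(donor_task, TRUE);
}
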
3618 void
3619 task_importance_reset(__imp_only task_t task)
3620 {
3621 #if IMPORTANCE_INHERITANCE
3622 ipc_importance_task_t task_imp;
3623
3624 /* TODO: Lower importance downstream before disconnect */
3625 task_imp = task->task_imp_base;
3626 ipc_importance_reset(task_imp, FALSE);
3627 task_importance_update_live_donor(task);
3628 #endif /* IMPORTANCE_INHERITANCE */
3629 }
3630
3631 #if IMPORTANCE_INHERITANCE
3632
3633 /*
3634 * Sets the task boost bit to the provided value. Does NOT run the update function.
3635 *
3636 * Task lock must be held.
3637 */
3638 void
3639 task_set_boost_locked(task_t task, boolean_t boost_active)
3640 {
3641 #if IMPORTANCE_DEBUG
3642 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
3643 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
3644 #endif
3645
3646 task->requested_policy.t_boosted = boost_active;
3647
3648 #if IMPORTANCE_DEBUG
3649 if (boost_active == TRUE){
3650 DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
3651 } else {
3652 DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
3653 }
3654 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
3655 proc_selfpid(), task_pid(task),
3656 trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
3657 #endif
3658 }
3659
3660 /*
3661 * Sets the task boost bit to the provided value and applies the update.
3662 *
3663 * Task lock must be held. Must call update complete after unlocking the task.
3664 */
3665 void
3666 task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
3667 {
3668 task_set_boost_locked(task, boost_active);
3669
3670 task_policy_update_locked(task, THREAD_NULL, pend_token);
3671 }
3672
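/*
 * Editor's sketch (hypothetical caller): the lock/update/complete protocol
 * described above, assuming the task_pend_token and
 * task_policy_update_complete_unlocked() pattern used by the importance code
 * that drives this routine.
 */
static void
example_boost_task(task_t task, boolean_t boost_active)
{
	struct task_pend_token pend_token = {};

	task_lock(task);
	task_update_boost_locked(task, boost_active, &pend_token);
	task_unlock(task);

	/* deferred policy side effects must run outside the task lock */
	task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
}
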
3673 /*
3674 * Check if this task should donate importance.
3675 *
3676 * May be called without taking the task lock. In that case, donor status can change
3677 * so you must check only once for each donation event.
3678 */
3679 boolean_t
3680 task_is_importance_donor(task_t task)
3681 {
3682 if (task->task_imp_base == IIT_NULL)
3683 return FALSE;
3684 return ipc_importance_task_is_donor(task->task_imp_base);
3685 }
3686
3687 /*
3688 * Query the status of the task's donor mark.
3689 */
3690 boolean_t
3691 task_is_marked_importance_donor(task_t task)
3692 {
3693 if (task->task_imp_base == IIT_NULL)
3694 return FALSE;
3695 return ipc_importance_task_is_marked_donor(task->task_imp_base);
3696 }
3697
3698 /*
3699 * Query the status of the task's live donor and donor mark.
3700 */
3701 boolean_t
3702 task_is_marked_live_importance_donor(task_t task)
3703 {
3704 if (task->task_imp_base == IIT_NULL)
3705 return FALSE;
3706 return ipc_importance_task_is_marked_live_donor(task->task_imp_base);
3707 }
3708
3709
3710 /*
3711 * This routine may be called without holding task lock
3712 * since the value of imp_receiver can never be unset.
3713 */
3714 boolean_t
3715 task_is_importance_receiver(task_t task)
3716 {
3717 if (task->task_imp_base == IIT_NULL)
3718 return FALSE;
3719 return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3720 }
3721
3722 /*
3723 * Query the task's receiver mark.
3724 */
3725 boolean_t
3726 task_is_marked_importance_receiver(task_t task)
3727 {
3728 if (task->task_imp_base == IIT_NULL)
3729 return FALSE;
3730 return ipc_importance_task_is_marked_receiver(task->task_imp_base);
3731 }
3732
3733 /*
3734 * This routine may be called without holding task lock
3735 * since the value of de-nap receiver can never be unset.
3736 */
3737 boolean_t
3738 task_is_importance_denap_receiver(task_t task)
3739 {
3740 if (task->task_imp_base == IIT_NULL)
3741 return FALSE;
3742 return ipc_importance_task_is_denap_receiver(task->task_imp_base);
3743 }
3744
3745 /*
3746 * Query the task's de-nap receiver mark.
3747 */
3748 boolean_t
3749 task_is_marked_importance_denap_receiver(task_t task)
3750 {
3751 if (task->task_imp_base == IIT_NULL)
3752 return FALSE;
3753 return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base);
3754 }
3755
3756 /*
3757 * This routine may be called without holding task lock
3758 * since the value of imp_receiver can never be unset.
3759 */
3760 boolean_t
3761 task_is_importance_receiver_type(task_t task)
3762 {
3763 if (task->task_imp_base == IIT_NULL)
3764 return FALSE;
3765 return (task_is_importance_receiver(task) ||
3766 task_is_importance_denap_receiver(task));
3767 }
3768
3769 /*
3770 * External importance assertions are managed by the process in userspace
3771 * Internal importance assertions are the responsibility of the kernel
3772 * Assertions are changed from internal to external via task_importance_externalize_assertion
3773 */
3774
3775 int
3776 task_importance_hold_watchport_assertion(task_t target_task, uint32_t count)
3777 {
3778 ipc_importance_task_t task_imp;
3779 kern_return_t ret;
3780
3781 /* must already have set up an importance */
3782 task_imp = target_task->task_imp_base;
3783 assert(IIT_NULL != task_imp);
3784
3785 ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3786 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3787 }
3788
3789 int
3790 task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
3791 {
3792 ipc_importance_task_t task_imp;
3793 kern_return_t ret;
3794
3795 /* may be first time, so allow for possible importance setup */
3796 task_imp = ipc_importance_for_task(target_task, FALSE);
3797 if (IIT_NULL == task_imp) {
3798 return EOVERFLOW;
3799 }
3800 ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3801 ipc_importance_task_release(task_imp);
3802
3803 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3804 }
3805
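/*
 * Editor's sketch (hypothetical): holds and drops must balance.  The wrappers
 * in this group translate KERN_* failures into BSD errno values such as
 * EOVERFLOW (no importance structure) and ENOTSUP (the underlying IPC
 * importance call failed).
 */
static int
example_boost_briefly(task_t target)
{
	int err;

	err = task_importance_hold_internal_assertion(target, 1);
	if (err != 0) {
		return err;
	}

	/* ... perform work on behalf of the boosted task ... */

	return task_importance_drop_internal_assertion(target, 1);
}
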
3806 int
3807 task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
3808 {
3809 ipc_importance_task_t task_imp;
3810 kern_return_t ret;
3811
3812 /* may be first time, so allow for possible importance setup */
3813 task_imp = ipc_importance_for_task(target_task, FALSE);
3814 if (IIT_NULL == task_imp) {
3815 return EOVERFLOW;
3816 }
3817 ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count);
3818 ipc_importance_task_release(task_imp);
3819
3820 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3821 }
3822
3823 int
3824 task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
3825 {
3826 ipc_importance_task_t task_imp;
3827 kern_return_t ret;
3828
3829 /* must already have set up an importance */
3830 task_imp = target_task->task_imp_base;
3831 if (IIT_NULL == task_imp) {
3832 return EOVERFLOW;
3833 }
3834 ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count);
3835 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3836 }
3837
3838 int
3839 task_importance_drop_internal_assertion(task_t target_task, uint32_t count)
3840 {
3841 ipc_importance_task_t task_imp;
3842 kern_return_t ret;
3843
3844 /* must already have set up an importance */
3845 task_imp = target_task->task_imp_base;
3846 if (IIT_NULL == task_imp) {
3847 return EOVERFLOW;
3848 }
3849 ret = ipc_importance_task_drop_internal_assertion(target_task->task_imp_base, count);
3850 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3851 }
3852
3853 int
3854 task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
3855 {
3856 ipc_importance_task_t task_imp;
3857 kern_return_t ret;
3858
3859 /* must already have set up an importance */
3860 task_imp = target_task->task_imp_base;
3861 if (IIT_NULL == task_imp) {
3862 return EOVERFLOW;
3863 }
3864 ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count);
3865 return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3866 }
3867
3868 int
3869 task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
3870 {
3871 ipc_importance_task_t task_imp;
3872 kern_return_t ret;
3873
3874 /* must already have set up an importance */
3875 task_imp = target_task->task_imp_base;
3876 if (IIT_NULL == task_imp) {
3877 return EOVERFLOW;
3878 }
3879 ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count);
3880 return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3881 }
3882
3883 static void
3884 task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
3885 {
3886 int boost = 0;
3887
3888 __impdebug_only int released_pid = 0;
3889 __impdebug_only int pid = task_pid(task);
3890
3891 ipc_importance_task_t release_imp_task = IIT_NULL;
3892
3893 if (IP_VALID(port) != 0) {
3894 ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);
3895
3896 ip_lock(port);
3897
3898 /*
3899 * The port must have been marked tempowner already.
3900 * This also filters out ports whose receive rights
3901 * are already enqueued in a message, as you can't
3902 * change the right's destination once it's already
3903 * on its way.
3904 */
3905 if (port->ip_tempowner != 0) {
3906 assert(port->ip_impdonation != 0);
3907
3908 boost = port->ip_impcount;
3909 if (IIT_NULL != port->ip_imp_task) {
3910 /*
3911 * if this port is already bound to a task,
3912 * release the task reference and drop any
3913 * watchport-forwarded boosts
3914 */
3915 release_imp_task = port->ip_imp_task;
3916 port->ip_imp_task = IIT_NULL;
3917 }
3918
3919 /* mark that the port is watching another task (reference held in port->ip_imp_task) */
3920 if (ipc_importance_task_is_marked_receiver(new_imp_task)) {
3921 port->ip_imp_task = new_imp_task;
3922 new_imp_task = IIT_NULL;
3923 }
3924 }
3925 ip_unlock(port);
3926
3927 if (IIT_NULL != new_imp_task) {
3928 ipc_importance_task_release(new_imp_task);
3929 }
3930
3931 if (IIT_NULL != release_imp_task) {
3932 if (boost > 0)
3933 ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
3934
3935 // released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */
3936 ipc_importance_task_release(release_imp_task);
3937 }
3938 #if IMPORTANCE_DEBUG
3939 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
3940 proc_selfpid(), pid, boost, released_pid, 0);
3941 #endif /* IMPORTANCE_DEBUG */
3942 }
3943
3944 *boostp = boost;
3945 return;
3946 }
3947
3948 #endif /* IMPORTANCE_INHERITANCE */
3949
3950 /*
3951 * Routines for VM to query task importance
3952 */
3953
3954
3955 /*
3956 * Order to be considered while estimating importance
3957 * for low memory notification and purging purgeable memory.
3958 */
3959 #define TASK_IMPORTANCE_FOREGROUND 4
3960 #define TASK_IMPORTANCE_NOTDARWINBG 1
3961
3962
3963 /*
3964 * (Un)Mark the task as a privileged listener for memory notifications.
3965  * If marked, this task will be notified ahead of the bulk of all other
3966  * tasks when the system enters a pressure level of interest to this
3967  * task.
3968 */
3969 int
3970 task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
3971 {
3972 if (old_value != NULL) {
3973 *old_value = (boolean_t)task->low_mem_privileged_listener;
3974 } else {
3975 task_lock(task);
3976 task->low_mem_privileged_listener = (uint32_t)new_value;
3977 task_unlock(task);
3978 }
3979
3980 return 0;
3981 }
3982
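/*
 * Editor's sketch (hypothetical): the routine above is a combined get/set --
 * passing a non-NULL old_value makes it a pure query (new_value is ignored),
 * while passing NULL applies new_value under the task lock.
 */
static void
example_privileged_listener(task_t task)
{
	boolean_t was_listener = FALSE;

	/* query only: new_value is ignored when old_value is supplied */
	task_low_mem_privileged_listener(task, FALSE, &was_listener);

	if (!was_listener) {
		/* opt in to early memory-pressure notification */
		task_low_mem_privileged_listener(task, TRUE, NULL);
	}
}
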
3983 /*
3984  * Checks whether the task has already been notified.
3985 *
3986 * Condition: task lock should be held while calling this function.
3987 */
3988 boolean_t
3989 task_has_been_notified(task_t task, int pressurelevel)
3990 {
3991 if (task == NULL) {
3992 return FALSE;
3993 }
3994
3995 if (pressurelevel == kVMPressureWarning)
3996 return (task->low_mem_notified_warn ? TRUE : FALSE);
3997 else if (pressurelevel == kVMPressureCritical)
3998 return (task->low_mem_notified_critical ? TRUE : FALSE);
3999 else
4000 return TRUE;
4001 }
4002
4003
4004 /*
4005  * Checks whether the task has already been used for purging.
4006 *
4007 * Condition: task lock should be held while calling this function.
4008 */
4009 boolean_t
4010 task_used_for_purging(task_t task, int pressurelevel)
4011 {
4012 if (task == NULL) {
4013 return FALSE;
4014 }
4015
4016 if (pressurelevel == kVMPressureWarning)
4017 return (task->purged_memory_warn ? TRUE : FALSE);
4018 else if (pressurelevel == kVMPressureCritical)
4019 return (task->purged_memory_critical ? TRUE : FALSE);
4020 else
4021 return TRUE;
4022 }
4023
4024
4025 /*
4026 * Mark the task as notified with memory notification.
4027 *
4028 * Condition: task lock should be held while calling this function.
4029 */
4030 void
4031 task_mark_has_been_notified(task_t task, int pressurelevel)
4032 {
4033 if (task == NULL) {
4034 return;
4035 }
4036
4037 if (pressurelevel == kVMPressureWarning)
4038 task->low_mem_notified_warn = 1;
4039 else if (pressurelevel == kVMPressureCritical)
4040 task->low_mem_notified_critical = 1;
4041 }
4042
4043
4044 /*
4045 * Mark the task as purged.
4046 *
4047 * Condition: task lock should be held while calling this function.
4048 */
4049 void
4050 task_mark_used_for_purging(task_t task, int pressurelevel)
4051 {
4052 if (task == NULL) {
4053 return;
4054 }
4055
4056 if (pressurelevel == kVMPressureWarning)
4057 task->purged_memory_warn = 1;
4058 else if (pressurelevel == kVMPressureCritical)
4059 task->purged_memory_critical = 1;
4060 }
4061
4062
4063 /*
4064 * Mark the task eligible for low memory notification.
4065 *
4066 * Condition: task lock should be held while calling this function.
4067 */
4068 void
4069 task_clear_has_been_notified(task_t task, int pressurelevel)
4070 {
4071 if (task == NULL) {
4072 return;
4073 }
4074
4075 if (pressurelevel == kVMPressureWarning)
4076 task->low_mem_notified_warn = 0;
4077 else if (pressurelevel == kVMPressureCritical)
4078 task->low_mem_notified_critical = 0;
4079 }
4080
4081
4082 /*
4083 * Mark the task eligible for purging its purgeable memory.
4084 *
4085 * Condition: task lock should be held while calling this function.
4086 */
4087 void
4088 task_clear_used_for_purging(task_t task)
4089 {
4090 if (task == NULL) {
4091 return;
4092 }
4093
4094 task->purged_memory_warn = 0;
4095 task->purged_memory_critical = 0;
4096 }
4097
4098
4099 /*
4100 * Estimate task importance for purging its purgeable memory
4101 * and low memory notification.
4102 *
4103 * Importance is calculated in the following order of criteria:
4104 * -Task role : Background vs Foreground
4105 * -Boost status: Not boosted vs Boosted
4106 * -Darwin BG status.
4107 *
4108  * Returns: Estimated task importance. A less important task has a lower
4109  * estimated importance.
4110 */
4111 int
4112 task_importance_estimate(task_t task)
4113 {
4114 int task_importance = 0;
4115
4116 if (task == NULL) {
4117 return 0;
4118 }
4119
4120 if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION)
4121 task_importance += TASK_IMPORTANCE_FOREGROUND;
4122
4123 if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0)
4124 task_importance += TASK_IMPORTANCE_NOTDARWINBG;
4125
4126 return task_importance;
4127 }
4128
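/*
 * Editor's sketch (hypothetical): how the estimate above orders tasks.  A
 * foreground task that is not darwin-bg scores 4 + 1 = 5, a foreground
 * darwin-bg task scores 4, a background non-darwin-bg task scores 1, and a
 * background darwin-bg task scores 0, making it the preferred purge candidate.
 */
static boolean_t
example_prefer_for_purging(task_t a, task_t b)
{
	/* a lower estimate means less important, i.e. a better candidate for purging */
	return (task_importance_estimate(a) < task_importance_estimate(b)) ? TRUE : FALSE;
}
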