]> git.saurik.com Git - apple/xnu.git/blame - osfmk/kern/task_policy.c
xnu-3248.50.21.tar.gz
[apple/xnu.git] / osfmk / kern / task_policy.c
CommitLineData
1c79356b 1/*
91447636 2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
1c79356b 28
91447636
A
29#include <mach/mach_types.h>
30#include <mach/task_server.h>
31
32#include <kern/sched.h>
0b4e3aa0 33#include <kern/task.h>
6d2010ae
A
34#include <mach/thread_policy.h>
35#include <sys/errno.h>
36#include <sys/resource.h>
37#include <machine/limits.h>
316670eb
A
38#include <kern/ledger.h>
39#include <kern/thread_call.h>
fe8ab488 40#include <kern/sfi.h>
a1c7dba1 41#include <kern/coalition.h>
39236c6e
A
42#if CONFIG_TELEMETRY
43#include <kern/telemetry.h>
44#endif
45
fe8ab488
A
46#if IMPORTANCE_INHERITANCE
47#include <ipc/ipc_importance.h>
39236c6e
A
48#if IMPORTANCE_DEBUG
49#include <mach/machine/sdt.h>
50#endif /* IMPORTANCE_DEBUG */
fe8ab488 51#endif /* IMPORTANCE_INHERITANCE */
39236c6e 52
316670eb
A
53#include <sys/kdebug.h>
54
39236c6e
A
55/*
56 * Task Policy
57 *
58 * This subsystem manages task and thread IO priority and backgrounding,
59 * as well as importance inheritance, process suppression, task QoS, and apptype.
60 * These properties have a surprising number of complex interactions, so they are
61 * centralized here in one state machine to simplify the implementation of those interactions.
62 *
63 * Architecture:
64 * Threads and tasks have three policy fields: requested, effective, and pending.
65 * Requested represents the wishes of each interface that influences task policy.
66 * Effective represents the distillation of that policy into a set of behaviors.
67 * Pending represents updates that haven't been applied yet.
68 *
69 * Each interface that has an input into the task policy state machine controls a field in requested.
70 * If the interface has a getter, it returns what is in the field in requested, but that is
71 * not necessarily what is actually in effect.
72 *
73 * All kernel subsystems that behave differently based on task policy call into
74 * the get_effective_policy function, which returns the decision of the task policy state machine
75 * for that subsystem by querying only the 'effective' field.
76 *
77 * Policy change operations:
78 * Here are the steps to change a policy on a task or thread:
79 * 1) Lock task
80 * 2) Change requested field for the relevant policy
81 * 3) Run a task policy update, which recalculates effective based on requested,
82 * then takes a diff between the old and new versions of requested and calls the relevant
83 * other subsystems to apply these changes, and updates the pending field.
84 * 4) Unlock task
85 * 5) Run task policy update complete, which looks at the pending field to update
86 * subsystems which cannot be touched while holding the task lock.
87 *
88 * To add a new requested policy, add the field in the requested struct, the flavor in task.h,
89 * the setter and getter in proc_(set|get)_task_policy*, and dump the state in task_requested_bitfield,
fe8ab488
A
90 * then set up the effects of that behavior in task_policy_update*. If the policy manifests
91 * itself as a distinct effective policy, add it to the effective struct and add it to the
92 * proc_get_effective_policy accessor.
39236c6e
A
93 *
94 * Most policies are set via proc_set_task_policy, but policies that don't fit that interface
95 * roll their own lock/set/update/unlock/complete code inside this file.
96 *
97 *
98 * Suppression policy
99 *
100 * These are a set of behaviors that can be requested for a task. They currently have specific
101 * implied actions when they're enabled, but they may be made customizable in the future.
102 *
103 * When the affected task is boosted, we temporarily disable the suppression behaviors
104 * so that the affected process has a chance to run so it can call the API to permanently
105 * disable the suppression behaviors.
106 *
107 * Locking
108 *
109 * Changing task policy on a task or thread takes the task lock, and not the thread lock.
110 * TODO: Should changing policy on a thread take the thread lock instead?
111 *
112 * Querying the effective policy does not take the task lock, to prevent deadlocks or slowdown in sensitive code.
113 * This means that any notification of state change needs to be externally synchronized.
114 *
115 */
116
fe8ab488
A
117extern const qos_policy_params_t thread_qos_policy_params;
118
39236c6e
A
119/* for task holds without dropping the lock */
120extern void task_hold_locked(task_t task);
121extern void task_release_locked(task_t task);
122extern void task_wait_locked(task_t task, boolean_t until_not_runnable);
123
fe8ab488
A
124extern void thread_recompute_qos(thread_t thread);
125
39236c6e
A
126/* Task policy related helper functions */
127static void proc_set_task_policy_locked(task_t task, thread_t thread, int category, int flavor, int value);
fe8ab488 128static void proc_set_task_policy2_locked(task_t task, thread_t thread, int category, int flavor, int value1, int value2);
6d2010ae 129
fe8ab488
A
130static void task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token);
131static void task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token);
132static void task_policy_update_task_locked(task_t task, boolean_t update_throttle, boolean_t update_bg_throttle, boolean_t update_sfi);
133static void task_policy_update_thread_locked(thread_t thread, int update_cpu, boolean_t update_throttle, boolean_t update_sfi, boolean_t update_qos);
3e170ce0
A
134
135#if CONFIG_SCHED_SFI
a1c7dba1 136static boolean_t task_policy_update_coalition_focal_tasks(task_t task, int prev_role, int next_role);
3e170ce0 137#endif
39236c6e
A
138
139static int proc_get_effective_policy(task_t task, thread_t thread, int policy);
140
141static void proc_iopol_to_tier(int iopolicy, int *tier, int *passive);
142static int proc_tier_to_iopol(int tier, int passive);
143
fe8ab488
A
144static uintptr_t trequested_0(task_t task, thread_t thread);
145static uintptr_t trequested_1(task_t task, thread_t thread);
146static uintptr_t teffective_0(task_t task, thread_t thread);
147static uintptr_t teffective_1(task_t task, thread_t thread);
148static uint32_t tpending(task_pend_token_t pend_token);
39236c6e
A
149static uint64_t task_requested_bitfield(task_t task, thread_t thread);
150static uint64_t task_effective_bitfield(task_t task, thread_t thread);
39236c6e
A
151
152void proc_get_thread_policy(thread_t thread, thread_policy_state_t info);
153
154/* CPU Limits related helper functions */
155static int task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope);
156int task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int entitled);
157static int task_clear_cpuusage_locked(task_t task, int cpumon_entitled);
158int task_disable_cpumon(task_t task);
6d2010ae 159static int task_apply_resource_actions(task_t task, int type);
316670eb 160void task_action_cpuusage(thread_call_param_t param0, thread_call_param_t param1);
39236c6e
A
161void proc_init_cpumon_params(void);
162
163#ifdef MACH_BSD
164int proc_pid(void *proc);
165extern int proc_selfpid(void);
166extern char * proc_name_address(void *p);
167extern void rethrottle_thread(void * uthread);
fe8ab488 168extern void proc_apply_task_networkbg(void * bsd_info, thread_t thread);
39236c6e 169#endif /* MACH_BSD */
0b4e3aa0 170
a1c7dba1
A
171extern zone_t thread_qos_override_zone;
172static boolean_t _proc_thread_qos_remove_override_internal(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type, boolean_t reset);
173
316670eb 174
39236c6e
A
175/* Importance Inheritance related helper functions */
176
39236c6e 177#if IMPORTANCE_INHERITANCE
39236c6e 178
fe8ab488
A
179static void task_add_importance_watchport(task_t task, mach_port_t port, int *boostp);
180static void task_importance_update_live_donor(task_t target_task);
181
39236c6e
A
182#endif /* IMPORTANCE_INHERITANCE */
183
184#if IMPORTANCE_DEBUG
185#define __impdebug_only
186#else
187#define __impdebug_only __unused
188#endif
189
190#if IMPORTANCE_INHERITANCE
191#define __imp_only
192#else
193#define __imp_only __unused
194#endif
195
196#define TASK_LOCKED 1
197#define TASK_UNLOCKED 0
198
199#define DO_LOWPRI_CPU 1
200#define UNDO_LOWPRI_CPU 2
201
/*
 * Macros for making tracing simpler.
 *
 * Each macro selects a task-level or thread-level value depending on whether
 * 'thread' is THREAD_NULL. Every use of a macro parameter is parenthesized so
 * that an arbitrary expression can be passed safely; each parameter may still
 * be evaluated more than once, so arguments must be free of side effects.
 */

/* Task priority when thread is THREAD_NULL, otherwise the thread's base_pri */
#define tpriority(task, thread)  ((uintptr_t)((thread) == THREAD_NULL ? ((task)->priority) : ((thread)->base_pri)))
/* TASK_POLICY_TASK when thread is THREAD_NULL, otherwise TASK_POLICY_THREAD */
#define tisthread(thread)        ((thread) == THREAD_NULL ? TASK_POLICY_TASK : TASK_POLICY_THREAD)
/* task_pid(task) when thread is THREAD_NULL, otherwise the thread's thread_id */
#define targetid(task, thread)   ((uintptr_t)((thread) == THREAD_NULL ? (task_pid(task)) : ((thread)->thread_id)))
39236c6e
A
207
208/*
209 * Default parameters for certain policies
210 */
211
212int proc_standard_daemon_tier = THROTTLE_LEVEL_TIER1;
213int proc_suppressed_disk_tier = THROTTLE_LEVEL_TIER1;
214int proc_tal_disk_tier = THROTTLE_LEVEL_TIER1;
215
216int proc_graphics_timer_qos = (LATENCY_QOS_TIER_0 & 0xFF);
217
218const int proc_default_bg_iotier = THROTTLE_LEVEL_TIER2;
219
fe8ab488 220/* Latency/throughput QoS fields remain zeroed, i.e. TIER_UNSPECIFIED at creation */
39236c6e
A
221const struct task_requested_policy default_task_requested_policy = {
222 .bg_iotier = proc_default_bg_iotier
223};
224const struct task_effective_policy default_task_effective_policy = {};
225const struct task_pended_policy default_task_pended_policy = {};
226
316670eb 227/*
39236c6e
A
228 * Default parameters for CPU usage monitor.
229 *
230 * Default setting is 50% over 3 minutes.
316670eb 231 */
39236c6e
A
232#define DEFAULT_CPUMON_PERCENTAGE 50
233#define DEFAULT_CPUMON_INTERVAL (3 * 60)
234
235uint8_t proc_max_cpumon_percentage;
236uint64_t proc_max_cpumon_interval;
237
fe8ab488
A
238kern_return_t
239qos_latency_policy_validate(task_latency_qos_t ltier) {
240 if ((ltier != LATENCY_QOS_TIER_UNSPECIFIED) &&
241 ((ltier > LATENCY_QOS_TIER_5) || (ltier < LATENCY_QOS_TIER_0)))
242 return KERN_INVALID_ARGUMENT;
243
244 return KERN_SUCCESS;
245}
246
247kern_return_t
248qos_throughput_policy_validate(task_throughput_qos_t ttier) {
249 if ((ttier != THROUGHPUT_QOS_TIER_UNSPECIFIED) &&
250 ((ttier > THROUGHPUT_QOS_TIER_5) || (ttier < THROUGHPUT_QOS_TIER_0)))
251 return KERN_INVALID_ARGUMENT;
252
253 return KERN_SUCCESS;
254}
255
316670eb 256static kern_return_t
39236c6e
A
257task_qos_policy_validate(task_qos_policy_t qosinfo, mach_msg_type_number_t count) {
258 if (count < TASK_QOS_POLICY_COUNT)
259 return KERN_INVALID_ARGUMENT;
316670eb 260
39236c6e
A
261 task_latency_qos_t ltier = qosinfo->task_latency_qos_tier;
262 task_throughput_qos_t ttier = qosinfo->task_throughput_qos_tier;
316670eb 263
fe8ab488 264 kern_return_t kr = qos_latency_policy_validate(ltier);
39236c6e 265
fe8ab488
A
266 if (kr != KERN_SUCCESS)
267 return kr;
39236c6e 268
fe8ab488
A
269 kr = qos_throughput_policy_validate(ttier);
270
271 return kr;
39236c6e
A
272}
273
fe8ab488
A
/*
 * Strip a packaged QoS value down to its low 8 bits, which carry the tier
 * number (the packaging helpers in this file place a tag in bits 16 and up).
 */
uint32_t
qos_extract(uint32_t qv)
{
	const uint32_t tier_mask = 0xFF;

	return qv & tier_mask;
}
278
fe8ab488
A
279uint32_t
280qos_latency_policy_package(uint32_t qv) {
39236c6e 281 return (qv == LATENCY_QOS_TIER_UNSPECIFIED) ? LATENCY_QOS_TIER_UNSPECIFIED : ((0xFF << 16) | qv);
316670eb
A
282}
283
fe8ab488
A
284uint32_t
285qos_throughput_policy_package(uint32_t qv) {
39236c6e
A
286 return (qv == THROUGHPUT_QOS_TIER_UNSPECIFIED) ? THROUGHPUT_QOS_TIER_UNSPECIFIED : ((0xFE << 16) | qv);
287}
1c79356b 288
fe8ab488
A
289/* TEMPORARY boot-arg controlling task_policy suppression (App Nap) */
290static boolean_t task_policy_suppression_disable = FALSE;
291
1c79356b
A
292kern_return_t
293task_policy_set(
294 task_t task,
295 task_policy_flavor_t flavor,
296 task_policy_t policy_info,
297 mach_msg_type_number_t count)
298{
0b4e3aa0
A
299 kern_return_t result = KERN_SUCCESS;
300
301 if (task == TASK_NULL || task == kernel_task)
1c79356b
A
302 return (KERN_INVALID_ARGUMENT);
303
304 switch (flavor) {
305
39236c6e 306 case TASK_CATEGORY_POLICY: {
6d2010ae 307 task_category_policy_t info = (task_category_policy_t)policy_info;
1c79356b
A
308
309 if (count < TASK_CATEGORY_POLICY_COUNT)
310 return (KERN_INVALID_ARGUMENT);
311
6d2010ae 312
316670eb 313 switch(info->role) {
39236c6e
A
314 case TASK_FOREGROUND_APPLICATION:
315 case TASK_BACKGROUND_APPLICATION:
316 case TASK_DEFAULT_APPLICATION:
317 proc_set_task_policy(task, THREAD_NULL,
318 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
319 info->role);
320 break;
316670eb 321
39236c6e
A
322 case TASK_CONTROL_APPLICATION:
323 if (task != current_task() || task->sec_token.val[0] != 0)
324 result = KERN_INVALID_ARGUMENT;
325 else
326 proc_set_task_policy(task, THREAD_NULL,
327 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
328 info->role);
329 break;
6d2010ae 330
39236c6e
A
331 case TASK_GRAPHICS_SERVER:
332 /* TODO: Restrict this role to FCFS <rdar://problem/12552788> */
333 if (task != current_task() || task->sec_token.val[0] != 0)
334 result = KERN_INVALID_ARGUMENT;
335 else
336 proc_set_task_policy(task, THREAD_NULL,
337 TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE,
338 info->role);
339 break;
340 default:
0b4e3aa0 341 result = KERN_INVALID_ARGUMENT;
39236c6e
A
342 break;
343 } /* switch (info->role) */
316670eb 344
39236c6e
A
345 break;
346 }
316670eb 347
39236c6e
A
348/* Desired energy-efficiency/performance "quality-of-service" */
349 case TASK_BASE_QOS_POLICY:
fe8ab488 350 case TASK_OVERRIDE_QOS_POLICY:
39236c6e
A
351 {
352 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
353 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
354
355 if (kr != KERN_SUCCESS)
356 return kr;
357
1c79356b 358
fe8ab488
A
359 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
360 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
6d2010ae 361
fe8ab488
A
362 proc_set_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE,
363 flavor == TASK_BASE_QOS_POLICY ? TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS : TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS,
364 lqos, tqos);
1c79356b 365 }
fe8ab488 366 break;
39236c6e 367
fe8ab488 368 case TASK_BASE_LATENCY_QOS_POLICY:
39236c6e
A
369 {
370 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
371 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
372
373 if (kr != KERN_SUCCESS)
374 return kr;
375
fe8ab488 376 uint32_t lqos = qos_extract(qosinfo->task_latency_qos_tier);
39236c6e 377
fe8ab488
A
378 proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_LATENCY_QOS_POLICY, lqos);
379 }
380 break;
39236c6e 381
fe8ab488
A
382 case TASK_BASE_THROUGHPUT_QOS_POLICY:
383 {
384 task_qos_policy_t qosinfo = (task_qos_policy_t)policy_info;
385 kern_return_t kr = task_qos_policy_validate(qosinfo, count);
39236c6e 386
fe8ab488
A
387 if (kr != KERN_SUCCESS)
388 return kr;
39236c6e 389
fe8ab488 390 uint32_t tqos = qos_extract(qosinfo->task_throughput_qos_tier);
39236c6e 391
fe8ab488 392 proc_set_task_policy(task, NULL, TASK_POLICY_ATTRIBUTE, TASK_BASE_THROUGHPUT_QOS_POLICY, tqos);
0b4e3aa0 393 }
fe8ab488 394 break;
0b4e3aa0 395
39236c6e
A
396 case TASK_SUPPRESSION_POLICY:
397 {
0b4e3aa0 398
39236c6e 399 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
0b4e3aa0 400
39236c6e
A
401 if (count < TASK_SUPPRESSION_POLICY_COUNT)
402 return (KERN_INVALID_ARGUMENT);
0b4e3aa0 403
39236c6e 404 struct task_qos_policy qosinfo;
0b4e3aa0 405
39236c6e
A
406 qosinfo.task_latency_qos_tier = info->timer_throttle;
407 qosinfo.task_throughput_qos_tier = info->throughput_qos;
0b4e3aa0 408
39236c6e
A
409 kern_return_t kr = task_qos_policy_validate(&qosinfo, TASK_QOS_POLICY_COUNT);
410
411 if (kr != KERN_SUCCESS)
412 return kr;
413
fe8ab488
A
414 /* TEMPORARY disablement of task suppression */
415 if (task_policy_suppression_disable && info->active)
416 return KERN_SUCCESS;
417
418 struct task_pend_token pend_token = {};
419
39236c6e
A
420 task_lock(task);
421
422 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
423 (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_START,
3e170ce0 424 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL),
fe8ab488 425 trequested_1(task, THREAD_NULL), 0);
39236c6e
A
426
427 task->requested_policy.t_sup_active = (info->active) ? 1 : 0;
428 task->requested_policy.t_sup_lowpri_cpu = (info->lowpri_cpu) ? 1 : 0;
fe8ab488 429 task->requested_policy.t_sup_timer = qos_extract(info->timer_throttle);
39236c6e
A
430 task->requested_policy.t_sup_disk = (info->disk_throttle) ? 1 : 0;
431 task->requested_policy.t_sup_cpu_limit = (info->cpu_limit) ? 1 : 0;
432 task->requested_policy.t_sup_suspend = (info->suspend) ? 1 : 0;
fe8ab488 433 task->requested_policy.t_sup_throughput = qos_extract(info->throughput_qos);
39236c6e 434 task->requested_policy.t_sup_cpu = (info->suppressed_cpu) ? 1 : 0;
fe8ab488 435 task->requested_policy.t_sup_bg_sockets = (info->background_sockets) ? 1 : 0;
39236c6e 436
fe8ab488 437 task_policy_update_locked(task, THREAD_NULL, &pend_token);
39236c6e
A
438
439 task_unlock(task);
440
fe8ab488 441 task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
39236c6e
A
442
443 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
444 (IMPORTANCE_CODE(IMP_TASK_SUPPRESSION, info->active)) | DBG_FUNC_END,
3e170ce0 445 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL),
fe8ab488 446 trequested_1(task, THREAD_NULL), 0);
39236c6e
A
447
448 break;
0b4e3aa0 449
39236c6e 450 }
0b4e3aa0 451
39236c6e
A
452 default:
453 result = KERN_INVALID_ARGUMENT;
454 break;
0b4e3aa0 455 }
39236c6e
A
456
457 return (result);
0b4e3aa0
A
458}
459
39236c6e 460/* Sets BSD 'nice' value on the task */
0b4e3aa0
A
461kern_return_t
462task_importance(
463 task_t task,
464 integer_t importance)
465{
466 if (task == TASK_NULL || task == kernel_task)
467 return (KERN_INVALID_ARGUMENT);
468
469 task_lock(task);
470
471 if (!task->active) {
472 task_unlock(task);
473
474 return (KERN_TERMINATED);
475 }
476
39236c6e 477 if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) >= TASK_CONTROL_APPLICATION) {
0b4e3aa0
A
478 task_unlock(task);
479
1c79356b
A
480 return (KERN_INVALID_ARGUMENT);
481 }
482
39236c6e
A
483 task->importance = importance;
484
485 /* TODO: tracepoint? */
486
487 /* Redrive only the task priority calculation */
fe8ab488 488 task_policy_update_task_locked(task, FALSE, FALSE, FALSE);
0b4e3aa0
A
489
490 task_unlock(task);
491
1c79356b
A
492 return (KERN_SUCCESS);
493}
39236c6e 494
1c79356b
A
495kern_return_t
496task_policy_get(
497 task_t task,
498 task_policy_flavor_t flavor,
499 task_policy_t policy_info,
500 mach_msg_type_number_t *count,
501 boolean_t *get_default)
502{
0b4e3aa0 503 if (task == TASK_NULL || task == kernel_task)
1c79356b
A
504 return (KERN_INVALID_ARGUMENT);
505
506 switch (flavor) {
507
508 case TASK_CATEGORY_POLICY:
509 {
510 task_category_policy_t info = (task_category_policy_t)policy_info;
511
512 if (*count < TASK_CATEGORY_POLICY_COUNT)
513 return (KERN_INVALID_ARGUMENT);
514
0b4e3aa0
A
515 if (*get_default)
516 info->role = TASK_UNSPECIFIED;
39236c6e
A
517 else
518 info->role = proc_get_task_policy(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_ROLE);
519 break;
520 }
521
522 case TASK_BASE_QOS_POLICY: /* FALLTHRU */
523 case TASK_OVERRIDE_QOS_POLICY:
524 {
525 task_qos_policy_t info = (task_qos_policy_t)policy_info;
526
527 if (*count < TASK_QOS_POLICY_COUNT)
528 return (KERN_INVALID_ARGUMENT);
529
530 if (*get_default) {
531 info->task_latency_qos_tier = LATENCY_QOS_TIER_UNSPECIFIED;
532 info->task_throughput_qos_tier = THROUGHPUT_QOS_TIER_UNSPECIFIED;
533 } else if (flavor == TASK_BASE_QOS_POLICY) {
fe8ab488 534 int value1, value2;
39236c6e 535
fe8ab488
A
536 proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
537
538 info->task_latency_qos_tier = qos_latency_policy_package(value1);
539 info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
39236c6e 540
39236c6e 541 } else if (flavor == TASK_OVERRIDE_QOS_POLICY) {
fe8ab488 542 int value1, value2;
39236c6e 543
fe8ab488 544 proc_get_task_policy2(task, THREAD_NULL, TASK_POLICY_ATTRIBUTE, TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS, &value1, &value2);
39236c6e 545
fe8ab488
A
546 info->task_latency_qos_tier = qos_latency_policy_package(value1);
547 info->task_throughput_qos_tier = qos_throughput_policy_package(value2);
0b4e3aa0 548 }
39236c6e 549
1c79356b
A
550 break;
551 }
552
39236c6e
A
553 case TASK_POLICY_STATE:
554 {
555 task_policy_state_t info = (task_policy_state_t)policy_info;
1c79356b 556
39236c6e
A
557 if (*count < TASK_POLICY_STATE_COUNT)
558 return (KERN_INVALID_ARGUMENT);
6d2010ae 559
39236c6e
A
560 /* Only root can get this info */
561 if (current_task()->sec_token.val[0] != 0)
562 return KERN_PROTECTION_FAILURE;
6d2010ae 563
39236c6e
A
564 if (*get_default) {
565 info->requested = 0;
566 info->effective = 0;
567 info->pending = 0;
568 info->imp_assertcnt = 0;
569 info->imp_externcnt = 0;
570 info->flags = 0;
fe8ab488 571 info->imp_transitions = 0;
39236c6e 572 } else {
fe8ab488
A
573 task_lock(task);
574
39236c6e
A
575 info->requested = task_requested_bitfield(task, THREAD_NULL);
576 info->effective = task_effective_bitfield(task, THREAD_NULL);
fe8ab488 577 info->pending = 0;
39236c6e
A
578
579 info->flags = 0;
fe8ab488
A
580 if (task->task_imp_base != NULL) {
581 info->imp_assertcnt = task->task_imp_base->iit_assertcnt;
582 info->imp_externcnt = IIT_EXTERN(task->task_imp_base);
583 info->flags |= (task_is_marked_importance_receiver(task) ? TASK_IMP_RECEIVER : 0);
584 info->flags |= (task_is_marked_importance_denap_receiver(task) ? TASK_DENAP_RECEIVER : 0);
585 info->flags |= (task_is_marked_importance_donor(task) ? TASK_IMP_DONOR : 0);
586 info->flags |= (task_is_marked_live_importance_donor(task) ? TASK_IMP_LIVE_DONOR : 0);
587 info->imp_transitions = task->task_imp_base->iit_transitions;
588 } else {
589 info->imp_assertcnt = 0;
590 info->imp_externcnt = 0;
591 info->imp_transitions = 0;
592 }
593 task_unlock(task);
39236c6e 594 }
6d2010ae 595
fe8ab488
A
596 info->reserved[0] = 0;
597 info->reserved[1] = 0;
6d2010ae 598
39236c6e
A
599 break;
600 }
6d2010ae 601
39236c6e
A
602 case TASK_SUPPRESSION_POLICY:
603 {
604 task_suppression_policy_t info = (task_suppression_policy_t)policy_info;
6d2010ae 605
39236c6e
A
606 if (*count < TASK_SUPPRESSION_POLICY_COUNT)
607 return (KERN_INVALID_ARGUMENT);
6d2010ae 608
6d2010ae 609 task_lock(task);
39236c6e
A
610
611 if (*get_default) {
612 info->active = 0;
613 info->lowpri_cpu = 0;
614 info->timer_throttle = LATENCY_QOS_TIER_UNSPECIFIED;
615 info->disk_throttle = 0;
616 info->cpu_limit = 0;
617 info->suspend = 0;
618 info->throughput_qos = 0;
619 info->suppressed_cpu = 0;
620 } else {
621 info->active = task->requested_policy.t_sup_active;
622 info->lowpri_cpu = task->requested_policy.t_sup_lowpri_cpu;
fe8ab488 623 info->timer_throttle = qos_latency_policy_package(task->requested_policy.t_sup_timer);
39236c6e
A
624 info->disk_throttle = task->requested_policy.t_sup_disk;
625 info->cpu_limit = task->requested_policy.t_sup_cpu_limit;
626 info->suspend = task->requested_policy.t_sup_suspend;
fe8ab488 627 info->throughput_qos = qos_throughput_policy_package(task->requested_policy.t_sup_throughput);
39236c6e 628 info->suppressed_cpu = task->requested_policy.t_sup_cpu;
fe8ab488 629 info->background_sockets = task->requested_policy.t_sup_bg_sockets;
39236c6e
A
630 }
631
6d2010ae 632 task_unlock(task);
39236c6e
A
633 break;
634 }
635
636 default:
637 return (KERN_INVALID_ARGUMENT);
6d2010ae
A
638 }
639
39236c6e 640 return (KERN_SUCCESS);
6d2010ae
A
641}
642
39236c6e
A
643/*
644 * Called at task creation
645 * We calculate the correct effective but don't apply it to anything yet.
646 * The threads, etc will inherit from the task as they get created.
647 */
648void
649task_policy_create(task_t task, int parent_boosted)
6d2010ae 650{
39236c6e
A
651 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
652 if (parent_boosted) {
653 task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_INTERACTIVE;
654 task_importance_mark_donor(task, TRUE);
655 } else {
656 task->requested_policy.t_apptype = TASK_APPTYPE_DAEMON_BACKGROUND;
657 task_importance_mark_receiver(task, FALSE);
658 }
659 }
6d2010ae 660
39236c6e 661 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488 662 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_START,
3e170ce0 663 task_pid(task), teffective_0(task, THREAD_NULL),
fe8ab488 664 teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
39236c6e 665
fe8ab488 666 task_policy_update_internal_locked(task, THREAD_NULL, TRUE, NULL);
39236c6e
A
667
668 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488 669 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_TASK))) | DBG_FUNC_END,
3e170ce0 670 task_pid(task), teffective_0(task, THREAD_NULL),
fe8ab488
A
671 teffective_1(task, THREAD_NULL), tpriority(task, THREAD_NULL), 0);
672
673 task_importance_update_live_donor(task);
674 task_policy_update_task_locked(task, FALSE, FALSE, FALSE);
675}
676
677void
678thread_policy_create(thread_t thread)
679{
680 task_t task = thread->task;
681
682 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
683 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
684 targetid(task, thread), teffective_0(task, thread),
685 teffective_1(task, thread), tpriority(task, thread), 0);
686
687 task_policy_update_internal_locked(task, thread, TRUE, NULL);
688
689 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
690 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
691 targetid(task, thread), teffective_0(task, thread),
692 teffective_1(task, thread), tpriority(task, thread), 0);
6d2010ae
A
693}
694
39236c6e 695static void
fe8ab488 696task_policy_update_locked(task_t task, thread_t thread, task_pend_token_t pend_token)
6d2010ae 697{
39236c6e
A
698 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
699 (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread)) | DBG_FUNC_START),
fe8ab488
A
700 targetid(task, thread), teffective_0(task, thread),
701 teffective_1(task, thread), tpriority(task, thread), 0);
6d2010ae 702
fe8ab488 703 task_policy_update_internal_locked(task, thread, FALSE, pend_token);
6d2010ae 704
39236c6e 705 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488
A
706 (IMPORTANCE_CODE(IMP_UPDATE, tisthread(thread))) | DBG_FUNC_END,
707 targetid(task, thread), teffective_0(task, thread),
708 teffective_1(task, thread), tpriority(task, thread), 0);
39236c6e 709}
6d2010ae 710
39236c6e
A
711/*
712 * One state update function TO RULE THEM ALL
713 *
714 * This function updates the task or thread effective policy fields
715 * and pushes the results to the relevant subsystems.
716 *
717 * Must call update_complete after unlocking the task,
718 * as some subsystems cannot be updated while holding the task lock.
719 *
720 * Called with task locked, not thread
721 */
fe8ab488 722
39236c6e 723static void
fe8ab488 724task_policy_update_internal_locked(task_t task, thread_t thread, boolean_t in_create, task_pend_token_t pend_token)
6d2010ae 725{
39236c6e
A
726 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
727
728 /*
729 * Step 1:
730 * Gather requested policy
731 */
6d2010ae 732
39236c6e
A
733 struct task_requested_policy requested =
734 (on_task) ? task->requested_policy : thread->requested_policy;
735
fe8ab488 736
39236c6e
A
737 /*
738 * Step 2:
739 * Calculate new effective policies from requested policy and task state
740 * Rules:
741 * If in an 'on_task' block, must only look at and set fields starting with t_
742 * If operating on a task, don't touch anything starting with th_
743 * If operating on a thread, don't touch anything starting with t_
744 * Don't change requested, it won't take effect
745 */
6d2010ae 746
39236c6e 747 struct task_effective_policy next = {};
fe8ab488
A
748 struct task_effective_policy task_effective;
749
750 /* Calculate QoS policies */
751
752 if (on_task) {
753 /* Update task role */
754 next.t_role = requested.t_role;
755
756 /* Set task qos clamp and ceiling */
757 next.t_qos_clamp = requested.t_qos_clamp;
758
759 if (requested.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
760 requested.t_apptype == TASK_APPTYPE_APP_TAL) {
761
762 switch (next.t_role) {
763 case TASK_FOREGROUND_APPLICATION:
764 /* Foreground apps get urgent scheduler priority */
765 next.qos_ui_is_urgent = 1;
766 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
767 break;
768
769 case TASK_BACKGROUND_APPLICATION:
770 /* This is really 'non-focal but on-screen' */
771 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
772 break;
773
a1c7dba1
A
774 case TASK_DEFAULT_APPLICATION:
775 /* This is 'may render UI but we don't know if it's focal/nonfocal' */
776 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
3e170ce0 777 break;
a1c7dba1 778
fe8ab488
A
779 case TASK_NONUI_APPLICATION:
780 /* i.e. 'off-screen' */
781 next.t_qos_ceiling = THREAD_QOS_LEGACY;
782 break;
783
784 case TASK_CONTROL_APPLICATION:
785 case TASK_GRAPHICS_SERVER:
786 next.qos_ui_is_urgent = 1;
787 next.t_qos_ceiling = THREAD_QOS_UNSPECIFIED;
788 break;
789
3e170ce0
A
790 case TASK_THROTTLE_APPLICATION:
791 /* i.e. 'TAL launch' */
792 next.t_qos_ceiling = THREAD_QOS_UTILITY;
793 break;
794
fe8ab488
A
795 case TASK_UNSPECIFIED:
796 default:
797 /* Apps that don't have an application role get
798 * USER_INTERACTIVE and USER_INITIATED squashed to LEGACY */
799 next.t_qos_ceiling = THREAD_QOS_LEGACY;
800 break;
801 }
802 } else {
803 /* Daemons get USER_INTERACTIVE squashed to USER_INITIATED */
804 next.t_qos_ceiling = THREAD_QOS_USER_INITIATED;
805 }
806 } else {
807 /*
808 * Set thread qos tier
809 * Note that an override only overrides the QoS field, not other policy settings.
810 * A thread must already be participating in QoS for override to take effect
811 */
812
813 /* Snapshot the task's effective policy */
814 task_effective = task->effective_policy;
815
816 next.qos_ui_is_urgent = task_effective.qos_ui_is_urgent;
817
818 if ((requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED) && (requested.thrp_qos != THREAD_QOS_UNSPECIFIED))
819 next.thep_qos = MAX(requested.thrp_qos_override, requested.thrp_qos);
820 else
821 next.thep_qos = requested.thrp_qos;
822
823 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
824 if (task_effective.t_qos_clamp != THREAD_QOS_UNSPECIFIED) {
825 if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
826 next.thep_qos = MIN(task_effective.t_qos_clamp, next.thep_qos);
827 else
828 next.thep_qos = task_effective.t_qos_clamp;
829 }
830
831 /* The ceiling only applies to threads that are in the QoS world */
832 if (task_effective.t_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
833 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
834 next.thep_qos = MIN(task_effective.t_qos_ceiling, next.thep_qos);
835 }
836
837 /*
838 * The QoS relative priority is only applicable when the original programmer's
839 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
840 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
841 * since otherwise it would be lower than unclamped threads. Similarly, in the
842 * presence of boosting, the programmer doesn't know what other actors
843 * are boosting the thread.
844 */
845 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
846 (requested.thrp_qos == next.thep_qos) &&
847 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
848 next.thep_qos_relprio = requested.thrp_qos_relprio;
849 } else {
850 next.thep_qos_relprio = 0;
851 }
852 }
6d2010ae 853
39236c6e
A
854 /* Calculate DARWIN_BG */
855 boolean_t wants_darwinbg = FALSE;
856 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
857 boolean_t wants_watchersbg = FALSE; /* Do I want my pidbound threads to be bg */
fe8ab488 858
39236c6e
A
859 /*
860 * If DARWIN_BG has been requested at either level, it's engaged.
861 * Only true DARWIN_BG changes cause watchers to transition.
fe8ab488
A
862 *
863 * Backgrounding due to apptype does.
39236c6e
A
864 */
865 if (requested.int_darwinbg || requested.ext_darwinbg)
866 wants_watchersbg = wants_all_sockets_bg = wants_darwinbg = TRUE;
867
868 if (on_task) {
869 /* Background TAL apps are throttled when TAL is enabled */
870 if (requested.t_apptype == TASK_APPTYPE_APP_TAL &&
871 requested.t_role == TASK_BACKGROUND_APPLICATION &&
872 requested.t_tal_enabled == 1) {
3e170ce0
A
873 next.t_tal_engaged = 1;
874 }
875
876 if ((requested.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
877 requested.t_apptype == TASK_APPTYPE_APP_TAL) &&
878 requested.t_role == TASK_THROTTLE_APPLICATION) {
39236c6e
A
879 next.t_tal_engaged = 1;
880 }
6d2010ae 881
39236c6e
A
882 /* Adaptive daemons are DARWIN_BG unless boosted, and don't get network throttled. */
883 if (requested.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE &&
884 requested.t_boosted == 0)
885 wants_darwinbg = TRUE;
886
887 /* Background daemons are always DARWIN_BG, no exceptions, and don't get network throttled. */
888 if (requested.t_apptype == TASK_APPTYPE_DAEMON_BACKGROUND)
889 wants_darwinbg = TRUE;
fe8ab488
A
890
891 if (next.t_qos_clamp == THREAD_QOS_BACKGROUND || next.t_qos_clamp == THREAD_QOS_MAINTENANCE)
892 wants_darwinbg = TRUE;
6d2010ae 893 } else {
39236c6e
A
894 if (requested.th_pidbind_bg)
895 wants_all_sockets_bg = wants_darwinbg = TRUE;
6d2010ae 896
39236c6e
A
897 if (requested.th_workq_bg)
898 wants_darwinbg = TRUE;
fe8ab488
A
899
900 if (next.thep_qos == THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_MAINTENANCE)
901 wants_darwinbg = TRUE;
39236c6e 902 }
6d2010ae 903
39236c6e 904 /* Calculate side effects of DARWIN_BG */
6d2010ae 905
39236c6e
A
906 if (wants_darwinbg) {
907 next.darwinbg = 1;
908 /* darwinbg threads/tasks always create bg sockets, but we don't always loop over all sockets */
909 next.new_sockets_bg = 1;
910 next.lowpri_cpu = 1;
6d2010ae
A
911 }
912
39236c6e
A
913 if (wants_all_sockets_bg)
914 next.all_sockets_bg = 1;
6d2010ae 915
39236c6e
A
916 if (on_task && wants_watchersbg)
917 next.t_watchers_bg = 1;
6d2010ae 918
fe8ab488
A
919 /* darwinbg on either task or thread implies background QOS (or lower) */
920 if (!on_task &&
921 (wants_darwinbg || task_effective.darwinbg) &&
922 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)){
923 next.thep_qos = THREAD_QOS_BACKGROUND;
924 next.thep_qos_relprio = 0;
925 }
926
39236c6e 927 /* Calculate low CPU priority */
6d2010ae 928
39236c6e 929 boolean_t wants_lowpri_cpu = FALSE;
316670eb 930
3e170ce0
A
931 if (wants_darwinbg)
932 wants_lowpri_cpu = TRUE;
933
934 if (next.t_tal_engaged)
39236c6e
A
935 wants_lowpri_cpu = TRUE;
936
937 if (on_task && requested.t_sup_lowpri_cpu && requested.t_boosted == 0)
938 wants_lowpri_cpu = TRUE;
939
940 if (wants_lowpri_cpu)
941 next.lowpri_cpu = 1;
942
943 /* Calculate IO policy */
944
945 /* Update BG IO policy (so we can see if it has changed) */
946 next.bg_iotier = requested.bg_iotier;
947
948 int iopol = THROTTLE_LEVEL_TIER0;
949
950 if (wants_darwinbg)
951 iopol = MAX(iopol, requested.bg_iotier);
952
953 if (on_task) {
954 if (requested.t_apptype == TASK_APPTYPE_DAEMON_STANDARD)
955 iopol = MAX(iopol, proc_standard_daemon_tier);
956
957 if (requested.t_sup_disk && requested.t_boosted == 0)
958 iopol = MAX(iopol, proc_suppressed_disk_tier);
959
3e170ce0 960 if (next.t_tal_engaged)
39236c6e 961 iopol = MAX(iopol, proc_tal_disk_tier);
fe8ab488
A
962
963 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
964 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.t_qos_clamp]);
965
966 } else {
967 /* Look up the associated IO tier value for the QoS class */
968 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
6d2010ae 969 }
6d2010ae 970
39236c6e
A
971 iopol = MAX(iopol, requested.int_iotier);
972 iopol = MAX(iopol, requested.ext_iotier);
6d2010ae 973
39236c6e 974 next.io_tier = iopol;
6d2010ae 975
39236c6e
A
976 /* Calculate Passive IO policy */
977
978 if (requested.ext_iopassive || requested.int_iopassive)
979 next.io_passive = 1;
980
981 /* Calculate miscellaneous policy */
982
983 if (on_task) {
39236c6e
A
984 /* Calculate suppression-active flag */
985 if (requested.t_sup_active && requested.t_boosted == 0)
986 next.t_sup_active = 1;
987
988 /* Calculate suspend policy */
989 if (requested.t_sup_suspend && requested.t_boosted == 0)
990 next.t_suspended = 1;
991
39236c6e
A
992 /* Calculate timer QOS */
993 int latency_qos = requested.t_base_latency_qos;
994
995 if (requested.t_sup_timer && requested.t_boosted == 0)
996 latency_qos = requested.t_sup_timer;
997
fe8ab488
A
998 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
999 latency_qos = MAX(latency_qos, (int)thread_qos_policy_params.qos_latency_qos[next.t_qos_clamp]);
1000
39236c6e
A
1001 if (requested.t_over_latency_qos != 0)
1002 latency_qos = requested.t_over_latency_qos;
1003
1004 /* Treat the windowserver special */
1005 if (requested.t_role == TASK_GRAPHICS_SERVER)
1006 latency_qos = proc_graphics_timer_qos;
1007
1008 next.t_latency_qos = latency_qos;
1009
1010 /* Calculate throughput QOS */
1011 int through_qos = requested.t_base_through_qos;
1012
1013 if (requested.t_sup_throughput && requested.t_boosted == 0)
1014 through_qos = requested.t_sup_throughput;
1015
fe8ab488
A
1016 if (next.t_qos_clamp != THREAD_QOS_UNSPECIFIED)
1017 through_qos = MAX(through_qos, (int)thread_qos_policy_params.qos_through_qos[next.t_qos_clamp]);
1018
39236c6e
A
1019 if (requested.t_over_through_qos != 0)
1020 through_qos = requested.t_over_through_qos;
1021
1022 next.t_through_qos = through_qos;
1023
1024 /* Calculate suppressed CPU priority */
1025 if (requested.t_sup_cpu && requested.t_boosted == 0)
1026 next.t_suppressed_cpu = 1;
fe8ab488
A
1027
1028 /*
1029 * Calculate background sockets
1030 * Don't take into account boosting to limit transition frequency.
1031 */
1032 if (requested.t_sup_bg_sockets){
1033 next.all_sockets_bg = 1;
1034 next.new_sockets_bg = 1;
1035 }
1036
1037 /* Apply SFI Managed class bit */
1038 next.t_sfi_managed = requested.t_sfi_managed;
1039
1040 /* Calculate 'live donor' status for live importance */
1041 switch (requested.t_apptype) {
1042 case TASK_APPTYPE_APP_TAL:
1043 case TASK_APPTYPE_APP_DEFAULT:
1044 if (requested.ext_darwinbg == 0)
1045 next.t_live_donor = 1;
1046 else
1047 next.t_live_donor = 0;
1048 break;
1049
1050 case TASK_APPTYPE_DAEMON_INTERACTIVE:
1051 case TASK_APPTYPE_DAEMON_STANDARD:
1052 case TASK_APPTYPE_DAEMON_ADAPTIVE:
1053 case TASK_APPTYPE_DAEMON_BACKGROUND:
1054 default:
1055 next.t_live_donor = 0;
1056 break;
1057 }
6d2010ae
A
1058 }
1059
39236c6e
A
1060 if (requested.terminated) {
1061 /*
1062 * Shoot down the throttles that slow down exit or response to SIGTERM
1063 * We don't need to shoot down:
1064 * passive (don't want to cause others to throttle)
1065 * all_sockets_bg (don't need to iterate FDs on every exit)
1066 * new_sockets_bg (doesn't matter for exiting process)
39236c6e
A
1067 * pidsuspend (jetsam-ed BG process shouldn't run again)
1068 * watchers_bg (watcher threads don't need to be unthrottled)
1069 * t_latency_qos (affects userspace timers only)
1070 */
6d2010ae 1071
39236c6e
A
1072 next.terminated = 1;
1073 next.darwinbg = 0;
1074 next.lowpri_cpu = 0;
1075 next.io_tier = THROTTLE_LEVEL_TIER0;
1076 if (on_task) {
1077 next.t_tal_engaged = 0;
1078 next.t_role = TASK_UNSPECIFIED;
1079 next.t_suppressed_cpu = 0;
1080
1081 /* TODO: This should only be shot down on SIGTERM, not exit */
1082 next.t_suspended = 0;
fe8ab488 1083 } else {
3e170ce0 1084 next.thep_qos = THREAD_QOS_UNSPECIFIED;
39236c6e
A
1085 }
1086 }
6d2010ae 1087
39236c6e
A
1088 /*
1089 * Step 3:
1090 * Swap out old policy for new policy
1091 */
6d2010ae 1092
fe8ab488
A
1093 if (!on_task) {
1094 /* Acquire thread mutex to synchronize against
1095 * thread_policy_set(). Consider reworking to separate qos
1096 * fields, or locking the task in thread_policy_set.
1097 * A more efficient model would be to make the thread bits
1098 * authoritative.
1099 */
1100 thread_mtx_lock(thread);
1101 }
1102
39236c6e
A
1103 struct task_effective_policy prev =
1104 (on_task) ? task->effective_policy : thread->effective_policy;
1105
1106 /*
1107 * Check for invalid transitions here for easier debugging
1108 * TODO: dump the structs as hex in the panic string
1109 */
1110 if (task == kernel_task && prev.all_sockets_bg != next.all_sockets_bg)
1111 panic("unexpected network change for kernel task");
1112
1113 /* This is the point where the new values become visible to other threads */
1114 if (on_task)
1115 task->effective_policy = next;
fe8ab488
A
1116 else {
1117 /* Preserve thread specific latency/throughput QoS modified via
1118 * thread_policy_set(). Inelegant in the extreme, to be reworked.
1119 *
1120 * If thread QoS class is set, we don't need to preserve the previously set values.
1121 * We should ensure to not accidentally preserve previous thread QoS values if you set a thread
1122 * back to default QoS.
1123 */
1124 uint32_t lqos = thread->effective_policy.t_latency_qos, tqos = thread->effective_policy.t_through_qos;
1125
1126 if (prev.thep_qos == THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) {
1127 next.t_latency_qos = lqos;
1128 next.t_through_qos = tqos;
1129 } else if (prev.thep_qos != THREAD_QOS_UNSPECIFIED && next.thep_qos == THREAD_QOS_UNSPECIFIED) {
1130 next.t_latency_qos = 0;
1131 next.t_through_qos = 0;
1132 } else {
1133 next.t_latency_qos = thread_qos_policy_params.qos_latency_qos[next.thep_qos];
1134 next.t_through_qos = thread_qos_policy_params.qos_through_qos[next.thep_qos];
1135 }
1136
1137 thread_update_qos_cpu_time(thread, TRUE);
39236c6e 1138 thread->effective_policy = next;
fe8ab488
A
1139 thread_mtx_unlock(thread);
1140 }
39236c6e
A
1141
1142 /* Don't do anything further to a half-formed task or thread */
1143 if (in_create)
1144 return;
1145
1146 /*
1147 * Step 4:
1148 * Pend updates that can't be done while holding the task lock
39236c6e
A
1149 */
1150
39236c6e 1151 if (prev.all_sockets_bg != next.all_sockets_bg)
fe8ab488 1152 pend_token->tpt_update_sockets = 1;
39236c6e
A
1153
1154 if (on_task) {
1155 /* Only re-scan the timer list if the qos level is getting less strong */
1156 if (prev.t_latency_qos > next.t_latency_qos)
fe8ab488 1157 pend_token->tpt_update_timers = 1;
6d2010ae 1158
6d2010ae 1159
fe8ab488
A
1160 if (prev.t_live_donor != next.t_live_donor)
1161 pend_token->tpt_update_live_donor = 1;
1162 }
39236c6e
A
1163
1164 /*
1165 * Step 5:
1166 * Update other subsystems as necessary if something has changed
1167 */
1168
1169 boolean_t update_throttle = (prev.io_tier != next.io_tier) ? TRUE : FALSE;
1170
1171 if (on_task) {
1172 if (prev.t_suspended == 0 && next.t_suspended == 1 && task->active) {
1173 task_hold_locked(task);
1174 task_wait_locked(task, FALSE);
1175 }
1176 if (prev.t_suspended == 1 && next.t_suspended == 0 && task->active) {
1177 task_release_locked(task);
1178 }
1179
1180 boolean_t update_threads = FALSE;
fe8ab488
A
1181 boolean_t update_sfi = FALSE;
1182
1183 if (prev.bg_iotier != next.bg_iotier ||
1184 prev.terminated != next.terminated ||
1185 prev.t_qos_clamp != next.t_qos_clamp ||
1186 prev.t_qos_ceiling != next.t_qos_ceiling ||
1187 prev.qos_ui_is_urgent != next.qos_ui_is_urgent ||
1188 prev.darwinbg != next.darwinbg)
39236c6e
A
1189 update_threads = TRUE;
1190
fe8ab488
A
1191 /*
1192 * A bit of a layering violation. We know what task policy attributes
1193 * sfi_thread_classify() consults, so if they change, trigger SFI
1194 * re-evaluation.
1195 */
1196 if ((prev.t_latency_qos != next.t_latency_qos) ||
1197 (prev.t_role != next.t_role) ||
1198 (prev.darwinbg != next.darwinbg) ||
1199 (prev.t_sfi_managed != next.t_sfi_managed))
1200 update_sfi = TRUE;
39236c6e 1201
3e170ce0 1202#if CONFIG_SCHED_SFI
a1c7dba1
A
1203 if (prev.t_role != next.t_role && task_policy_update_coalition_focal_tasks(task, prev.t_role, next.t_role)) {
1204 update_sfi = TRUE;
1205 pend_token->tpt_update_coal_sfi = 1;
1206 }
3e170ce0 1207#endif /* !CONFIG_SCHED_SFI */
a1c7dba1 1208
fe8ab488 1209 task_policy_update_task_locked(task, update_throttle, update_threads, update_sfi);
39236c6e
A
1210 } else {
1211 int update_cpu = 0;
fe8ab488
A
1212 boolean_t update_sfi = FALSE;
1213 boolean_t update_qos = FALSE;
39236c6e
A
1214
1215 if (prev.lowpri_cpu != next.lowpri_cpu)
1216 update_cpu = (next.lowpri_cpu ? DO_LOWPRI_CPU : UNDO_LOWPRI_CPU);
1217
fe8ab488
A
1218 if (prev.darwinbg != next.darwinbg ||
1219 prev.thep_qos != next.thep_qos)
1220 update_sfi = TRUE;
1221
1222 if (prev.thep_qos != next.thep_qos ||
1223 prev.thep_qos_relprio != next.thep_qos_relprio ||
3e170ce0
A
1224 prev.qos_ui_is_urgent != next.qos_ui_is_urgent ||
1225 prev.terminated != next.terminated) {
fe8ab488
A
1226 update_qos = TRUE;
1227 }
1228
1229 task_policy_update_thread_locked(thread, update_cpu, update_throttle, update_sfi, update_qos);
39236c6e 1230 }
6d2010ae
A
1231}
1232
3e170ce0
A
1233
1234#if CONFIG_SCHED_SFI
a1c7dba1
A
1235/*
1236 * Yet another layering violation. We reach out and bang on the coalition directly.
1237 */
1238static boolean_t
1239task_policy_update_coalition_focal_tasks(task_t task,
1240 int prev_role,
1241 int next_role)
1242{
1243 boolean_t sfi_transition = FALSE;
1244
3e170ce0 1245 /* task moving into/out-of the foreground */
a1c7dba1 1246 if (prev_role != TASK_FOREGROUND_APPLICATION && next_role == TASK_FOREGROUND_APPLICATION) {
3e170ce0 1247 if (task_coalition_adjust_focal_count(task, 1) == 1)
a1c7dba1
A
1248 sfi_transition = TRUE;
1249 } else if (prev_role == TASK_FOREGROUND_APPLICATION && next_role != TASK_FOREGROUND_APPLICATION) {
3e170ce0 1250 if (task_coalition_adjust_focal_count(task, -1) == 0)
a1c7dba1
A
1251 sfi_transition = TRUE;
1252 }
1253
3e170ce0 1254 /* task moving into/out-of background */
a1c7dba1 1255 if (prev_role != TASK_BACKGROUND_APPLICATION && next_role == TASK_BACKGROUND_APPLICATION) {
3e170ce0 1256 if (task_coalition_adjust_nonfocal_count(task, 1) == 1)
a1c7dba1
A
1257 sfi_transition = TRUE;
1258 } else if (prev_role == TASK_BACKGROUND_APPLICATION && next_role != TASK_BACKGROUND_APPLICATION) {
3e170ce0 1259 if (task_coalition_adjust_nonfocal_count(task, -1) == 0)
a1c7dba1
A
1260 sfi_transition = TRUE;
1261 }
1262
1263 return sfi_transition;
1264}
3e170ce0 1265#endif /* CONFIG_SCHED_SFI */
a1c7dba1 1266
39236c6e 1267/* Despite the name, the thread's task is locked, the thread is not */
fe8ab488 1268void
39236c6e
A
1269task_policy_update_thread_locked(thread_t thread,
1270 int update_cpu,
fe8ab488
A
1271 boolean_t update_throttle,
1272 boolean_t update_sfi,
1273 boolean_t update_qos)
6d2010ae 1274{
39236c6e 1275 thread_precedence_policy_data_t policy;
6d2010ae 1276
39236c6e
A
1277 if (update_throttle) {
1278 rethrottle_thread(thread->uthread);
1279 }
316670eb 1280
fe8ab488
A
1281 if (update_sfi) {
1282 sfi_reevaluate(thread);
1283 }
1284
39236c6e
A
1285 /*
1286 * TODO: pidbind needs to stuff remembered importance into saved_importance
1287 * properly deal with bg'ed threads being pidbound and unbging while pidbound
1288 *
1289 * TODO: A BG thread's priority is 0 on desktop and 4 on embedded. Need to reconcile this.
1290 * */
1291 if (update_cpu == DO_LOWPRI_CPU) {
1292 thread->saved_importance = thread->importance;
1293 policy.importance = INT_MIN;
1294 } else if (update_cpu == UNDO_LOWPRI_CPU) {
1295 policy.importance = thread->saved_importance;
1296 thread->saved_importance = 0;
6d2010ae 1297 }
6d2010ae 1298
39236c6e
A
1299 /* Takes thread lock and thread mtx lock */
1300 if (update_cpu)
1301 thread_policy_set_internal(thread, THREAD_PRECEDENCE_POLICY,
1302 (thread_policy_t)&policy,
1303 THREAD_PRECEDENCE_POLICY_COUNT);
fe8ab488
A
1304
1305 if (update_qos)
1306 thread_recompute_qos(thread);
6d2010ae
A
1307}
1308
39236c6e
A
1309/*
1310 * Calculate priority on a task, loop through its threads, and tell them about
1311 * priority changes and throttle changes.
1312 */
fe8ab488 1313void
39236c6e
A
1314task_policy_update_task_locked(task_t task,
1315 boolean_t update_throttle,
fe8ab488
A
1316 boolean_t update_threads,
1317 boolean_t update_sfi)
6d2010ae 1318{
39236c6e 1319 boolean_t update_priority = FALSE;
6d2010ae 1320
39236c6e
A
1321 if (task == kernel_task)
1322 panic("Attempting to set task policy on kernel_task");
6d2010ae 1323
39236c6e
A
1324 int priority = BASEPRI_DEFAULT;
1325 int max_priority = MAXPRI_USER;
6d2010ae 1326
39236c6e
A
1327 if (proc_get_effective_task_policy(task, TASK_POLICY_LOWPRI_CPU)) {
1328 priority = MAXPRI_THROTTLE;
1329 max_priority = MAXPRI_THROTTLE;
1330 } else if (proc_get_effective_task_policy(task, TASK_POLICY_SUPPRESSED_CPU)) {
1331 priority = MAXPRI_SUPPRESSED;
1332 max_priority = MAXPRI_SUPPRESSED;
6d2010ae 1333 } else {
39236c6e 1334 switch (proc_get_effective_task_policy(task, TASK_POLICY_ROLE)) {
39236c6e
A
1335 case TASK_CONTROL_APPLICATION:
1336 priority = BASEPRI_CONTROL;
1337 break;
1338 case TASK_GRAPHICS_SERVER:
1339 priority = BASEPRI_GRAPHICS;
1340 max_priority = MAXPRI_RESERVED;
1341 break;
1342 default:
1343 break;
1344 }
1345
1346 /* factor in 'nice' value */
1347 priority += task->importance;
fe8ab488
A
1348
1349 if (task->effective_policy.t_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1350 int qos_clamp_priority = thread_qos_policy_params.qos_pri[task->effective_policy.t_qos_clamp];
1351
1352 priority = MIN(priority, qos_clamp_priority);
1353 max_priority = MIN(max_priority, qos_clamp_priority);
1354 }
6d2010ae
A
1355 }
1356
39236c6e
A
1357 /* avoid extra work if priority isn't changing */
1358 if (task->priority != priority || task->max_priority != max_priority) {
1359 update_priority = TRUE;
6d2010ae 1360
39236c6e
A
1361 /* update the scheduling priority for the task */
1362 task->max_priority = max_priority;
6d2010ae 1363
39236c6e
A
1364 if (priority > task->max_priority)
1365 priority = task->max_priority;
1366 else if (priority < MINPRI)
1367 priority = MINPRI;
316670eb 1368
39236c6e 1369 task->priority = priority;
6d2010ae 1370 }
6d2010ae 1371
39236c6e 1372 /* Loop over the threads in the task only once, and only if necessary */
fe8ab488 1373 if (update_threads || update_throttle || update_priority || update_sfi ) {
39236c6e 1374 thread_t thread;
6d2010ae 1375
39236c6e
A
1376 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1377 if (update_priority) {
1378 thread_mtx_lock(thread);
6d2010ae 1379
fe8ab488 1380 thread_task_priority(thread, priority, max_priority);
6d2010ae 1381
39236c6e
A
1382 thread_mtx_unlock(thread);
1383 }
6d2010ae 1384
39236c6e
A
1385 if (update_throttle) {
1386 rethrottle_thread(thread->uthread);
1387 }
1388
fe8ab488
A
1389 if (update_sfi) {
1390 sfi_reevaluate(thread);
1391 }
1392
39236c6e
A
1393 if (update_threads) {
1394 thread->requested_policy.bg_iotier = task->effective_policy.bg_iotier;
1395 thread->requested_policy.terminated = task->effective_policy.terminated;
1396
fe8ab488 1397 task_policy_update_internal_locked(task, thread, FALSE, NULL);
39236c6e
A
1398 /* The thread policy must not emit any completion actions due to this change. */
1399 }
1400 }
1401 }
6d2010ae
A
1402}
1403
3e170ce0
A
1404#if CONFIG_SCHED_SFI
1405/* coalition object is locked */
1406static void
1407task_sfi_reevaluate_cb(coalition_t coal, void *ctx, task_t task)
1408{
1409 thread_t thread;
1410
1411 /* unused for now */
1412 (void)coal;
1413
1414 /* skip the task we're re-evaluating on behalf of: it's already updated */
1415 if (task == (task_t)ctx)
1416 return;
1417
1418 task_lock(task);
1419
1420 queue_iterate(&task->threads, thread, thread_t, task_threads) {
1421 sfi_reevaluate(thread);
1422 }
1423
1424 task_unlock(task);
1425}
1426#endif /* CONFIG_SCHED_SFI */
1427
39236c6e
A
1428/*
1429 * Called with task unlocked to do things that can't be done while holding the task lock
39236c6e 1430 */
fe8ab488
A
1431void
1432task_policy_update_complete_unlocked(task_t task, thread_t thread, task_pend_token_t pend_token)
6d2010ae 1433{
39236c6e
A
1434 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1435
39236c6e 1436#ifdef MACH_BSD
fe8ab488
A
1437 if (pend_token->tpt_update_sockets)
1438 proc_apply_task_networkbg(task->bsd_info, thread);
39236c6e
A
1439#endif /* MACH_BSD */
1440
1441 if (on_task) {
fe8ab488
A
1442 /* The timer throttle has been removed or reduced, we need to look for expired timers and fire them */
1443 if (pend_token->tpt_update_timers)
39236c6e 1444 ml_timer_evaluate();
316670eb 1445
39236c6e 1446
fe8ab488
A
1447 if (pend_token->tpt_update_live_donor)
1448 task_importance_update_live_donor(task);
a1c7dba1 1449
3e170ce0
A
1450#if CONFIG_SCHED_SFI
1451 /* use the resource coalition for SFI re-evaluation */
a1c7dba1 1452 if (pend_token->tpt_update_coal_sfi)
3e170ce0
A
1453 coalition_for_each_task(task->coalition[COALITION_TYPE_RESOURCE],
1454 (void *)task, task_sfi_reevaluate_cb);
1455#endif /* CONFIG_SCHED_SFI */
fe8ab488 1456 }
6d2010ae
A
1457}
1458
39236c6e
A
1459/*
1460 * Initiate a task policy state transition
1461 *
1462 * Everything that modifies requested except functions that need to hold the task lock
1463 * should use this function
1464 *
1465 * Argument validation should be performed before reaching this point.
1466 *
1467 * TODO: Do we need to check task->active or thread->active?
1468 */
1469void
1470proc_set_task_policy(task_t task,
1471 thread_t thread,
1472 int category,
1473 int flavor,
1474 int value)
6d2010ae 1475{
fe8ab488
A
1476 struct task_pend_token pend_token = {};
1477
39236c6e 1478 task_lock(task);
6d2010ae 1479
39236c6e 1480 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488
A
1481 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
1482 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0);
6d2010ae 1483
39236c6e
A
1484 proc_set_task_policy_locked(task, thread, category, flavor, value);
1485
fe8ab488 1486 task_policy_update_locked(task, thread, &pend_token);
39236c6e
A
1487
1488 task_unlock(task);
1489
1490 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488
A
1491 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
1492 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
39236c6e 1493
fe8ab488 1494 task_policy_update_complete_unlocked(task, thread, &pend_token);
6d2010ae
A
1495}
1496
39236c6e
A
1497/*
1498 * Initiate a task policy state transition on a thread with its TID
1499 * Useful if you cannot guarantee the thread won't get terminated
1500 */
1501void
1502proc_set_task_policy_thread(task_t task,
1503 uint64_t tid,
1504 int category,
1505 int flavor,
1506 int value)
6d2010ae 1507{
39236c6e 1508 thread_t thread;
6d2010ae 1509 thread_t self = current_thread();
fe8ab488 1510 struct task_pend_token pend_token = {};
6d2010ae 1511
39236c6e 1512 task_lock(task);
6d2010ae 1513
39236c6e 1514 if (tid == TID_NULL || tid == self->thread_id)
6d2010ae 1515 thread = self;
39236c6e
A
1516 else
1517 thread = task_findtid(task, tid);
1518
1519 if (thread == THREAD_NULL) {
1520 task_unlock(task);
1521 return;
6d2010ae
A
1522 }
1523
39236c6e 1524 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488
A
1525 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1526 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value, 0);
6d2010ae 1527
39236c6e
A
1528 proc_set_task_policy_locked(task, thread, category, flavor, value);
1529
fe8ab488 1530 task_policy_update_locked(task, thread, &pend_token);
39236c6e
A
1531
1532 task_unlock(task);
1533
1534 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488
A
1535 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1536 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), tpending(&pend_token), 0);
39236c6e 1537
fe8ab488 1538 task_policy_update_complete_unlocked(task, thread, &pend_token);
6d2010ae
A
1539}
1540
fe8ab488
A
1541/*
1542 * Variant of proc_set_task_policy() that sets two scalars in the requested policy structure.
1543 * Same locking rules apply.
1544 */
1545void
1546proc_set_task_policy2(task_t task, thread_t thread, int category, int flavor, int value1, int value2)
1547{
1548 struct task_pend_token pend_token = {};
1549
1550 task_lock(task);
1551
1552 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1553 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_START,
1554 targetid(task, thread), trequested_0(task, thread), trequested_1(task, thread), value1, 0);
1555
1556 proc_set_task_policy2_locked(task, thread, category, flavor, value1, value2);
1557
1558 task_policy_update_locked(task, thread, &pend_token);
1559
1560 task_unlock(task);
1561
1562 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1563 (IMPORTANCE_CODE(flavor, (category | tisthread(thread)))) | DBG_FUNC_END,
1564 targetid(task, thread), trequested_0(task, thread), trequested_0(task, thread), tpending(&pend_token), 0);
1565
1566 task_policy_update_complete_unlocked(task, thread, &pend_token);
1567}
39236c6e
A
1568
1569/*
1570 * Set the requested state for a specific flavor to a specific value.
1571 *
1572 * TODO:
1573 * Verify that arguments to non iopol things are 1 or 0
1574 */
1575static void
1576proc_set_task_policy_locked(task_t task,
1577 thread_t thread,
1578 int category,
1579 int flavor,
1580 int value)
6d2010ae 1581{
39236c6e
A
1582 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1583
1584 int tier, passive;
6d2010ae 1585
39236c6e
A
1586 struct task_requested_policy requested =
1587 (on_task) ? task->requested_policy : thread->requested_policy;
316670eb 1588
39236c6e
A
1589 switch (flavor) {
1590
1591 /* Category: EXTERNAL and INTERNAL, thread and task */
1592
1593 case TASK_POLICY_DARWIN_BG:
1594 if (category == TASK_POLICY_EXTERNAL)
1595 requested.ext_darwinbg = value;
1596 else
1597 requested.int_darwinbg = value;
1598 break;
316670eb 1599
39236c6e
A
1600 case TASK_POLICY_IOPOL:
1601 proc_iopol_to_tier(value, &tier, &passive);
1602 if (category == TASK_POLICY_EXTERNAL) {
1603 requested.ext_iotier = tier;
1604 requested.ext_iopassive = passive;
1605 } else {
1606 requested.int_iotier = tier;
1607 requested.int_iopassive = passive;
6d2010ae 1608 }
39236c6e
A
1609 break;
1610
1611 case TASK_POLICY_IO:
1612 if (category == TASK_POLICY_EXTERNAL)
1613 requested.ext_iotier = value;
1614 else
1615 requested.int_iotier = value;
1616 break;
1617
1618 case TASK_POLICY_PASSIVE_IO:
1619 if (category == TASK_POLICY_EXTERNAL)
1620 requested.ext_iopassive = value;
1621 else
1622 requested.int_iopassive = value;
1623 break;
1624
fe8ab488 1625 /* Category: INTERNAL, task only */
316670eb 1626
39236c6e
A
1627 case TASK_POLICY_DARWIN_BG_IOPOL:
1628 assert(on_task && category == TASK_POLICY_INTERNAL);
1629 proc_iopol_to_tier(value, &tier, &passive);
1630 requested.bg_iotier = tier;
1631 break;
1632
1633 /* Category: ATTRIBUTE, task only */
1634
1635 case TASK_POLICY_TAL:
1636 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1637 requested.t_tal_enabled = value;
1638 break;
1639
1640 case TASK_POLICY_BOOST:
1641 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1642 requested.t_boosted = value;
1643 break;
1644
1645 case TASK_POLICY_ROLE:
1646 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1647 requested.t_role = value;
1648 break;
1649
1650 case TASK_POLICY_TERMINATED:
1651 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1652 requested.terminated = value;
1653 break;
fe8ab488
A
1654 case TASK_BASE_LATENCY_QOS_POLICY:
1655 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1656 requested.t_base_latency_qos = value;
1657 break;
1658 case TASK_BASE_THROUGHPUT_QOS_POLICY:
1659 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1660 requested.t_base_through_qos = value;
1661 break;
1662 case TASK_POLICY_SFI_MANAGED:
1663 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1664 requested.t_sfi_managed = value;
1665 break;
39236c6e
A
1666
1667 /* Category: ATTRIBUTE, thread only */
1668
1669 case TASK_POLICY_PIDBIND_BG:
1670 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1671 requested.th_pidbind_bg = value;
1672 break;
1673
1674 case TASK_POLICY_WORKQ_BG:
1675 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1676 requested.th_workq_bg = value;
1677 break;
1678
fe8ab488
A
1679 case TASK_POLICY_QOS:
1680 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1681 requested.thrp_qos = value;
1682 break;
1683
1684 case TASK_POLICY_QOS_OVERRIDE:
1685 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1686 requested.thrp_qos_override = value;
1687 break;
1688
39236c6e
A
1689 default:
1690 panic("unknown task policy: %d %d %d", category, flavor, value);
1691 break;
1692 }
1693
1694 if (on_task)
1695 task->requested_policy = requested;
1696 else
1697 thread->requested_policy = requested;
6d2010ae
A
1698}
1699
39236c6e 1700/*
fe8ab488 1701 * Variant of proc_set_task_policy_locked() that sets two scalars in the requested policy structure.
39236c6e 1702 */
fe8ab488
A
1703static void
1704proc_set_task_policy2_locked(task_t task,
1705 thread_t thread,
1706 int category,
1707 int flavor,
1708 int value1,
1709 int value2)
316670eb 1710{
39236c6e 1711 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
316670eb 1712
fe8ab488
A
1713 struct task_requested_policy requested =
1714 (on_task) ? task->requested_policy : thread->requested_policy;
1715
1716 switch (flavor) {
1717
1718 /* Category: ATTRIBUTE, task only */
1719
1720 case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1721 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1722 requested.t_base_latency_qos = value1;
1723 requested.t_base_through_qos = value2;
1724 break;
1725
1726 case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1727 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1728 requested.t_over_latency_qos = value1;
1729 requested.t_over_through_qos = value2;
1730 break;
1731
1732 /* Category: ATTRIBUTE, thread only */
1733
1734 case TASK_POLICY_QOS_AND_RELPRIO:
1735
1736 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1737 requested.thrp_qos = value1;
1738 requested.thrp_qos_relprio = value2;
1739 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1740 break;
1741
1742 default:
1743 panic("unknown task policy: %d %d %d %d", category, flavor, value1, value2);
1744 break;
1745 }
1746
1747 if (on_task)
1748 task->requested_policy = requested;
1749 else
1750 thread->requested_policy = requested;
1751}
1752
1753
1754/*
1755 * Gets what you set. Effective values may be different.
1756 */
1757int
1758proc_get_task_policy(task_t task,
1759 thread_t thread,
1760 int category,
1761 int flavor)
1762{
1763 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1764
1765 int value = 0;
39236c6e
A
1766
1767 task_lock(task);
316670eb 1768
39236c6e
A
1769 struct task_requested_policy requested =
1770 (on_task) ? task->requested_policy : thread->requested_policy;
316670eb 1771
39236c6e
A
1772 switch (flavor) {
1773 case TASK_POLICY_DARWIN_BG:
1774 if (category == TASK_POLICY_EXTERNAL)
1775 value = requested.ext_darwinbg;
1776 else
1777 value = requested.int_darwinbg;
1778 break;
1779 case TASK_POLICY_IOPOL:
1780 if (category == TASK_POLICY_EXTERNAL)
1781 value = proc_tier_to_iopol(requested.ext_iotier,
1782 requested.ext_iopassive);
1783 else
1784 value = proc_tier_to_iopol(requested.int_iotier,
1785 requested.int_iopassive);
1786 break;
1787 case TASK_POLICY_IO:
1788 if (category == TASK_POLICY_EXTERNAL)
1789 value = requested.ext_iotier;
1790 else
1791 value = requested.int_iotier;
1792 break;
1793 case TASK_POLICY_PASSIVE_IO:
1794 if (category == TASK_POLICY_EXTERNAL)
1795 value = requested.ext_iopassive;
1796 else
1797 value = requested.int_iopassive;
1798 break;
39236c6e
A
1799 case TASK_POLICY_DARWIN_BG_IOPOL:
1800 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1801 value = proc_tier_to_iopol(requested.bg_iotier, 0);
1802 break;
1803 case TASK_POLICY_ROLE:
1804 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1805 value = requested.t_role;
1806 break;
fe8ab488
A
1807 case TASK_POLICY_SFI_MANAGED:
1808 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1809 value = requested.t_sfi_managed;
1810 break;
1811 case TASK_POLICY_QOS:
1812 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1813 value = requested.thrp_qos;
1814 break;
1815 case TASK_POLICY_QOS_OVERRIDE:
1816 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1817 value = requested.thrp_qos_override;
1818 break;
39236c6e
A
1819 default:
1820 panic("unknown policy_flavor %d", flavor);
1821 break;
316670eb
A
1822 }
1823
39236c6e 1824 task_unlock(task);
316670eb 1825
39236c6e
A
1826 return value;
1827}
1828
fe8ab488
A
1829/*
1830 * Variant of proc_get_task_policy() that returns two scalar outputs.
1831 */
1832void
1833proc_get_task_policy2(task_t task, thread_t thread, int category __unused, int flavor, int *value1, int *value2)
1834{
1835 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1836
1837 task_lock(task);
1838
1839 struct task_requested_policy requested =
1840 (on_task) ? task->requested_policy : thread->requested_policy;
1841
1842 switch (flavor) {
1843 /* TASK attributes */
1844 case TASK_POLICY_BASE_LATENCY_AND_THROUGHPUT_QOS:
1845 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1846 *value1 = requested.t_base_latency_qos;
1847 *value2 = requested.t_base_through_qos;
1848 break;
1849
1850 case TASK_POLICY_OVERRIDE_LATENCY_AND_THROUGHPUT_QOS:
1851 assert(on_task && category == TASK_POLICY_ATTRIBUTE);
1852 *value1 = requested.t_over_latency_qos;
1853 *value2 = requested.t_over_through_qos;
1854 break;
1855
1856 /* THREAD attributes */
1857 case TASK_POLICY_QOS_AND_RELPRIO:
1858 assert(!on_task && category == TASK_POLICY_ATTRIBUTE);
1859 *value1 = requested.thrp_qos;
1860 *value2 = requested.thrp_qos_relprio;
1861 break;
1862
1863 default:
1864 panic("unknown policy_flavor %d", flavor);
1865 break;
1866 }
1867
1868 task_unlock(task);
1869}
1870
39236c6e
A
1871
1872/*
1873 * Functions for querying effective state for relevant subsystems
1874 * ONLY the relevant subsystem should query these.
1875 * NEVER take a value from one of the 'effective' functions and stuff it into a setter.
1876 */
1877
1878int
1879proc_get_effective_task_policy(task_t task, int flavor)
1880{
1881 return proc_get_effective_policy(task, THREAD_NULL, flavor);
1882}
1883
1884int
1885proc_get_effective_thread_policy(thread_t thread, int flavor)
1886{
1887 return proc_get_effective_policy(thread->task, thread, flavor);
1888}
1889
1890/*
1891 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
1892 *
1893 * NOTE: This accessor does not take the task lock.
1894 * Notifications of state updates need to be externally synchronized with state queries.
1895 * This routine *MUST* remain interrupt safe, as it is potentially invoked
fe8ab488 1896 * within the context of a timer interrupt. It is also called in KDP context for stackshot.
39236c6e
A
1897 */
1898static int
1899proc_get_effective_policy(task_t task,
1900 thread_t thread,
1901 int flavor)
1902{
1903 boolean_t on_task = (thread == THREAD_NULL) ? TRUE : FALSE;
1904 int value = 0;
1905
1906 switch (flavor) {
1907 case TASK_POLICY_DARWIN_BG:
1908 /*
1909 * This backs the KPI call proc_pidbackgrounded to find
1910 * out if a pid is backgrounded,
1911 * as well as proc_get_effective_thread_policy.
1912 * Its main use is within the timer layer, as well as
1913 * prioritizing requests to the graphics system.
1914 * Returns 1 for background mode, 0 for normal mode
1915 */
1916 if (on_task)
1917 value = task->effective_policy.darwinbg;
1918 else
1919 value = (task->effective_policy.darwinbg ||
1920 thread->effective_policy.darwinbg) ? 1 : 0;
1921 break;
1922 case TASK_POLICY_IO:
1923 /*
1924 * The I/O system calls here to find out what throttling tier to apply to an operation.
fe8ab488
A
1925 * Returns THROTTLE_LEVEL_* values. Some userspace spinlock operations can apply
1926 * a temporary iotier override to make the I/O more aggressive to get the lock
1927 * owner to release the spinlock.
39236c6e
A
1928 */
1929 if (on_task)
1930 value = task->effective_policy.io_tier;
1931 else {
1932 value = MAX(task->effective_policy.io_tier,
1933 thread->effective_policy.io_tier);
1934 if (thread->iotier_override != THROTTLE_LEVEL_NONE)
1935 value = MIN(value, thread->iotier_override);
1936 }
1937 break;
1938 case TASK_POLICY_PASSIVE_IO:
1939 /*
1940 * The I/O system calls here to find out whether an operation should be passive.
1941 * (i.e. not cause operations with lower throttle tiers to be throttled)
fe8ab488
A
1942 * Returns 1 for passive mode, 0 for normal mode.
1943 * If a userspace spinlock has applied an override, that I/O should always
1944 * be passive to avoid self-throttling when the override is removed and lower
1945 * iotier I/Os are issued.
39236c6e
A
1946 */
1947 if (on_task)
1948 value = task->effective_policy.io_passive;
fe8ab488
A
1949 else {
1950 int io_tier = MAX(task->effective_policy.io_tier, thread->effective_policy.io_tier);
1951 boolean_t override_in_effect = (thread->iotier_override != THROTTLE_LEVEL_NONE) && (thread->iotier_override < io_tier);
1952
39236c6e 1953 value = (task->effective_policy.io_passive ||
fe8ab488
A
1954 thread->effective_policy.io_passive || override_in_effect) ? 1 : 0;
1955 }
1956 break;
1957 case TASK_POLICY_ALL_SOCKETS_BG:
1958 /*
1959 * do_background_socket() calls this to determine what it should do to the proc's sockets
1960 * Returns 1 for background mode, 0 for normal mode
1961 *
1962 * This consults both thread and task so un-DBGing a thread while the task is BG
1963 * doesn't get you out of the network throttle.
1964 */
1965 if (on_task)
1966 value = task->effective_policy.all_sockets_bg;
1967 else
1968 value = (task->effective_policy.all_sockets_bg ||
1969 thread->effective_policy.all_sockets_bg) ? 1 : 0;
39236c6e
A
1970 break;
1971 case TASK_POLICY_NEW_SOCKETS_BG:
1972 /*
1973 * socreate() calls this to determine if it should mark a new socket as background
1974 * Returns 1 for background mode, 0 for normal mode
1975 */
1976 if (on_task)
1977 value = task->effective_policy.new_sockets_bg;
1978 else
1979 value = (task->effective_policy.new_sockets_bg ||
1980 thread->effective_policy.new_sockets_bg) ? 1 : 0;
1981 break;
1982 case TASK_POLICY_LOWPRI_CPU:
1983 /*
1984 * Returns 1 for low priority cpu mode, 0 for normal mode
1985 */
1986 if (on_task)
1987 value = task->effective_policy.lowpri_cpu;
1988 else
1989 value = (task->effective_policy.lowpri_cpu ||
1990 thread->effective_policy.lowpri_cpu) ? 1 : 0;
1991 break;
1992 case TASK_POLICY_SUPPRESSED_CPU:
1993 /*
1994 * Returns 1 for suppressed cpu mode, 0 for normal mode
1995 */
1996 assert(on_task);
1997 value = task->effective_policy.t_suppressed_cpu;
1998 break;
1999 case TASK_POLICY_LATENCY_QOS:
2000 /*
2001 * timer arming calls into here to find out the timer coalescing level
2002 * Returns a QoS tier (0-6)
2003 */
fe8ab488
A
2004 if (on_task) {
2005 value = task->effective_policy.t_latency_qos;
2006 } else {
2007 value = MAX(task->effective_policy.t_latency_qos, thread->effective_policy.t_latency_qos);
2008 }
39236c6e
A
2009 break;
2010 case TASK_POLICY_THROUGH_QOS:
2011 /*
2012 * Returns a QoS tier (0-6)
2013 */
2014 assert(on_task);
2015 value = task->effective_policy.t_through_qos;
2016 break;
39236c6e
A
2017 case TASK_POLICY_ROLE:
2018 assert(on_task);
2019 value = task->effective_policy.t_role;
2020 break;
2021 case TASK_POLICY_WATCHERS_BG:
2022 assert(on_task);
2023 value = task->effective_policy.t_watchers_bg;
2024 break;
fe8ab488
A
2025 case TASK_POLICY_SFI_MANAGED:
2026 assert(on_task);
2027 value = task->effective_policy.t_sfi_managed;
2028 break;
2029 case TASK_POLICY_QOS:
2030 assert(!on_task);
2031 value = thread->effective_policy.thep_qos;
2032 break;
39236c6e
A
2033 default:
2034 panic("unknown policy_flavor %d", flavor);
2035 break;
316670eb
A
2036 }
2037
39236c6e 2038 return value;
6d2010ae
A
2039}
2040
39236c6e
A
2041/*
2042 * Convert from IOPOL_* values to throttle tiers.
2043 *
2044 * TODO: Can this be made more compact, like an array lookup
2045 * Note that it is possible to support e.g. IOPOL_PASSIVE_STANDARD in the future
2046 */
6d2010ae 2047
39236c6e
A
2048static void
2049proc_iopol_to_tier(int iopolicy, int *tier, int *passive)
6d2010ae 2050{
39236c6e
A
2051 *passive = 0;
2052 *tier = 0;
2053 switch (iopolicy) {
2054 case IOPOL_IMPORTANT:
2055 *tier = THROTTLE_LEVEL_TIER0;
2056 break;
2057 case IOPOL_PASSIVE:
2058 *tier = THROTTLE_LEVEL_TIER0;
2059 *passive = 1;
2060 break;
2061 case IOPOL_STANDARD:
2062 *tier = THROTTLE_LEVEL_TIER1;
2063 break;
2064 case IOPOL_UTILITY:
2065 *tier = THROTTLE_LEVEL_TIER2;
2066 break;
2067 case IOPOL_THROTTLE:
2068 *tier = THROTTLE_LEVEL_TIER3;
2069 break;
2070 default:
2071 panic("unknown I/O policy %d", iopolicy);
2072 break;
2073 }
6d2010ae
A
2074}
2075
2076static int
39236c6e 2077proc_tier_to_iopol(int tier, int passive)
6d2010ae 2078{
39236c6e
A
2079 if (passive == 1) {
2080 switch (tier) {
2081 case THROTTLE_LEVEL_TIER0:
2082 return IOPOL_PASSIVE;
2083 break;
2084 default:
2085 panic("unknown passive tier %d", tier);
2086 return IOPOL_DEFAULT;
2087 break;
6d2010ae 2088 }
39236c6e
A
2089 } else {
2090 switch (tier) {
2091 case THROTTLE_LEVEL_NONE:
39236c6e 2092 case THROTTLE_LEVEL_TIER0:
fe8ab488 2093 return IOPOL_DEFAULT;
39236c6e
A
2094 break;
2095 case THROTTLE_LEVEL_TIER1:
2096 return IOPOL_STANDARD;
2097 break;
2098 case THROTTLE_LEVEL_TIER2:
2099 return IOPOL_UTILITY;
2100 break;
2101 case THROTTLE_LEVEL_TIER3:
2102 return IOPOL_THROTTLE;
2103 break;
2104 default:
2105 panic("unknown tier %d", tier);
2106 return IOPOL_DEFAULT;
2107 break;
6d2010ae
A
2108 }
2109 }
39236c6e
A
2110}
2111
3e170ce0
A
2112int
2113proc_darwin_role_to_task_role(int darwin_role, int* task_role)
2114{
2115 integer_t role = TASK_UNSPECIFIED;
2116
2117 switch (darwin_role) {
2118 case PRIO_DARWIN_ROLE_DEFAULT:
2119 role = TASK_UNSPECIFIED;
2120 break;
2121 case PRIO_DARWIN_ROLE_UI_FOCAL:
2122 role = TASK_FOREGROUND_APPLICATION;
2123 break;
2124 case PRIO_DARWIN_ROLE_UI:
2125 role = TASK_DEFAULT_APPLICATION;
2126 break;
2127 case PRIO_DARWIN_ROLE_NON_UI:
2128 role = TASK_NONUI_APPLICATION;
2129 break;
2130 case PRIO_DARWIN_ROLE_UI_NON_FOCAL:
2131 role = TASK_BACKGROUND_APPLICATION;
2132 break;
2133 case PRIO_DARWIN_ROLE_TAL_LAUNCH:
2134 role = TASK_THROTTLE_APPLICATION;
2135 break;
2136 default:
2137 return EINVAL;
2138 }
2139
2140 *task_role = role;
2141
2142 return 0;
2143}
2144
2145int
2146proc_task_role_to_darwin_role(int task_role)
2147{
2148 switch (task_role) {
2149 case TASK_FOREGROUND_APPLICATION:
2150 return PRIO_DARWIN_ROLE_UI_FOCAL;
2151 case TASK_BACKGROUND_APPLICATION:
2152 return PRIO_DARWIN_ROLE_UI;
2153 case TASK_NONUI_APPLICATION:
2154 return PRIO_DARWIN_ROLE_NON_UI;
2155 case TASK_DEFAULT_APPLICATION:
2156 return PRIO_DARWIN_ROLE_UI_NON_FOCAL;
2157 case TASK_THROTTLE_APPLICATION:
2158 return PRIO_DARWIN_ROLE_TAL_LAUNCH;
2159 case TASK_UNSPECIFIED:
2160 default:
2161 return PRIO_DARWIN_ROLE_DEFAULT;
2162 }
2163}
2164
2165
39236c6e
A
2166/* apply internal backgrounding for workqueue threads */
2167int
2168proc_apply_workq_bgthreadpolicy(thread_t thread)
2169{
2170 if (thread == THREAD_NULL)
2171 return ESRCH;
2172
2173 proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
2174 TASK_POLICY_WORKQ_BG, TASK_POLICY_ENABLE);
6d2010ae
A
2175
2176 return(0);
2177}
2178
39236c6e
A
2179/*
2180 * remove internal backgrounding for workqueue threads
2181 * does NOT go find sockets created while BG and unbackground them
2182 */
2183int
6d2010ae
A
2184proc_restore_workq_bgthreadpolicy(thread_t thread)
2185{
39236c6e
A
2186 if (thread == THREAD_NULL)
2187 return ESRCH;
6d2010ae 2188
39236c6e
A
2189 proc_set_task_policy(thread->task, thread, TASK_POLICY_ATTRIBUTE,
2190 TASK_POLICY_WORKQ_BG, TASK_POLICY_DISABLE);
2191
2192 return(0);
6d2010ae
A
2193}
2194
39236c6e
A
2195/* here for temporary compatibility */
2196int
2197proc_setthread_saved_importance(__unused thread_t thread, __unused int importance)
6d2010ae 2198{
39236c6e 2199 return(0);
6d2010ae
A
2200}
2201
39236c6e
A
2202/*
2203 * Set an override on the thread which is consulted with a
2204 * higher priority than the task/thread policy. This should
2205 * only be set for temporary grants until the thread
2206 * returns to the userspace boundary
2207 *
2208 * We use atomic operations to swap in the override, with
2209 * the assumption that the thread itself can
2210 * read the override and clear it on return to userspace.
2211 *
2212 * No locking is performed, since it is acceptable to see
2213 * a stale override for one loop through throttle_lowpri_io().
2214 * However a thread reference must be held on the thread.
2215 */
316670eb 2216
39236c6e
A
2217void set_thread_iotier_override(thread_t thread, int policy)
2218{
2219 int current_override;
6d2010ae 2220
39236c6e
A
2221 /* Let most aggressive I/O policy win until user boundary */
2222 do {
2223 current_override = thread->iotier_override;
316670eb 2224
39236c6e
A
2225 if (current_override != THROTTLE_LEVEL_NONE)
2226 policy = MIN(current_override, policy);
6d2010ae 2227
39236c6e
A
2228 if (current_override == policy) {
2229 /* no effective change */
2230 return;
316670eb 2231 }
39236c6e 2232 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
6d2010ae 2233
39236c6e
A
2234 /*
2235 * Since the thread may be currently throttled,
2236 * re-evaluate tiers and potentially break out
2237 * of an msleep
2238 */
2239 rethrottle_thread(thread->uthread);
6d2010ae
A
2240}
2241
/*
 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
 * thread (in terms of scheduler priority, I/O tier, or QoS tier) is waiting on a resource
 * owned by a lower priority thread. In these cases, we attempt to propagate the priority
 * token, as long as the subsystem informs us of the relationships between the threads.
 * The userspace synchronization subsystem should maintain the information of
 * owner->resource and resource->waiters itself.
 */
fe8ab488 2251
a1c7dba1
A
2252/*
2253 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2254 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2255 * to be handled specially in the future, but for now it's fine to slam
2256 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2257 */
2258static void _canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
2259 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2260 /* Map all input resource/type to a single one */
2261 *resource = USER_ADDR_NULL;
2262 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2263 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2264 /* no transform */
2265 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH) {
2266 /* Map all dispatch overrides to a single one, to avoid memory overhead */
2267 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
2268 *resource = USER_ADDR_NULL;
2269 }
2270 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2271 /* Map all mutex overrides to a single one, to avoid memory overhead */
2272 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2273 *resource = USER_ADDR_NULL;
2274 }
2275 }
2276}
2277
2278/* This helper routine finds an existing override if known. Locking should be done by caller */
2279static struct thread_qos_override *_find_qos_override(thread_t thread, user_addr_t resource, int resource_type) {
2280 struct thread_qos_override *override;
2281
2282 override = thread->overrides;
2283 while (override) {
2284 if (override->override_resource == resource &&
2285 override->override_resource_type == resource_type) {
2286 return override;
2287 }
2288
2289 override = override->override_next;
2290 }
2291
2292 return NULL;
2293}
2294
2295static void _find_and_decrement_qos_override(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, struct thread_qos_override **free_override_list) {
2296 struct thread_qos_override *override, *override_prev;
2297
2298 override_prev = NULL;
2299 override = thread->overrides;
2300 while (override) {
2301 struct thread_qos_override *override_next = override->override_next;
2302
2303 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2304 override->override_resource_type == resource_type) {
2305 if (reset) {
2306 override->override_contended_resource_count = 0;
2307 } else {
2308 override->override_contended_resource_count--;
2309 }
2310
2311 if (override->override_contended_resource_count == 0) {
2312 if (override_prev == NULL) {
2313 thread->overrides = override_next;
2314 } else {
2315 override_prev->override_next = override_next;
2316 }
2317
2318 /* Add to out-param for later zfree */
2319 override->override_next = *free_override_list;
2320 *free_override_list = override;
2321 } else {
2322 override_prev = override;
2323 }
2324
2325 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2326 return;
2327 }
2328 } else {
2329 override_prev = override;
2330 }
2331
2332 override = override_next;
2333 }
2334}
2335
2336/* This helper recalculates the current requested override using the policy selected at boot */
2337static int _calculate_requested_qos_override(thread_t thread)
2338{
2339 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2340 return THREAD_QOS_UNSPECIFIED;
2341 }
2342
2343 /* iterate over all overrides and calculate MAX */
2344 struct thread_qos_override *override;
2345 int qos_override = THREAD_QOS_UNSPECIFIED;
2346
2347 override = thread->overrides;
2348 while (override) {
2349 if (qos_override_mode != QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH ||
2350 override->override_resource_type != THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
2351 qos_override = MAX(qos_override, override->override_qos);
2352 }
2353
2354 override = override->override_next;
2355 }
2356
2357 return qos_override;
2358}
2359
2360boolean_t proc_thread_qos_add_override(task_t task, thread_t thread, uint64_t tid, int override_qos, boolean_t first_override_for_resource, user_addr_t resource, int resource_type)
fe8ab488
A
2361{
2362 thread_t self = current_thread();
fe8ab488
A
2363 struct task_pend_token pend_token = {};
2364
2365 /* XXX move to thread mutex when thread policy does */
2366 task_lock(task);
2367
2368 /*
2369 * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference
2370 * to the thread
2371 */
2372
2373 if (thread != THREAD_NULL) {
2374 assert(task == thread->task);
2375 } else {
2376 if (tid == self->thread_id) {
2377 thread = self;
2378 } else {
2379 thread = task_findtid(task, tid);
2380
2381 if (thread == THREAD_NULL) {
2382 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2383 tid, 0, 0xdead, 0, 0);
2384 task_unlock(task);
2385 return FALSE;
2386 }
2387 }
2388 }
2389
2390 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2391 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2392
2393 DTRACE_BOOST5(qos_add_override_pre, uint64_t, tid, uint64_t, thread->requested_policy.thrp_qos,
2394 uint64_t, thread->effective_policy.thep_qos, int, override_qos, boolean_t, first_override_for_resource);
2395
a1c7dba1
A
2396 struct task_requested_policy requested = thread->requested_policy;
2397 struct thread_qos_override *override;
2398 struct thread_qos_override *deferred_free_override = NULL;
2399 int new_qos_override, prev_qos_override;
2400 int new_effective_qos;
2401 boolean_t has_thread_reference = FALSE;
2402
2403 _canonicalize_resource_and_type(&resource, &resource_type);
2404
fe8ab488 2405 if (first_override_for_resource) {
a1c7dba1
A
2406 override = _find_qos_override(thread, resource, resource_type);
2407 if (override) {
2408 override->override_contended_resource_count++;
2409 } else {
2410 struct thread_qos_override *override_new;
2411
2412 /* We need to allocate a new object. Drop the task lock and recheck afterwards in case someone else added the override */
2413 thread_reference(thread);
2414 has_thread_reference = TRUE;
2415 task_unlock(task);
2416 override_new = zalloc(thread_qos_override_zone);
2417 task_lock(task);
2418
2419 override = _find_qos_override(thread, resource, resource_type);
2420 if (override) {
2421 /* Someone else already allocated while the task lock was dropped */
2422 deferred_free_override = override_new;
2423 override->override_contended_resource_count++;
2424 } else {
2425 override = override_new;
2426 override->override_next = thread->overrides;
2427 override->override_contended_resource_count = 1 /* since first_override_for_resource was TRUE */;
2428 override->override_resource = resource;
2429 override->override_resource_type = resource_type;
2430 override->override_qos = THREAD_QOS_UNSPECIFIED;
2431 thread->overrides = override;
2432 }
2433 }
fe8ab488 2434 } else {
a1c7dba1 2435 override = _find_qos_override(thread, resource, resource_type);
fe8ab488
A
2436 }
2437
a1c7dba1
A
2438 if (override) {
2439 if (override->override_qos == THREAD_QOS_UNSPECIFIED)
2440 override->override_qos = override_qos;
2441 else
2442 override->override_qos = MAX(override->override_qos, override_qos);
2443 }
fe8ab488 2444
a1c7dba1
A
2445 /* Determine how to combine the various overrides into a single current requested override */
2446 prev_qos_override = requested.thrp_qos_override;
2447 new_qos_override = _calculate_requested_qos_override(thread);
fe8ab488 2448
a1c7dba1
A
2449 if (new_qos_override != prev_qos_override) {
2450 requested.thrp_qos_override = new_qos_override;
fe8ab488 2451
a1c7dba1 2452 thread->requested_policy = requested;
fe8ab488 2453
a1c7dba1
A
2454 task_policy_update_locked(task, thread, &pend_token);
2455
2456 if (!has_thread_reference) {
2457 thread_reference(thread);
2458 }
2459
2460 task_unlock(task);
2461
2462 task_policy_update_complete_unlocked(task, thread, &pend_token);
fe8ab488 2463
a1c7dba1
A
2464 new_effective_qos = thread->effective_policy.thep_qos;
2465
2466 thread_deallocate(thread);
2467 } else {
2468 new_effective_qos = thread->effective_policy.thep_qos;
fe8ab488 2469
a1c7dba1 2470 task_unlock(task);
fe8ab488 2471
a1c7dba1
A
2472 if (has_thread_reference) {
2473 thread_deallocate(thread);
2474 }
2475 }
2476
2477 if (deferred_free_override) {
2478 zfree(thread_qos_override_zone, deferred_free_override);
2479 }
fe8ab488 2480
a1c7dba1
A
2481 DTRACE_BOOST3(qos_add_override_post, int, prev_qos_override, int, new_qos_override,
2482 int, new_effective_qos);
fe8ab488
A
2483
2484 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
a1c7dba1 2485 new_qos_override, resource, resource_type, 0, 0);
fe8ab488
A
2486
2487 return TRUE;
2488}
2489
a1c7dba1
A
2490
2491static boolean_t _proc_thread_qos_remove_override_internal(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type, boolean_t reset)
6d2010ae 2492{
fe8ab488 2493 thread_t self = current_thread();
fe8ab488
A
2494 struct task_pend_token pend_token = {};
2495
2496 /* XXX move to thread mutex when thread policy does */
39236c6e 2497 task_lock(task);
6d2010ae 2498
fe8ab488
A
2499 /*
2500 * If thread is passed, it is assumed to be most accurate, since the caller must have an explicit (or implicit) reference
2501 * to the thread
2502 */
2503 if (thread != THREAD_NULL) {
2504 assert(task == thread->task);
2505 } else {
2506 if (tid == self->thread_id) {
2507 thread = self;
2508 } else {
2509 thread = task_findtid(task, tid);
2510
2511 if (thread == THREAD_NULL) {
2512 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2513 tid, 0, 0xdead, 0, 0);
2514 task_unlock(task);
2515 return FALSE;
2516 }
2517 }
2518 }
2519
a1c7dba1
A
2520 struct task_requested_policy requested = thread->requested_policy;
2521 struct thread_qos_override *deferred_free_override_list = NULL;
2522 int new_qos_override, prev_qos_override;
2523
2524 _canonicalize_resource_and_type(&resource, &resource_type);
2525
2526 _find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
fe8ab488
A
2527
2528 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
a1c7dba1
A
2529 thread_tid(thread), resource, reset, 0, 0);
2530
2531 /* Determine how to combine the various overrides into a single current requested override */
2532 prev_qos_override = requested.thrp_qos_override;
2533 new_qos_override = _calculate_requested_qos_override(thread);
fe8ab488 2534
a1c7dba1
A
2535 if (new_qos_override != prev_qos_override) {
2536 requested.thrp_qos_override = new_qos_override;
2537
2538 thread->requested_policy = requested;
fe8ab488
A
2539
2540 task_policy_update_locked(task, thread, &pend_token);
2541
2542 thread_reference(thread);
a1c7dba1 2543
fe8ab488
A
2544 task_unlock(task);
2545
2546 task_policy_update_complete_unlocked(task, thread, &pend_token);
a1c7dba1 2547
fe8ab488 2548 thread_deallocate(thread);
fe8ab488
A
2549 } else {
2550 task_unlock(task);
2551 }
2552
a1c7dba1
A
2553 while (deferred_free_override_list) {
2554 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2555
2556 zfree(thread_qos_override_zone, deferred_free_override_list);
2557 deferred_free_override_list = override_next;
2558 }
2559
fe8ab488
A
2560 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2561 0, 0, 0, 0, 0);
2562
2563 return TRUE;
2564}
2565
a1c7dba1
A
2566boolean_t proc_thread_qos_remove_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type)
2567{
2568 return _proc_thread_qos_remove_override_internal(task, thread, tid, resource, resource_type, FALSE);
2569
2570}
2571
2572boolean_t proc_thread_qos_reset_override(task_t task, thread_t thread, uint64_t tid, user_addr_t resource, int resource_type)
2573{
2574 return _proc_thread_qos_remove_override_internal(task, thread, tid, resource, resource_type, TRUE);
2575}
2576
2577/* Deallocate before thread termination */
2578void proc_thread_qos_deallocate(thread_t thread)
2579{
2580 task_t task = thread->task;
2581 struct thread_qos_override *override;
2582
2583 /* XXX move to thread mutex when thread policy does */
2584 task_lock(task);
2585 override = thread->overrides;
2586 thread->overrides = NULL; /* task policy re-evaluation needed? */
2587 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2588 task_unlock(task);
2589
2590 while (override) {
2591 struct thread_qos_override *override_next = override->override_next;
2592
2593 zfree(thread_qos_override_zone, override);
2594 override = override_next;
2595 }
2596}
2597
fe8ab488
A
2598/* TODO: remove this variable when interactive daemon audit period is over */
2599extern boolean_t ipc_importance_interactive_receiver;
2600
2601/*
2602 * Called at process exec to initialize the apptype, qos clamp, and qos seed of a process
2603 *
2604 * TODO: Make this function more table-driven instead of ad-hoc
2605 */
2606void
3e170ce0 2607proc_set_task_spawnpolicy(task_t task, int apptype, int qos_clamp, int role,
fe8ab488
A
2608 ipc_port_t * portwatch_ports, int portwatch_count)
2609{
2610 struct task_pend_token pend_token = {};
2611
39236c6e 2612 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488 2613 (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_START,
3e170ce0 2614 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
fe8ab488 2615 apptype, 0);
316670eb 2616
39236c6e
A
2617 switch (apptype) {
2618 case TASK_APPTYPE_APP_TAL:
39236c6e 2619 case TASK_APPTYPE_APP_DEFAULT:
fe8ab488
A
2620 /* Apps become donors via the 'live-donor' flag instead of the static donor flag */
2621 task_importance_mark_donor(task, FALSE);
2622 task_importance_mark_live_donor(task, TRUE);
2623 task_importance_mark_receiver(task, FALSE);
2624 /* Apps are de-nap recievers on desktop for suppression behaviors */
2625 task_importance_mark_denap_receiver(task, TRUE);
2626 break;
316670eb 2627
fe8ab488 2628 case TASK_APPTYPE_DAEMON_INTERACTIVE:
39236c6e 2629 task_importance_mark_donor(task, TRUE);
fe8ab488
A
2630 task_importance_mark_live_donor(task, FALSE);
2631
2632 /*
2633 * A boot arg controls whether interactive daemons are importance receivers.
2634 * Normally, they are not. But for testing their behavior as an adaptive
2635 * daemon, the boot-arg can be set.
2636 *
2637 * TODO: remove this when the interactive daemon audit period is over.
2638 */
2639 task_importance_mark_receiver(task, /* FALSE */ ipc_importance_interactive_receiver);
2640 task_importance_mark_denap_receiver(task, FALSE);
6d2010ae
A
2641 break;
2642
39236c6e 2643 case TASK_APPTYPE_DAEMON_STANDARD:
39236c6e 2644 task_importance_mark_donor(task, TRUE);
fe8ab488 2645 task_importance_mark_live_donor(task, FALSE);
39236c6e 2646 task_importance_mark_receiver(task, FALSE);
fe8ab488 2647 task_importance_mark_denap_receiver(task, FALSE);
6d2010ae 2648 break;
39236c6e
A
2649
2650 case TASK_APPTYPE_DAEMON_ADAPTIVE:
39236c6e 2651 task_importance_mark_donor(task, FALSE);
fe8ab488 2652 task_importance_mark_live_donor(task, FALSE);
39236c6e 2653 task_importance_mark_receiver(task, TRUE);
fe8ab488 2654 task_importance_mark_denap_receiver(task, FALSE);
316670eb
A
2655 break;
2656
39236c6e 2657 case TASK_APPTYPE_DAEMON_BACKGROUND:
39236c6e 2658 task_importance_mark_donor(task, FALSE);
fe8ab488 2659 task_importance_mark_live_donor(task, FALSE);
39236c6e 2660 task_importance_mark_receiver(task, FALSE);
fe8ab488 2661 task_importance_mark_denap_receiver(task, FALSE);
316670eb 2662 break;
6d2010ae 2663
fe8ab488
A
2664 case TASK_APPTYPE_NONE:
2665 break;
2666 }
2667
2668 if (portwatch_ports != NULL && apptype == TASK_APPTYPE_DAEMON_ADAPTIVE) {
2669 int portwatch_boosts = 0;
2670
2671 for (int i = 0; i < portwatch_count; i++) {
2672 ipc_port_t port = NULL;
2673
2674 if ((port = portwatch_ports[i]) != NULL) {
2675 int boost = 0;
2676 task_add_importance_watchport(task, port, &boost);
2677 portwatch_boosts += boost;
2678 }
2679 }
2680
2681 if (portwatch_boosts > 0) {
2682 task_importance_hold_internal_assertion(task, portwatch_boosts);
2683 }
2684 }
2685
2686 task_lock(task);
2687
2688 if (apptype == TASK_APPTYPE_APP_TAL) {
2689 /* TAL starts off enabled by default */
2690 task->requested_policy.t_tal_enabled = 1;
2691 }
2692
2693 if (apptype != TASK_APPTYPE_NONE) {
2694 task->requested_policy.t_apptype = apptype;
3e170ce0 2695 }
fe8ab488 2696
3e170ce0
A
2697 if (role != TASK_UNSPECIFIED) {
2698 task->requested_policy.t_role = role;
fe8ab488
A
2699 }
2700
2701 if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2702 task->requested_policy.t_qos_clamp = qos_clamp;
2703 }
2704
2705 task_policy_update_locked(task, THREAD_NULL, &pend_token);
2706
2707 task_unlock(task);
2708
2709 /* Ensure the donor bit is updated to be in sync with the new live donor status */
2710 pend_token.tpt_update_live_donor = 1;
2711
2712 task_policy_update_complete_unlocked(task, THREAD_NULL, &pend_token);
2713
2714 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2715 (IMPORTANCE_CODE(IMP_TASK_APPTYPE, apptype)) | DBG_FUNC_END,
3e170ce0 2716 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
fe8ab488
A
2717 task_is_importance_receiver(task), 0);
2718}
2719
3e170ce0
A
2720extern task_t bsd_init_task;
2721
fe8ab488
A
2722/* Set up the primordial thread's QoS */
2723void
2724task_set_main_thread_qos(task_t task, thread_t main_thread) {
2725 struct task_pend_token pend_token = {};
2726
2727 assert(main_thread->task == task);
2728
2729 task_lock(task);
2730
2731 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2732 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
3e170ce0 2733 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
fe8ab488
A
2734 main_thread->requested_policy.thrp_qos, 0);
2735
2736 int primordial_qos = THREAD_QOS_UNSPECIFIED;
2737
2738 int qos_clamp = task->requested_policy.t_qos_clamp;
2739
3e170ce0
A
2740 if (task == bsd_init_task) {
2741 /* PID 1 gets a special case */
2742 primordial_qos = THREAD_QOS_USER_INITIATED;
2743 }
2744
fe8ab488
A
2745 switch (task->requested_policy.t_apptype) {
2746 case TASK_APPTYPE_APP_TAL:
2747 case TASK_APPTYPE_APP_DEFAULT:
2748 primordial_qos = THREAD_QOS_USER_INTERACTIVE;
2749 break;
2750
2751 case TASK_APPTYPE_DAEMON_INTERACTIVE:
2752 case TASK_APPTYPE_DAEMON_STANDARD:
2753 case TASK_APPTYPE_DAEMON_ADAPTIVE:
2754 primordial_qos = THREAD_QOS_LEGACY;
2755 break;
2756
2757 case TASK_APPTYPE_DAEMON_BACKGROUND:
2758 primordial_qos = THREAD_QOS_BACKGROUND;
6d2010ae
A
2759 break;
2760 }
39236c6e 2761
fe8ab488
A
2762 if (qos_clamp != THREAD_QOS_UNSPECIFIED) {
2763 if (primordial_qos != THREAD_QOS_UNSPECIFIED) {
2764 primordial_qos = MIN(qos_clamp, primordial_qos);
2765 } else {
2766 primordial_qos = qos_clamp;
2767 }
2768 }
2769
2770 main_thread->requested_policy.thrp_qos = primordial_qos;
2771
2772 task_policy_update_locked(task, main_thread, &pend_token);
39236c6e
A
2773
2774 task_unlock(task);
2775
fe8ab488 2776 task_policy_update_complete_unlocked(task, main_thread, &pend_token);
39236c6e
A
2777
2778 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
fe8ab488 2779 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
3e170ce0 2780 task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL),
fe8ab488 2781 primordial_qos, 0);
39236c6e
A
2782}
2783
2784/* for process_policy to check before attempting to set */
2785boolean_t
2786proc_task_is_tal(task_t task)
2787{
2788 return (task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL) ? TRUE : FALSE;
6d2010ae
A
2789}
2790
3e170ce0
A
2791int
2792task_get_apptype(task_t task)
2793{
2794 return task->requested_policy.t_apptype;
2795}
2796
39236c6e
A
2797/* for telemetry */
2798integer_t
2799task_grab_latency_qos(task_t task)
2800{
fe8ab488 2801 return qos_latency_policy_package(proc_get_effective_task_policy(task, TASK_POLICY_LATENCY_QOS));
39236c6e 2802}
6d2010ae 2803
39236c6e 2804/* update the darwin background action state in the flags field for libproc */
6d2010ae
A
2805int
2806proc_get_darwinbgstate(task_t task, uint32_t * flagsp)
2807{
39236c6e 2808 if (task->requested_policy.ext_darwinbg)
6d2010ae 2809 *flagsp |= PROC_FLAG_EXT_DARWINBG;
39236c6e
A
2810
2811 if (task->requested_policy.int_darwinbg)
6d2010ae 2812 *flagsp |= PROC_FLAG_DARWINBG;
6d2010ae 2813
6d2010ae 2814
fe8ab488
A
2815 if (task->requested_policy.t_apptype == TASK_APPTYPE_APP_DEFAULT ||
2816 task->requested_policy.t_apptype == TASK_APPTYPE_APP_TAL)
2817 *flagsp |= PROC_FLAG_APPLICATION;
2818
39236c6e
A
2819 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE)
2820 *flagsp |= PROC_FLAG_ADAPTIVE;
6d2010ae 2821
39236c6e
A
2822 if (task->requested_policy.t_apptype == TASK_APPTYPE_DAEMON_ADAPTIVE && task->requested_policy.t_boosted == 1)
2823 *flagsp |= PROC_FLAG_ADAPTIVE_IMPORTANT;
316670eb 2824
fe8ab488 2825 if (task_is_importance_donor(task))
39236c6e 2826 *flagsp |= PROC_FLAG_IMPORTANCE_DONOR;
316670eb 2827
39236c6e
A
2828 if (task->effective_policy.t_sup_active)
2829 *flagsp |= PROC_FLAG_SUPPRESSED;
316670eb 2830
39236c6e
A
2831 return(0);
2832}
316670eb 2833
39236c6e
A
2834/* All per-thread state is in the first 32-bits of the bitfield */
2835void
2836proc_get_thread_policy(thread_t thread, thread_policy_state_t info)
316670eb 2837{
39236c6e
A
2838 task_t task = thread->task;
2839 task_lock(task);
2840 info->requested = (integer_t)task_requested_bitfield(task, thread);
2841 info->effective = (integer_t)task_effective_bitfield(task, thread);
fe8ab488 2842 info->pending = 0;
39236c6e 2843 task_unlock(task);
6d2010ae
A
2844}
2845
fe8ab488
A
2846/*
2847 * Tracepoint data... Reading the tracepoint data can be somewhat complicated.
2848 * The current scheme packs as much data into a single tracepoint as it can.
2849 *
2850 * Each task/thread requested/effective structure is 64 bits in size. Any
2851 * given tracepoint will emit either requested or effective data, but not both.
2852 *
2853 * A tracepoint may emit any of task, thread, or task & thread data.
2854 *
2855 * The type of data emitted varies with pointer size. Where possible, both
2856 * task and thread data are emitted. In LP32 systems, the first and second
2857 * halves of either the task or thread data are emitted.
2858 *
2859 * The code uses uintptr_t array indexes instead of high/low to avoid
2860 * confusion WRT big vs little endian.
2861 *
2862 * The truth table for the tracepoint data functions is below, and has the
2863 * following invariants:
2864 *
2865 * 1) task and thread are uintptr_t*
2866 * 2) task may never be NULL
2867 *
2868 *
2869 * LP32 LP64
2870 * trequested_0(task, NULL) task[0] task[0]
2871 * trequested_1(task, NULL) task[1] NULL
2872 * trequested_0(task, thread) thread[0] task[0]
2873 * trequested_1(task, thread) thread[1] thread[0]
2874 *
2875 * Basically, you get a full task or thread on LP32, and both on LP64.
2876 *
2877 * The uintptr_t munging here is squicky enough to deserve a comment.
2878 *
2879 * The variables we are accessing are laid out in memory like this:
2880 *
2881 * [ LP64 uintptr_t 0 ]
2882 * [ LP32 uintptr_t 0 ] [ LP32 uintptr_t 1 ]
2883 *
2884 * 1 2 3 4 5 6 7 8
2885 *
2886 */
316670eb 2887
39236c6e 2888static uintptr_t
fe8ab488 2889trequested_0(task_t task, thread_t thread)
6d2010ae 2890{
fe8ab488
A
2891 assert(task);
2892 _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2893 _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated");
2894
2895 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy);
2896 return raw[0];
6d2010ae
A
2897}
2898
39236c6e 2899static uintptr_t
fe8ab488 2900trequested_1(task_t task, thread_t thread)
6d2010ae 2901{
fe8ab488
A
2902 assert(task);
2903 _Static_assert(sizeof(struct task_requested_policy) == sizeof(uint64_t), "size invariant violated");
2904 _Static_assert(sizeof(task->requested_policy) == sizeof(thread->requested_policy), "size invariant violated");
2905
2906#if defined __LP64__
2907 return (thread == NULL) ? 0 : *(uintptr_t*)&thread->requested_policy;
2908#else
2909 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->requested_policy : &thread->requested_policy);
2910 return raw[1];
2911#endif
6d2010ae
A
2912}
2913
39236c6e 2914static uintptr_t
fe8ab488 2915teffective_0(task_t task, thread_t thread)
6d2010ae 2916{
fe8ab488
A
2917 assert(task);
2918 _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated");
2919 _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated");
2920
2921 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy);
2922 return raw[0];
2923}
2924
2925static uintptr_t
2926teffective_1(task_t task, thread_t thread)
2927{
2928 assert(task);
2929 _Static_assert(sizeof(struct task_effective_policy) == sizeof(uint64_t), "size invariant violated");
2930 _Static_assert(sizeof(task->effective_policy) == sizeof(thread->effective_policy), "size invariant violated");
2931
2932#if defined __LP64__
2933 return (thread == NULL) ? 0 : *(uintptr_t*)&thread->effective_policy;
2934#else
2935 uintptr_t* raw = (uintptr_t*)((thread == THREAD_NULL) ? &task->effective_policy : &thread->effective_policy);
2936 return raw[1];
2937#endif
6d2010ae
A
2938}
2939
fe8ab488
A
2940/* dump pending for tracepoint */
2941static uint32_t tpending(task_pend_token_t pend_token) { return *(uint32_t*)(void*)(pend_token); }
2942
39236c6e
A
2943uint64_t
2944task_requested_bitfield(task_t task, thread_t thread)
6d2010ae 2945{
39236c6e
A
2946 uint64_t bits = 0;
2947 struct task_requested_policy requested =
2948 (thread == THREAD_NULL) ? task->requested_policy : thread->requested_policy;
6d2010ae 2949
39236c6e
A
2950 bits |= (requested.int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2951 bits |= (requested.ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2952 bits |= (requested.int_iotier ? (((uint64_t)requested.int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2953 bits |= (requested.ext_iotier ? (((uint64_t)requested.ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2954 bits |= (requested.int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2955 bits |= (requested.ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2956 bits |= (requested.bg_iotier ? (((uint64_t)requested.bg_iotier) << POLICY_REQ_BG_IOTIER_SHIFT) : 0);
2957 bits |= (requested.terminated ? POLICY_REQ_TERMINATED : 0);
6d2010ae 2958
39236c6e
A
2959 bits |= (requested.th_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2960 bits |= (requested.th_workq_bg ? POLICY_REQ_WORKQ_BG : 0);
316670eb 2961
fe8ab488
A
2962 if (thread != THREAD_NULL) {
2963 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2964 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2965 }
2966
39236c6e
A
2967 bits |= (requested.t_boosted ? POLICY_REQ_BOOSTED : 0);
2968 bits |= (requested.t_tal_enabled ? POLICY_REQ_TAL_ENABLED : 0);
39236c6e
A
2969 bits |= (requested.t_apptype ? (((uint64_t)requested.t_apptype) << POLICY_REQ_APPTYPE_SHIFT) : 0);
2970 bits |= (requested.t_role ? (((uint64_t)requested.t_role) << POLICY_REQ_ROLE_SHIFT) : 0);
316670eb 2971
39236c6e
A
2972 bits |= (requested.t_sup_active ? POLICY_REQ_SUP_ACTIVE : 0);
2973 bits |= (requested.t_sup_lowpri_cpu ? POLICY_REQ_SUP_LOWPRI_CPU : 0);
2974 bits |= (requested.t_sup_cpu ? POLICY_REQ_SUP_CPU : 0);
2975 bits |= (requested.t_sup_timer ? (((uint64_t)requested.t_sup_timer) << POLICY_REQ_SUP_TIMER_THROTTLE_SHIFT) : 0);
2976 bits |= (requested.t_sup_throughput ? (((uint64_t)requested.t_sup_throughput) << POLICY_REQ_SUP_THROUGHPUT_SHIFT) : 0);
2977 bits |= (requested.t_sup_disk ? POLICY_REQ_SUP_DISK_THROTTLE : 0);
2978 bits |= (requested.t_sup_cpu_limit ? POLICY_REQ_SUP_CPU_LIMIT : 0);
2979 bits |= (requested.t_sup_suspend ? POLICY_REQ_SUP_SUSPEND : 0);
fe8ab488 2980 bits |= (requested.t_sup_bg_sockets ? POLICY_REQ_SUP_BG_SOCKETS : 0);
39236c6e
A
2981 bits |= (requested.t_base_latency_qos ? (((uint64_t)requested.t_base_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2982 bits |= (requested.t_over_latency_qos ? (((uint64_t)requested.t_over_latency_qos) << POLICY_REQ_OVER_LATENCY_QOS_SHIFT) : 0);
2983 bits |= (requested.t_base_through_qos ? (((uint64_t)requested.t_base_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2984 bits |= (requested.t_over_through_qos ? (((uint64_t)requested.t_over_through_qos) << POLICY_REQ_OVER_THROUGH_QOS_SHIFT) : 0);
fe8ab488
A
2985 bits |= (requested.t_sfi_managed ? POLICY_REQ_SFI_MANAGED : 0);
2986 bits |= (requested.t_qos_clamp ? (((uint64_t)requested.t_qos_clamp) << POLICY_REQ_QOS_CLAMP_SHIFT) : 0);
316670eb 2987
39236c6e 2988 return bits;
316670eb
A
2989}
2990
39236c6e
A
2991uint64_t
2992task_effective_bitfield(task_t task, thread_t thread)
2993{
2994 uint64_t bits = 0;
2995 struct task_effective_policy effective =
2996 (thread == THREAD_NULL) ? task->effective_policy : thread->effective_policy;
316670eb 2997
39236c6e
A
2998 bits |= (effective.io_tier ? (((uint64_t)effective.io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2999 bits |= (effective.io_passive ? POLICY_EFF_IO_PASSIVE : 0);
3000 bits |= (effective.darwinbg ? POLICY_EFF_DARWIN_BG : 0);
3001 bits |= (effective.lowpri_cpu ? POLICY_EFF_LOWPRI_CPU : 0);
3002 bits |= (effective.terminated ? POLICY_EFF_TERMINATED : 0);
3003 bits |= (effective.all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
3004 bits |= (effective.new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
3005 bits |= (effective.bg_iotier ? (((uint64_t)effective.bg_iotier) << POLICY_EFF_BG_IOTIER_SHIFT) : 0);
fe8ab488
A
3006 bits |= (effective.qos_ui_is_urgent ? POLICY_EFF_QOS_UI_IS_URGENT : 0);
3007
3008 if (thread != THREAD_NULL)
3009 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
316670eb 3010
39236c6e
A
3011 bits |= (effective.t_tal_engaged ? POLICY_EFF_TAL_ENGAGED : 0);
3012 bits |= (effective.t_suspended ? POLICY_EFF_SUSPENDED : 0);
3013 bits |= (effective.t_watchers_bg ? POLICY_EFF_WATCHERS_BG : 0);
3014 bits |= (effective.t_sup_active ? POLICY_EFF_SUP_ACTIVE : 0);
3015 bits |= (effective.t_suppressed_cpu ? POLICY_EFF_SUP_CPU : 0);
3016 bits |= (effective.t_role ? (((uint64_t)effective.t_role) << POLICY_EFF_ROLE_SHIFT) : 0);
3017 bits |= (effective.t_latency_qos ? (((uint64_t)effective.t_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
3018 bits |= (effective.t_through_qos ? (((uint64_t)effective.t_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
fe8ab488
A
3019 bits |= (effective.t_sfi_managed ? POLICY_EFF_SFI_MANAGED : 0);
3020 bits |= (effective.t_qos_ceiling ? (((uint64_t)effective.t_qos_ceiling) << POLICY_EFF_QOS_CEILING_SHIFT) : 0);
6d2010ae 3021
39236c6e 3022 return bits;
6d2010ae
A
3023}
3024
6d2010ae 3025
39236c6e
A
3026/*
3027 * Resource usage and CPU related routines
3028 */
6d2010ae 3029
6d2010ae 3030int
39236c6e 3031proc_get_task_ruse_cpu(task_t task, uint32_t *policyp, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep)
6d2010ae
A
3032{
3033
3034 int error = 0;
39236c6e 3035 int scope;
6d2010ae
A
3036
3037 task_lock(task);
39236c6e 3038
6d2010ae 3039
39236c6e
A
3040 error = task_get_cpuusage(task, percentagep, intervalp, deadlinep, &scope);
3041 task_unlock(task);
3042
3043 /*
3044 * Reverse-map from CPU resource limit scopes back to policies (see comment below).
3045 */
3046 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3047 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC;
3048 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3049 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE;
3050 } else if (scope == TASK_RUSECPU_FLAGS_DEADLINE) {
3051 *policyp = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
3052 }
6d2010ae
A
3053
3054 return(error);
3055}
3056
39236c6e
A
3057/*
3058 * Configure the default CPU usage monitor parameters.
3059 *
3060 * For tasks which have this mechanism activated: if any thread in the
3061 * process consumes more CPU than this, an EXC_RESOURCE exception will be generated.
3062 */
3063void
3064proc_init_cpumon_params(void)
3065{
3e170ce0
A
3066 /*
3067 * The max CPU percentage can be configured via the boot-args and
3068 * a key in the device tree. The boot-args are honored first, then the
3069 * device tree.
3070 */
39236c6e 3071 if (!PE_parse_boot_argn("max_cpumon_percentage", &proc_max_cpumon_percentage,
3e170ce0
A
3072 sizeof (proc_max_cpumon_percentage)))
3073 {
3074 uint64_t max_percentage = 0ULL;
3075
3076 if (!PE_get_default("kern.max_cpumon_percentage", &max_percentage,
3077 sizeof(max_percentage)))
3078 {
3079 max_percentage = DEFAULT_CPUMON_PERCENTAGE;
3080 }
3081
3082 assert(max_percentage <= UINT8_MAX);
3083 proc_max_cpumon_percentage = (uint8_t) max_percentage;
39236c6e
A
3084 }
3085
3086 if (proc_max_cpumon_percentage > 100) {
3087 proc_max_cpumon_percentage = 100;
3088 }
3089
3e170ce0
A
3090 /*
3091 * The interval should be specified in seconds.
3092 *
3093 * Like the max CPU percentage, the max CPU interval can be configured
3094 * via boot-args and the device tree.
3095 */
39236c6e 3096 if (!PE_parse_boot_argn("max_cpumon_interval", &proc_max_cpumon_interval,
3e170ce0
A
3097 sizeof (proc_max_cpumon_interval)))
3098 {
3099 if (!PE_get_default("kern.max_cpumon_interval", &proc_max_cpumon_interval,
3100 sizeof(proc_max_cpumon_interval)))
3101 {
3102 proc_max_cpumon_interval = DEFAULT_CPUMON_INTERVAL;
3103 }
39236c6e
A
3104 }
3105
3106 proc_max_cpumon_interval *= NSEC_PER_SEC;
fe8ab488
A
3107
3108 /* TEMPORARY boot arg to control App suppression */
3109 PE_parse_boot_argn("task_policy_suppression_disable",
3110 &task_policy_suppression_disable,
3111 sizeof(task_policy_suppression_disable));
39236c6e
A
3112}
3113
316670eb
A
3114/*
3115 * Currently supported configurations for CPU limits.
3116 *
39236c6e
A
3117 * Policy | Deadline-based CPU limit | Percentage-based CPU limit
3118 * -------------------------------------+--------------------------+------------------------------
3119 * PROC_POLICY_RSRCACT_THROTTLE | ENOTSUP | Task-wide scope only
3120 * PROC_POLICY_RSRCACT_SUSPEND | Task-wide scope only | ENOTSUP
3121 * PROC_POLICY_RSRCACT_TERMINATE | Task-wide scope only | ENOTSUP
3122 * PROC_POLICY_RSRCACT_NOTIFY_KQ | Task-wide scope only | ENOTSUP
3123 * PROC_POLICY_RSRCACT_NOTIFY_EXC | ENOTSUP | Per-thread scope only
316670eb
A
3124 *
3125 * A deadline-based CPU limit is actually a simple wallclock timer - the requested action is performed
3126 * after the specified amount of wallclock time has elapsed.
3127 *
3128 * A percentage-based CPU limit performs the requested action after the specified amount of actual CPU time
3129 * has been consumed -- regardless of how much wallclock time has elapsed -- by either the task as an
3130 * aggregate entity (so-called "Task-wide" or "Proc-wide" scope, whereby the CPU time consumed by all threads
3131 * in the task are added together), or by any one thread in the task (so-called "per-thread" scope).
3132 *
3133 * We support either deadline != 0 OR percentage != 0, but not both. The original intention in having them
3134 * share an API was to use actual CPU time as the basis of the deadline-based limit (as in: perform an action
3135 * after I have used some amount of CPU time; this is different than the recurring percentage/interval model)
3136 * but the potential consumer of the API at the time was insisting on wallclock time instead.
3137 *
3138 * Currently, requesting notification via an exception is the only way to get per-thread scope for a
3139 * CPU limit. All other types of notifications force task-wide scope for the limit.
3140 */
6d2010ae 3141int
39236c6e
A
3142proc_set_task_ruse_cpu(task_t task, uint32_t policy, uint8_t percentage, uint64_t interval, uint64_t deadline,
3143 int cpumon_entitled)
6d2010ae
A
3144{
3145 int error = 0;
316670eb
A
3146 int scope;
3147
3148 /*
3149 * Enforce the matrix of supported configurations for policy, percentage, and deadline.
3150 */
3151 switch (policy) {
3152 // If no policy is explicitly given, the default is to throttle.
3153 case TASK_POLICY_RESOURCE_ATTRIBUTE_NONE:
3154 case TASK_POLICY_RESOURCE_ATTRIBUTE_THROTTLE:
3155 if (deadline != 0)
3156 return (ENOTSUP);
3157 scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
3158 break;
3159 case TASK_POLICY_RESOURCE_ATTRIBUTE_SUSPEND:
3160 case TASK_POLICY_RESOURCE_ATTRIBUTE_TERMINATE:
3161 case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_KQ:
3162 if (percentage != 0)
3163 return (ENOTSUP);
3164 scope = TASK_RUSECPU_FLAGS_DEADLINE;
3165 break;
3166 case TASK_POLICY_RESOURCE_ATTRIBUTE_NOTIFY_EXC:
3167 if (deadline != 0)
3168 return (ENOTSUP);
3169 scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
39236c6e
A
3170#ifdef CONFIG_NOMONITORS
3171 return (error);
3172#endif /* CONFIG_NOMONITORS */
316670eb
A
3173 break;
3174 default:
3175 return (EINVAL);
3176 }
6d2010ae
A
3177
3178 task_lock(task);
3179 if (task != current_task()) {
39236c6e 3180 task->policy_ru_cpu_ext = policy;
6d2010ae 3181 } else {
39236c6e 3182 task->policy_ru_cpu = policy;
6d2010ae 3183 }
39236c6e 3184 error = task_set_cpuusage(task, percentage, interval, deadline, scope, cpumon_entitled);
6d2010ae
A
3185 task_unlock(task);
3186 return(error);
3187}
3188
316670eb 3189int
39236c6e 3190proc_clear_task_ruse_cpu(task_t task, int cpumon_entitled)
316670eb
A
3191{
3192 int error = 0;
3193 int action;
3194 void * bsdinfo = NULL;
3195
3196 task_lock(task);
3197 if (task != current_task()) {
39236c6e 3198 task->policy_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
316670eb 3199 } else {
39236c6e 3200 task->policy_ru_cpu = TASK_POLICY_RESOURCE_ATTRIBUTE_DEFAULT;
316670eb
A
3201 }
3202
39236c6e 3203 error = task_clear_cpuusage_locked(task, cpumon_entitled);
316670eb
A
3204 if (error != 0)
3205 goto out;
3206
39236c6e
A
3207 action = task->applied_ru_cpu;
3208 if (task->applied_ru_cpu_ext != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
316670eb 3209 /* reset action */
39236c6e 3210 task->applied_ru_cpu_ext = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
316670eb
A
3211 }
3212 if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3213 bsdinfo = task->bsd_info;
3214 task_unlock(task);
3215 proc_restore_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
3216 goto out1;
3217 }
3218
3219out:
3220 task_unlock(task);
3221out1:
3222 return(error);
3223
3224}
6d2010ae
A
3225
3226/* used to apply resource limit related actions */
3227static int
3228task_apply_resource_actions(task_t task, int type)
3229{
3230 int action = TASK_POLICY_RESOURCE_ATTRIBUTE_NONE;
3231 void * bsdinfo = NULL;
3232
3233 switch (type) {
3234 case TASK_POLICY_CPU_RESOURCE_USAGE:
3235 break;
3236 case TASK_POLICY_WIREDMEM_RESOURCE_USAGE:
3237 case TASK_POLICY_VIRTUALMEM_RESOURCE_USAGE:
3238 case TASK_POLICY_DISK_RESOURCE_USAGE:
3239 case TASK_POLICY_NETWORK_RESOURCE_USAGE:
3240 case TASK_POLICY_POWER_RESOURCE_USAGE:
3241 return(0);
3242
3243 default:
3244 return(1);
3245 };
3246
3247 /* only cpu actions for now */
3248 task_lock(task);
3249
39236c6e 3250 if (task->applied_ru_cpu_ext == TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
6d2010ae 3251 /* apply action */
39236c6e
A
3252 task->applied_ru_cpu_ext = task->policy_ru_cpu_ext;
3253 action = task->applied_ru_cpu_ext;
316670eb 3254 } else {
39236c6e 3255 action = task->applied_ru_cpu_ext;
6d2010ae 3256 }
316670eb 3257
6d2010ae
A
3258 if (action != TASK_POLICY_RESOURCE_ATTRIBUTE_NONE) {
3259 bsdinfo = task->bsd_info;
3260 task_unlock(task);
3261 proc_apply_resource_actions(bsdinfo, TASK_POLICY_CPU_RESOURCE_USAGE, action);
3262 } else
3263 task_unlock(task);
3264
3265 return(0);
3266}
3267
39236c6e
A
3268/*
3269 * XXX This API is somewhat broken; we support multiple simultaneous CPU limits, but the get/set API
3270 * only allows for one at a time. This means that if there is a per-thread limit active, the other
3271 * "scopes" will not be accessible via this API. We could change it to pass in the scope of interest
3272 * to the caller, and prefer that, but there's no need for that at the moment.
3273 */
3274int
3275task_get_cpuusage(task_t task, uint8_t *percentagep, uint64_t *intervalp, uint64_t *deadlinep, int *scope)
3276{
3277 *percentagep = 0;
3278 *intervalp = 0;
3279 *deadlinep = 0;
3280
3281 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) != 0) {
3282 *scope = TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3283 *percentagep = task->rusage_cpu_perthr_percentage;
3284 *intervalp = task->rusage_cpu_perthr_interval;
3285 } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) != 0) {
3286 *scope = TASK_RUSECPU_FLAGS_PROC_LIMIT;
3287 *percentagep = task->rusage_cpu_percentage;
3288 *intervalp = task->rusage_cpu_interval;
3289 } else if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) != 0) {
3290 *scope = TASK_RUSECPU_FLAGS_DEADLINE;
3291 *deadlinep = task->rusage_cpu_deadline;
3292 } else {
3293 *scope = 0;
3294 }
316670eb
A
3295
3296 return(0);
3297}
3298
39236c6e
A
3299/*
3300 * Disable the CPU usage monitor for the task. Return value indicates
3301 * if the mechanism was actually enabled.
3302 */
3303int
3304task_disable_cpumon(task_t task) {
3305 thread_t thread;
3306
3307 task_lock_assert_owned(task);
3308
3309 if ((task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) == 0) {
3310 return (KERN_INVALID_ARGUMENT);
3311 }
3312
3313#if CONFIG_TELEMETRY
3314 /*
3315 * Disable task-wide telemetry if it was ever enabled by the CPU usage
3316 * monitor's warning zone.
3317 */
fe8ab488 3318 telemetry_task_ctl_locked(task, TF_CPUMON_WARNING, 0);
39236c6e
A
3319#endif
3320
3321 /*
3322 * Disable the monitor for the task, and propagate that change to each thread.
3323 */
3324 task->rusage_cpu_flags &= ~(TASK_RUSECPU_FLAGS_PERTHR_LIMIT | TASK_RUSECPU_FLAGS_FATAL_CPUMON);
3325 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3326 set_astledger(thread);
3327 }
3328 task->rusage_cpu_perthr_percentage = 0;
3329 task->rusage_cpu_perthr_interval = 0;
3330
3331 return (KERN_SUCCESS);
3332}
3333
6d2010ae 3334int
39236c6e 3335task_set_cpuusage(task_t task, uint8_t percentage, uint64_t interval, uint64_t deadline, int scope, int cpumon_entitled)
6d2010ae 3336{
39236c6e 3337 thread_t thread;
316670eb 3338 uint64_t abstime = 0;
316670eb 3339 uint64_t limittime = 0;
6d2010ae 3340
316670eb
A
3341 lck_mtx_assert(&task->lock, LCK_MTX_ASSERT_OWNED);
3342
3343 /* By default, refill once per second */
3344 if (interval == 0)
3345 interval = NSEC_PER_SEC;
3346
39236c6e
A
3347 if (percentage != 0) {
3348 if (scope == TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3349 boolean_t warn = FALSE;
3350
3351 /*
3352 * A per-thread CPU limit on a task generates an exception
3353 * (LEDGER_ACTION_EXCEPTION) if any one thread in the task
3354 * exceeds the limit.
3355 */
3356
3357 if (percentage == TASK_POLICY_CPUMON_DISABLE) {
3358 if (cpumon_entitled) {
3359 task_disable_cpumon(task);
3360 return (0);
3361 }
3362
3363 /*
3364 * This task wishes to disable the CPU usage monitor, but it's
3365 * missing the required entitlement:
3366 * com.apple.private.kernel.override-cpumon
3367 *
3368 * Instead, treat this as a request to reset its params
3369 * back to the defaults.
3370 */
3371 warn = TRUE;
3372 percentage = TASK_POLICY_CPUMON_DEFAULTS;
3373 }
3374
3375 if (percentage == TASK_POLICY_CPUMON_DEFAULTS) {
3376 percentage = proc_max_cpumon_percentage;
3377 interval = proc_max_cpumon_interval;
3378 }
3379
3380 if (percentage > 100) {
3381 percentage = 100;
3382 }
3383
3384 /*
3385 * Passing in an interval of -1 means either:
3386 * - Leave the interval as-is, if there's already a per-thread
3387 * limit configured
3388 * - Use the system default.
3389 */
3390 if (interval == -1ULL) {
3391 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PERTHR_LIMIT) {
3392 interval = task->rusage_cpu_perthr_interval;
3393 } else {
3394 interval = proc_max_cpumon_interval;
3395 }
3396 }
3397
316670eb 3398 /*
39236c6e
A
3399 * Enforce global caps on CPU usage monitor here if the process is not
3400 * entitled to escape the global caps.
316670eb 3401 */
39236c6e
A
3402 if ((percentage > proc_max_cpumon_percentage) && (cpumon_entitled == 0)) {
3403 warn = TRUE;
3404 percentage = proc_max_cpumon_percentage;
3405 }
3406
3407 if ((interval > proc_max_cpumon_interval) && (cpumon_entitled == 0)) {
3408 warn = TRUE;
3409 interval = proc_max_cpumon_interval;
3410 }
3411
3412 if (warn) {
3413 int pid = 0;
3e170ce0 3414 const char *procname = "unknown";
39236c6e
A
3415
3416#ifdef MACH_BSD
3417 pid = proc_selfpid();
3418 if (current_task()->bsd_info != NULL) {
3419 procname = proc_name_address(current_task()->bsd_info);
3420 }
3421#endif
3422
3423 printf("process %s[%d] denied attempt to escape CPU monitor"
3424 " (missing required entitlement).\n", procname, pid);
3425 }
3426
316670eb
A
3427 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PERTHR_LIMIT;
3428 task->rusage_cpu_perthr_percentage = percentage;
3429 task->rusage_cpu_perthr_interval = interval;
3430 queue_iterate(&task->threads, thread, thread_t, task_threads) {
3431 set_astledger(thread);
3432 }
3433 } else if (scope == TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3434 /*
3435 * Currently, a proc-wide CPU limit always blocks if the limit is
3436 * exceeded (LEDGER_ACTION_BLOCK).
3437 */
3438 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_PROC_LIMIT;
3439 task->rusage_cpu_percentage = percentage;
3440 task->rusage_cpu_interval = interval;
3441
39236c6e
A
3442 limittime = (interval * percentage) / 100;
3443 nanoseconds_to_absolutetime(limittime, &abstime);
3444
3445 ledger_set_limit(task->ledger, task_ledgers.cpu_time, abstime, 0);
316670eb
A
3446 ledger_set_period(task->ledger, task_ledgers.cpu_time, interval);
3447 ledger_set_action(task->ledger, task_ledgers.cpu_time, LEDGER_ACTION_BLOCK);
3448 }
3449 }
6d2010ae 3450
316670eb
A
3451 if (deadline != 0) {
3452 assert(scope == TASK_RUSECPU_FLAGS_DEADLINE);
3453
3454 /* if already in use, cancel and wait for it to cleanout */
3455 if (task->rusage_cpu_callt != NULL) {
3456 task_unlock(task);
3457 thread_call_cancel_wait(task->rusage_cpu_callt);
3458 task_lock(task);
3459 }
3460 if (task->rusage_cpu_callt == NULL) {
3461 task->rusage_cpu_callt = thread_call_allocate_with_priority(task_action_cpuusage, (thread_call_param_t)task, THREAD_CALL_PRIORITY_KERNEL);
3462 }
3463 /* setup callout */
3464 if (task->rusage_cpu_callt != 0) {
39236c6e
A
3465 uint64_t save_abstime = 0;
3466
316670eb
A
3467 task->rusage_cpu_flags |= TASK_RUSECPU_FLAGS_DEADLINE;
3468 task->rusage_cpu_deadline = deadline;
3469
3470 nanoseconds_to_absolutetime(deadline, &abstime);
3471 save_abstime = abstime;
3472 clock_absolutetime_interval_to_deadline(save_abstime, &abstime);
3473 thread_call_enter_delayed(task->rusage_cpu_callt, abstime);
3474 }
6d2010ae 3475 }
6d2010ae
A
3476
3477 return(0);
6d2010ae
A
3478}
3479
316670eb 3480int
39236c6e 3481task_clear_cpuusage(task_t task, int cpumon_entitled)
6d2010ae 3482{
316670eb 3483 int retval = 0;
6d2010ae 3484
316670eb 3485 task_lock(task);
39236c6e 3486 retval = task_clear_cpuusage_locked(task, cpumon_entitled);
316670eb
A
3487 task_unlock(task);
3488
3489 return(retval);
6d2010ae
A
3490}
3491
316670eb 3492int
39236c6e 3493task_clear_cpuusage_locked(task_t task, int cpumon_entitled)
6d2010ae 3494{
316670eb 3495 thread_call_t savecallt;
316670eb
A
3496
3497 /* cancel percentage handling if set */
3498 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_PROC_LIMIT) {
3499 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_PROC_LIMIT;
39236c6e 3500 ledger_set_limit(task->ledger, task_ledgers.cpu_time, LEDGER_LIMIT_INFINITY, 0);
316670eb
A
3501 task->rusage_cpu_percentage = 0;
3502 task->rusage_cpu_interval = 0;
3503 }
3504
39236c6e
A
3505 /*
3506 * Disable the CPU usage monitor.
3507 */
3508 if (cpumon_entitled) {
3509 task_disable_cpumon(task);
316670eb
A
3510 }
3511
3512 /* cancel deadline handling if set */
3513 if (task->rusage_cpu_flags & TASK_RUSECPU_FLAGS_DEADLINE) {
3514 task->rusage_cpu_flags &= ~TASK_RUSECPU_FLAGS_DEADLINE;
3515 if (task->rusage_cpu_callt != 0) {
3516 savecallt = task->rusage_cpu_callt;
3517 task->rusage_cpu_callt = NULL;
3518 task->rusage_cpu_deadline = 0;
3519 task_unlock(task);
3520 thread_call_cancel_wait(savecallt);
3521 thread_call_free(savecallt);
3522 task_lock(task);
3523 }
3524 }
6d2010ae
A
3525 return(0);
3526}
3527
3528/* called by ledger unit to enforce action due to resource usage criteria being met */
316670eb
A
3529void
3530task_action_cpuusage(thread_call_param_t param0, __unused thread_call_param_t param1)
3531{
3532 task_t task = (task_t)param0;
3533 (void)task_apply_resource_actions(task, TASK_POLICY_CPU_RESOURCE_USAGE);
3534 return;
3535}
3536
316670eb 3537
39236c6e
A
3538/*
3539 * Routines for taskwatch and pidbind
3540 */
6d2010ae 3541
316670eb 3542
39236c6e
A
3543/*
3544 * Routines for importance donation/inheritance/boosting
3545 */
316670eb 3546
fe8ab488
A
3547static void
3548task_importance_update_live_donor(task_t target_task)
3549{
3550#if IMPORTANCE_INHERITANCE
3551
3552 ipc_importance_task_t task_imp;
3553
3554 task_imp = ipc_importance_for_task(target_task, FALSE);
3555 if (IIT_NULL != task_imp) {
3556 ipc_importance_task_update_live_donor(task_imp);
3557 ipc_importance_task_release(task_imp);
3558 }
3559#endif /* IMPORTANCE_INHERITANCE */
3560}
3561
39236c6e
A
3562void
3563task_importance_mark_donor(task_t task, boolean_t donating)
3564{
3565#if IMPORTANCE_INHERITANCE
fe8ab488
A
3566 ipc_importance_task_t task_imp;
3567
3568 task_imp = ipc_importance_for_task(task, FALSE);
3569 if (IIT_NULL != task_imp) {
3570 ipc_importance_task_mark_donor(task_imp, donating);
3571 ipc_importance_task_release(task_imp);
3572 }
3573#endif /* IMPORTANCE_INHERITANCE */
3574}
3575
3576void
3577task_importance_mark_live_donor(task_t task, boolean_t live_donating)
3578{
3579#if IMPORTANCE_INHERITANCE
3580 ipc_importance_task_t task_imp;
3581
3582 task_imp = ipc_importance_for_task(task, FALSE);
3583 if (IIT_NULL != task_imp) {
3584 ipc_importance_task_mark_live_donor(task_imp, live_donating);
3585 ipc_importance_task_release(task_imp);
3586 }
39236c6e
A
3587#endif /* IMPORTANCE_INHERITANCE */
3588}
316670eb 3589
39236c6e
A
3590void
3591task_importance_mark_receiver(task_t task, boolean_t receiving)
3592{
3593#if IMPORTANCE_INHERITANCE
fe8ab488 3594 ipc_importance_task_t task_imp;
39236c6e 3595
fe8ab488
A
3596 task_imp = ipc_importance_for_task(task, FALSE);
3597 if (IIT_NULL != task_imp) {
3598 ipc_importance_task_mark_receiver(task_imp, receiving);
3599 ipc_importance_task_release(task_imp);
316670eb 3600 }
39236c6e
A
3601#endif /* IMPORTANCE_INHERITANCE */
3602}
316670eb 3603
fe8ab488
A
3604void
3605task_importance_mark_denap_receiver(task_t task, boolean_t denap)
3606{
3607#if IMPORTANCE_INHERITANCE
3608 ipc_importance_task_t task_imp;
3609
3610 task_imp = ipc_importance_for_task(task, FALSE);
3611 if (IIT_NULL != task_imp) {
3612 ipc_importance_task_mark_denap_receiver(task_imp, denap);
3613 ipc_importance_task_release(task_imp);
3614 }
3615#endif /* IMPORTANCE_INHERITANCE */
3616}
316670eb 3617
fe8ab488
A
3618void
3619task_importance_reset(__imp_only task_t task)
3620{
39236c6e 3621#if IMPORTANCE_INHERITANCE
fe8ab488 3622 ipc_importance_task_t task_imp;
316670eb 3623
fe8ab488
A
3624 /* TODO: Lower importance downstream before disconnect */
3625 task_imp = task->task_imp_base;
3626 ipc_importance_reset(task_imp, FALSE);
3627 task_importance_update_live_donor(task);
3628#endif /* IMPORTANCE_INHERITANCE */
3629}
3630
3631#if IMPORTANCE_INHERITANCE
3632
3633/*
3634 * Sets the task boost bit to the provided value. Does NOT run the update function.
3635 *
3636 * Task lock must be held.
3637 */
3638void
3639task_set_boost_locked(task_t task, boolean_t boost_active)
39236c6e
A
3640{
3641#if IMPORTANCE_DEBUG
3642 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_START),
3e170ce0 3643 proc_selfpid(), task_pid(task), trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
39236c6e 3644#endif
316670eb 3645
fe8ab488 3646 task->requested_policy.t_boosted = boost_active;
39236c6e
A
3647
3648#if IMPORTANCE_DEBUG
3649 if (boost_active == TRUE){
3e170ce0 3650 DTRACE_BOOST2(boost, task_t, task, int, task_pid(task));
39236c6e 3651 } else {
3e170ce0 3652 DTRACE_BOOST2(unboost, task_t, task, int, task_pid(task));
39236c6e
A
3653 }
3654 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_BOOST, (boost_active ? IMP_BOOSTED : IMP_UNBOOSTED)) | DBG_FUNC_END),
3e170ce0 3655 proc_selfpid(), task_pid(task),
fe8ab488 3656 trequested_0(task, THREAD_NULL), trequested_1(task, THREAD_NULL), 0);
39236c6e 3657#endif
316670eb
A
3658}
3659
fe8ab488
A
3660/*
3661 * Sets the task boost bit to the provided value and applies the update.
3662 *
3663 * Task lock must be held. Must call update complete after unlocking the task.
3664 */
3665void
3666task_update_boost_locked(task_t task, boolean_t boost_active, task_pend_token_t pend_token)
3667{
3668 task_set_boost_locked(task, boost_active);
3669
3670 task_policy_update_locked(task, THREAD_NULL, pend_token);
3671}
3672
39236c6e
A
3673/*
3674 * Check if this task should donate importance.
3675 *
3676 * May be called without taking the task lock. In that case, donor status can change
3677 * so you must check only once for each donation event.
3678 */
3679boolean_t
3680task_is_importance_donor(task_t task)
316670eb 3681{
fe8ab488
A
3682 if (task->task_imp_base == IIT_NULL)
3683 return FALSE;
3684 return ipc_importance_task_is_donor(task->task_imp_base);
316670eb
A
3685}
3686
39236c6e 3687/*
fe8ab488 3688 * Query the status of the task's donor mark.
39236c6e
A
3689 */
3690boolean_t
fe8ab488 3691task_is_marked_importance_donor(task_t task)
316670eb 3692{
fe8ab488
A
3693 if (task->task_imp_base == IIT_NULL)
3694 return FALSE;
3695 return ipc_importance_task_is_marked_donor(task->task_imp_base);
316670eb
A
3696}
3697
39236c6e 3698/*
fe8ab488 3699 * Query the status of the task's live donor and donor mark.
39236c6e 3700 */
fe8ab488
A
3701boolean_t
3702task_is_marked_live_importance_donor(task_t task)
316670eb 3703{
fe8ab488
A
3704 if (task->task_imp_base == IIT_NULL)
3705 return FALSE;
3706 return ipc_importance_task_is_marked_live_donor(task->task_imp_base);
3707}
316670eb 3708
39236c6e 3709
fe8ab488
A
3710/*
3711 * This routine may be called without holding task lock
3712 * since the value of imp_receiver can never be unset.
3713 */
3714boolean_t
3715task_is_importance_receiver(task_t task)
3716{
3717 if (task->task_imp_base == IIT_NULL)
3718 return FALSE;
3719 return ipc_importance_task_is_marked_receiver(task->task_imp_base);
316670eb
A
3720}
3721
fe8ab488
A
3722/*
3723 * Query the task's receiver mark.
3724 */
3725boolean_t
3726task_is_marked_importance_receiver(task_t task)
316670eb 3727{
fe8ab488
A
3728 if (task->task_imp_base == IIT_NULL)
3729 return FALSE;
3730 return ipc_importance_task_is_marked_receiver(task->task_imp_base);
316670eb
A
3731}
3732
fe8ab488
A
3733/*
3734 * This routine may be called without holding task lock
3735 * since the value of de-nap receiver can never be unset.
3736 */
3737boolean_t
3738task_is_importance_denap_receiver(task_t task)
39236c6e 3739{
fe8ab488
A
3740 if (task->task_imp_base == IIT_NULL)
3741 return FALSE;
3742 return ipc_importance_task_is_denap_receiver(task->task_imp_base);
39236c6e
A
3743}
3744
fe8ab488
A
3745/*
3746 * Query the task's de-nap receiver mark.
3747 */
3748boolean_t
3749task_is_marked_importance_denap_receiver(task_t task)
316670eb 3750{
fe8ab488
A
3751 if (task->task_imp_base == IIT_NULL)
3752 return FALSE;
3753 return ipc_importance_task_is_marked_denap_receiver(task->task_imp_base);
39236c6e 3754}
316670eb 3755
39236c6e 3756/*
fe8ab488
A
3757 * This routine may be called without holding task lock
3758 * since the value of imp_receiver can never be unset.
39236c6e 3759 */
fe8ab488
A
3760boolean_t
3761task_is_importance_receiver_type(task_t task)
39236c6e 3762{
fe8ab488
A
3763 if (task->task_imp_base == IIT_NULL)
3764 return FALSE;
3765 return (task_is_importance_receiver(task) ||
3766 task_is_importance_denap_receiver(task));
39236c6e 3767}
316670eb 3768
39236c6e 3769/*
fe8ab488
A
3770 * External importance assertions are managed by the process in userspace
3771 * Internal importance assertions are the responsibility of the kernel
3772 * Assertions are changed from internal to external via task_importance_externalize_assertion
39236c6e 3773 */
fe8ab488
A
3774
3775int
3776task_importance_hold_watchport_assertion(task_t target_task, uint32_t count)
39236c6e 3777{
fe8ab488
A
3778 ipc_importance_task_t task_imp;
3779 kern_return_t ret;
316670eb 3780
fe8ab488
A
3781 /* must already have set up an importance */
3782 task_imp = target_task->task_imp_base;
3783 assert(IIT_NULL != task_imp);
316670eb 3784
fe8ab488
A
3785 ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3786 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3787}
316670eb 3788
fe8ab488
A
3789int
3790task_importance_hold_internal_assertion(task_t target_task, uint32_t count)
3791{
3792 ipc_importance_task_t task_imp;
3793 kern_return_t ret;
39236c6e 3794
fe8ab488
A
3795 /* may be first time, so allow for possible importance setup */
3796 task_imp = ipc_importance_for_task(target_task, FALSE);
3797 if (IIT_NULL == task_imp) {
3798 return EOVERFLOW;
316670eb 3799 }
fe8ab488
A
3800 ret = ipc_importance_task_hold_internal_assertion(task_imp, count);
3801 ipc_importance_task_release(task_imp);
316670eb 3802
fe8ab488
A
3803 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3804}
39236c6e 3805
fe8ab488
A
3806int
3807task_importance_hold_file_lock_assertion(task_t target_task, uint32_t count)
3808{
3809 ipc_importance_task_t task_imp;
3810 kern_return_t ret;
39236c6e 3811
fe8ab488
A
3812 /* may be first time, so allow for possible importance setup */
3813 task_imp = ipc_importance_for_task(target_task, FALSE);
3814 if (IIT_NULL == task_imp) {
3815 return EOVERFLOW;
316670eb 3816 }
fe8ab488
A
3817 ret = ipc_importance_task_hold_file_lock_assertion(task_imp, count);
3818 ipc_importance_task_release(task_imp);
39236c6e 3819
fe8ab488 3820 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
316670eb
A
3821}
3822
39236c6e 3823int
fe8ab488 3824task_importance_hold_legacy_external_assertion(task_t target_task, uint32_t count)
316670eb 3825{
fe8ab488
A
3826 ipc_importance_task_t task_imp;
3827 kern_return_t ret;
3828
3829 /* must already have set up an importance */
3830 task_imp = target_task->task_imp_base;
3831 if (IIT_NULL == task_imp) {
3832 return EOVERFLOW;
3833 }
3834 ret = ipc_importance_task_hold_legacy_external_assertion(task_imp, count);
3835 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
316670eb
A
3836}
3837
fe8ab488
A
3838int
3839task_importance_drop_internal_assertion(task_t target_task, uint32_t count)
3840{
3841 ipc_importance_task_t task_imp;
3842 kern_return_t ret;
3843
3844 /* must already have set up an importance */
3845 task_imp = target_task->task_imp_base;
3846 if (IIT_NULL == task_imp) {
3847 return EOVERFLOW;
3848 }
3849 ret = ipc_importance_task_drop_internal_assertion(target_task->task_imp_base, count);
3850 return (KERN_SUCCESS != ret) ? ENOTSUP : 0;
3851}
39236c6e 3852
fe8ab488
A
3853int
3854task_importance_drop_file_lock_assertion(task_t target_task, uint32_t count)
316670eb 3855{
fe8ab488
A
3856 ipc_importance_task_t task_imp;
3857 kern_return_t ret;
3858
3859 /* must already have set up an importance */
3860 task_imp = target_task->task_imp_base;
3861 if (IIT_NULL == task_imp) {
3862 return EOVERFLOW;
3863 }
3864 ret = ipc_importance_task_drop_file_lock_assertion(target_task->task_imp_base, count);
3865 return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
3866}
316670eb 3867
fe8ab488
A
3868int
3869task_importance_drop_legacy_external_assertion(task_t target_task, uint32_t count)
3870{
3871 ipc_importance_task_t task_imp;
3872 kern_return_t ret;
3873
3874 /* must already have set up an importance */
3875 task_imp = target_task->task_imp_base;
3876 if (IIT_NULL == task_imp) {
3877 return EOVERFLOW;
3878 }
3879 ret = ipc_importance_task_drop_legacy_external_assertion(task_imp, count);
3880 return (KERN_SUCCESS != ret) ? EOVERFLOW : 0;
316670eb
A
3881}
3882
fe8ab488
A
3883static void
3884task_add_importance_watchport(task_t task, mach_port_t port, int *boostp)
316670eb 3885{
39236c6e 3886 int boost = 0;
316670eb 3887
39236c6e 3888 __impdebug_only int released_pid = 0;
3e170ce0 3889 __impdebug_only int pid = task_pid(task);
316670eb 3890
fe8ab488 3891 ipc_importance_task_t release_imp_task = IIT_NULL;
316670eb 3892
39236c6e 3893 if (IP_VALID(port) != 0) {
fe8ab488
A
3894 ipc_importance_task_t new_imp_task = ipc_importance_for_task(task, FALSE);
3895
39236c6e 3896 ip_lock(port);
316670eb 3897
39236c6e
A
3898 /*
3899 * The port must have been marked tempowner already.
3900 * This also filters out ports whose receive rights
3901 * are already enqueued in a message, as you can't
3902 * change the right's destination once it's already
3903 * on its way.
316670eb 3904 */
39236c6e
A
3905 if (port->ip_tempowner != 0) {
3906 assert(port->ip_impdonation != 0);
3907
3908 boost = port->ip_impcount;
fe8ab488 3909 if (IIT_NULL != port->ip_imp_task) {
39236c6e
A
3910 /*
3911 * if this port is already bound to a task,
3912 * release the task reference and drop any
3913 * watchport-forwarded boosts
3914 */
3915 release_imp_task = port->ip_imp_task;
fe8ab488 3916 port->ip_imp_task = IIT_NULL;
39236c6e 3917 }
316670eb 3918
fe8ab488
A
3919 /* mark the port is watching another task (reference held in port->ip_imp_task) */
3920 if (ipc_importance_task_is_marked_receiver(new_imp_task)) {
3921 port->ip_imp_task = new_imp_task;
3922 new_imp_task = IIT_NULL;
3923 }
39236c6e
A
3924 }
3925 ip_unlock(port);
316670eb 3926
fe8ab488
A
3927 if (IIT_NULL != new_imp_task) {
3928 ipc_importance_task_release(new_imp_task);
3929 }
3930
3931 if (IIT_NULL != release_imp_task) {
39236c6e 3932 if (boost > 0)
fe8ab488
A
3933 ipc_importance_task_drop_internal_assertion(release_imp_task, boost);
3934
3e170ce0 3935 // released_pid = task_pid(release_imp_task); /* TODO: Need ref-safe way to get pid */
fe8ab488 3936 ipc_importance_task_release(release_imp_task);
39236c6e
A
3937 }
3938#if IMPORTANCE_DEBUG
3939 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (IMPORTANCE_CODE(IMP_WATCHPORT, 0)) | DBG_FUNC_NONE,
3940 proc_selfpid(), pid, boost, released_pid, 0);
3941#endif /* IMPORTANCE_DEBUG */
316670eb
A
3942 }
3943
39236c6e
A
3944 *boostp = boost;
3945 return;
316670eb 3946}
316670eb 3947
fe8ab488 3948#endif /* IMPORTANCE_INHERITANCE */
316670eb 3949
39236c6e
A
3950/*
3951 * Routines for VM to query task importance
3952 */
6d2010ae 3953
6d2010ae 3954
39236c6e
A
/*
 * Weights added up when estimating a task's importance for low memory
 * notification and for purging purgeable memory. A larger weight means
 * the criterion counts for more.
 */
#define TASK_IMPORTANCE_FOREGROUND     4
#define TASK_IMPORTANCE_NOTDARWINBG    1
3961
3962
3e170ce0
A
3963/*
3964 * (Un)Mark the task as a privileged listener for memory notifications.
3965 * if marked, this task will be among the first to be notified amongst
3966 * the bulk of all other tasks when the system enters a pressure level
3967 * of interest to this task.
3968 */
3969int
3970task_low_mem_privileged_listener(task_t task, boolean_t new_value, boolean_t *old_value)
3971{
3972 if (old_value != NULL) {
3973 *old_value = (boolean_t)task->low_mem_privileged_listener;
3974 } else {
3975 task_lock(task);
3976 task->low_mem_privileged_listener = (uint32_t)new_value;
3977 task_unlock(task);
3978 }
3979
3980 return 0;
3981}
3982
39236c6e
A
3983/*
3984 * Checks if the task is already notified.
3985 *
3986 * Condition: task lock should be held while calling this function.
3987 */
3988boolean_t
3989task_has_been_notified(task_t task, int pressurelevel)
3990{
3991 if (task == NULL) {
3992 return FALSE;
6d2010ae 3993 }
39236c6e
A
3994
3995 if (pressurelevel == kVMPressureWarning)
3996 return (task->low_mem_notified_warn ? TRUE : FALSE);
3997 else if (pressurelevel == kVMPressureCritical)
3998 return (task->low_mem_notified_critical ? TRUE : FALSE);
3999 else
4000 return TRUE;
4001}
6d2010ae 4002
316670eb 4003
39236c6e
A
4004/*
4005 * Checks if the task is used for purging.
4006 *
4007 * Condition: task lock should be held while calling this function.
4008 */
4009boolean_t
4010task_used_for_purging(task_t task, int pressurelevel)
4011{
4012 if (task == NULL) {
4013 return FALSE;
316670eb 4014 }
39236c6e
A
4015
4016 if (pressurelevel == kVMPressureWarning)
4017 return (task->purged_memory_warn ? TRUE : FALSE);
4018 else if (pressurelevel == kVMPressureCritical)
4019 return (task->purged_memory_critical ? TRUE : FALSE);
4020 else
4021 return TRUE;
4022}
6d2010ae 4023
6d2010ae 4024
39236c6e
A
4025/*
4026 * Mark the task as notified with memory notification.
4027 *
4028 * Condition: task lock should be held while calling this function.
4029 */
4030void
4031task_mark_has_been_notified(task_t task, int pressurelevel)
4032{
4033 if (task == NULL) {
4034 return;
4035 }
4036
4037 if (pressurelevel == kVMPressureWarning)
4038 task->low_mem_notified_warn = 1;
4039 else if (pressurelevel == kVMPressureCritical)
4040 task->low_mem_notified_critical = 1;
6d2010ae
A
4041}
4042
39236c6e
A
4043
4044/*
4045 * Mark the task as purged.
4046 *
4047 * Condition: task lock should be held while calling this function.
4048 */
4049void
4050task_mark_used_for_purging(task_t task, int pressurelevel)
6d2010ae 4051{
39236c6e
A
4052 if (task == NULL) {
4053 return;
4054 }
4055
4056 if (pressurelevel == kVMPressureWarning)
4057 task->purged_memory_warn = 1;
4058 else if (pressurelevel == kVMPressureCritical)
4059 task->purged_memory_critical = 1;
4060}
6d2010ae 4061
6d2010ae 4062
39236c6e
A
4063/*
4064 * Mark the task eligible for low memory notification.
4065 *
4066 * Condition: task lock should be held while calling this function.
4067 */
4068void
4069task_clear_has_been_notified(task_t task, int pressurelevel)
4070{
4071 if (task == NULL) {
4072 return;
4073 }
4074
4075 if (pressurelevel == kVMPressureWarning)
4076 task->low_mem_notified_warn = 0;
4077 else if (pressurelevel == kVMPressureCritical)
4078 task->low_mem_notified_critical = 0;
4079}
6d2010ae 4080
6d2010ae 4081
39236c6e
A
4082/*
4083 * Mark the task eligible for purging its purgeable memory.
4084 *
4085 * Condition: task lock should be held while calling this function.
4086 */
4087void
4088task_clear_used_for_purging(task_t task)
4089{
4090 if (task == NULL) {
4091 return;
4092 }
4093
4094 task->purged_memory_warn = 0;
4095 task->purged_memory_critical = 0;
6d2010ae
A
4096}
4097
39236c6e
A
4098
4099/*
4100 * Estimate task importance for purging its purgeable memory
4101 * and low memory notification.
4102 *
4103 * Importance is calculated in the following order of criteria:
4104 * -Task role : Background vs Foreground
4105 * -Boost status: Not boosted vs Boosted
4106 * -Darwin BG status.
4107 *
4108 * Returns: Estimated task importance. Less important task will have lower
4109 * estimated importance.
4110 */
316670eb 4111int
39236c6e 4112task_importance_estimate(task_t task)
316670eb 4113{
39236c6e 4114 int task_importance = 0;
316670eb 4115
39236c6e
A
4116 if (task == NULL) {
4117 return 0;
4118 }
4119
4120 if (proc_get_effective_task_policy(task, TASK_POLICY_ROLE) == TASK_FOREGROUND_APPLICATION)
4121 task_importance += TASK_IMPORTANCE_FOREGROUND;
4122
4123 if (proc_get_effective_task_policy(task, TASK_POLICY_DARWIN_BG) == 0)
4124 task_importance += TASK_IMPORTANCE_NOTDARWINBG;
4125
4126 return task_importance;
316670eb 4127}
39236c6e 4128