apple/xnu.git (xnu-4903.270.47) - osfmk/kern/thread_policy.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41
42 #include <mach/machine/sdt.h>
43
44 #ifdef MACH_BSD
45 extern int proc_selfpid(void);
46 extern char * proc_name_address(void *p);
47 extern void rethrottle_thread(void * uthread);
48 #endif /* MACH_BSD */
49
50 #define QOS_EXTRACT(q) ((q) & 0xff)
51
52 uint32_t qos_override_mode;
53 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
54 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
55 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
56 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
57
58 extern zone_t thread_qos_override_zone;
59
60 static void
61 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
62
63 /*
64 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
65 * to threads that don't have a QoS class set.
66 */
67 const qos_policy_params_t thread_qos_policy_params = {
68 /*
69 * This table defines the starting base priority of the thread,
70 * which will be modified by the thread importance and the task max priority
71 * before being applied.
72 */
73 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
74 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
75 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
76 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
77 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
78 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
79 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
80
81 /*
82 * This table defines the highest IO priority that a thread marked with this
83 * QoS class can have.
84 */
85 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
86 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
87 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
88 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
89 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
90 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
91 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
92
93 /*
94 * These tables define the highest throughput and latency QoS tiers
95 * that a thread marked with this QoS class can have.
96 */
97
98 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
99 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
100 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
101 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
102 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
103 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
104 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
105
106 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
107 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
108 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
109 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
110 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
111 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
112 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
113 };
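/*
 * Illustrative reading of the tables above (assuming BASEPRI_DEFAULT == 31
 * from osfmk/kern/sched.h): a THREAD_QOS_LEGACY thread starts at base
 * priority 31, may issue THROTTLE_LEVEL_TIER0 IO, and is capped at
 * throughput tier 1 and latency tier 1.
 */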
114
115 static void
116 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
117
118 static int
119 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
120
121 static void
122 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
123
124 static void
125 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
126
127 static void
128 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
129
130 static void
131 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);
132
133 static int
134 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
135
136 static int
137 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
138
139 static void
140 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
141
142 static void
143 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
144
145 void
146 thread_policy_init(void)
147 {
148 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
149 printf("QOS override mode: 0x%08x\n", qos_override_mode);
150 } else {
151 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
152 }
153 }
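/*
 * Illustrative usage: booting with "qos_override_mode=2" in boot-args selects
 * QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE; with no boot-arg, the default
 * chosen above (FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) is used.
 */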
154
155 boolean_t
156 thread_has_qos_policy(thread_t thread)
157 {
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160
161
162 static void
163 thread_remove_qos_policy_locked(thread_t thread,
164 task_pend_token_t pend_token)
165 {
166 __unused int prev_qos = thread->requested_policy.thrp_qos;
167
168 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169
170 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173
174 kern_return_t
175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210 static int
211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) {
217 return 0;
218 }
219
220 switch (qos) {
221 case THREAD_QOS_USER_INTERACTIVE:
222 next_lower_qos = THREAD_QOS_USER_INITIATED;
223 break;
224 case THREAD_QOS_USER_INITIATED:
225 next_lower_qos = THREAD_QOS_LEGACY;
226 break;
227 case THREAD_QOS_LEGACY:
228 next_lower_qos = THREAD_QOS_UTILITY;
229 break;
230 case THREAD_QOS_UTILITY:
231 next_lower_qos = THREAD_QOS_BACKGROUND;
232 break;
233 case THREAD_QOS_MAINTENANCE:
234 case THREAD_QOS_BACKGROUND:
235 next_lower_qos = 0;
236 break;
237 default:
238 panic("Unrecognized QoS %d", qos);
239 return 0;
240 }
241
242 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244
245 /*
246 * We now have the valid range that the scaled relative priority can map to. Note
247 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 * remainder.
251 */
252 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253
254 return scaled_relprio;
255 }
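/*
 * Worked example (illustrative, assuming BASEPRI_DEFAULT == 31 and
 * BASEPRI_UTILITY == 20 from osfmk/kern/sched.h): a THREAD_QOS_LEGACY thread
 * at -15REL has the range (20, 31], so
 *     scaled_relprio = -(((31 - 20) * 15) >> 4) = -(165 >> 4) = -10
 * and the thread lands at base priority 21, just above the UTILITY band.
 */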
256
257 /*
258 * flag set by -qos-policy-allow boot-arg to allow
259 * testing thread qos policy from userspace
260 */
261 boolean_t allow_qos_policy_set = FALSE;
262
263 kern_return_t
264 thread_policy_set(
265 thread_t thread,
266 thread_policy_flavor_t flavor,
267 thread_policy_t policy_info,
268 mach_msg_type_number_t count)
269 {
270 thread_qos_policy_data_t req_qos;
271 kern_return_t kr;
272
273 req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
274
275 if (thread == THREAD_NULL) {
276 return KERN_INVALID_ARGUMENT;
277 }
278
279 if (allow_qos_policy_set == FALSE) {
280 if (thread_is_static_param(thread)) {
281 return KERN_POLICY_STATIC;
282 }
283
284 if (flavor == THREAD_QOS_POLICY) {
285 return KERN_INVALID_ARGUMENT;
286 }
287 }
288
289 /* Threads without static_param set reset their QoS when other policies are applied. */
290 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
291 /* Store the existing tier, if we fail this call it is used to reset back. */
292 req_qos.qos_tier = thread->requested_policy.thrp_qos;
293 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
294
295 kr = thread_remove_qos_policy(thread);
296 if (kr != KERN_SUCCESS) {
297 return kr;
298 }
299 }
300
301 kr = thread_policy_set_internal(thread, flavor, policy_info, count);
302
303 /* If we stripped an existing QoS policy above, restore it when the new policy set fails. */
304 if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
305 if (kr != KERN_SUCCESS) {
306 /* Reset back to our original tier as the set failed. */
307 (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
308 }
309 }
310
311 return kr;
312 }
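/*
 * Illustrative userspace call sequence (a sketch, not part of this file),
 * reaching this routine through the MIG-generated thread_policy_set() stub;
 * the millisecond values are only an example:
 *
 *     mach_timebase_info_data_t tb;
 *     mach_timebase_info(&tb);
 *     uint64_t ms = (1000000ull * tb.denom) / tb.numer;  // 1 ms in abstime
 *     thread_time_constraint_policy_data_t rt = {
 *         .period      = (uint32_t)(10 * ms),
 *         .computation = (uint32_t)(2 * ms),
 *         .constraint  = (uint32_t)(4 * ms),
 *         .preemptible = TRUE,
 *     };
 *     kern_return_t kr = thread_policy_set(mach_thread_self(),
 *         THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&rt,
 *         THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 *
 * Note the validation below: constraint must be >= computation, and
 * computation must lie between min_rt_quantum and max_rt_quantum.
 */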
313
314 kern_return_t
315 thread_policy_set_internal(
316 thread_t thread,
317 thread_policy_flavor_t flavor,
318 thread_policy_t policy_info,
319 mach_msg_type_number_t count)
320 {
321 kern_return_t result = KERN_SUCCESS;
322 struct task_pend_token pend_token = {};
323
324 thread_mtx_lock(thread);
325 if (!thread->active) {
326 thread_mtx_unlock(thread);
327
328 return KERN_TERMINATED;
329 }
330
331 switch (flavor) {
332 case THREAD_EXTENDED_POLICY:
333 {
334 boolean_t timeshare = TRUE;
335
336 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
337 thread_extended_policy_t info;
338
339 info = (thread_extended_policy_t)policy_info;
340 timeshare = info->timeshare;
341 }
342
343 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
344
345 spl_t s = splsched();
346 thread_lock(thread);
347
348 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
349
350 thread_unlock(thread);
351 splx(s);
352
353 pend_token.tpt_update_thread_sfi = 1;
354
355 break;
356 }
357
358 case THREAD_TIME_CONSTRAINT_POLICY:
359 {
360 thread_time_constraint_policy_t info;
361
362 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
363 result = KERN_INVALID_ARGUMENT;
364 break;
365 }
366
367 info = (thread_time_constraint_policy_t)policy_info;
368 if (info->constraint < info->computation ||
369 info->computation > max_rt_quantum ||
370 info->computation < min_rt_quantum) {
371 result = KERN_INVALID_ARGUMENT;
372 break;
373 }
374
375 spl_t s = splsched();
376 thread_lock(thread);
377
378 thread->realtime.period = info->period;
379 thread->realtime.computation = info->computation;
380 thread->realtime.constraint = info->constraint;
381 thread->realtime.preemptible = info->preemptible;
382
383 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
384
385 thread_unlock(thread);
386 splx(s);
387
388 pend_token.tpt_update_thread_sfi = 1;
389
390 break;
391 }
392
393 case THREAD_PRECEDENCE_POLICY:
394 {
395 thread_precedence_policy_t info;
396
397 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
398 result = KERN_INVALID_ARGUMENT;
399 break;
400 }
401 info = (thread_precedence_policy_t)policy_info;
402
403 spl_t s = splsched();
404 thread_lock(thread);
405
406 thread->importance = info->importance;
407
408 thread_recompute_priority(thread);
409
410 thread_unlock(thread);
411 splx(s);
412
413 break;
414 }
415
416 case THREAD_AFFINITY_POLICY:
417 {
418 thread_affinity_policy_t info;
419
420 if (!thread_affinity_is_supported()) {
421 result = KERN_NOT_SUPPORTED;
422 break;
423 }
424 if (count < THREAD_AFFINITY_POLICY_COUNT) {
425 result = KERN_INVALID_ARGUMENT;
426 break;
427 }
428
429 info = (thread_affinity_policy_t) policy_info;
430 /*
431 * Unlock the thread mutex here and
432 * return directly after calling thread_affinity_set().
433 * This is necessary for correct lock ordering because
434 * thread_affinity_set() takes the task lock.
435 */
436 thread_mtx_unlock(thread);
437 return thread_affinity_set(thread, info->affinity_tag);
438 }
439
440 #if CONFIG_EMBEDDED
441 case THREAD_BACKGROUND_POLICY:
442 {
443 thread_background_policy_t info;
444
445 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
446 result = KERN_INVALID_ARGUMENT;
447 break;
448 }
449
450 if (thread->task != current_task()) {
451 result = KERN_PROTECTION_FAILURE;
452 break;
453 }
454
455 info = (thread_background_policy_t) policy_info;
456
457 int enable;
458
459 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
460 enable = TASK_POLICY_ENABLE;
461 } else {
462 enable = TASK_POLICY_DISABLE;
463 }
464
465 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
466
467 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
468
469 break;
470 }
471 #endif /* CONFIG_EMBEDDED */
472
473 case THREAD_THROUGHPUT_QOS_POLICY:
474 {
475 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
476 thread_throughput_qos_t tqos;
477
478 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
479 result = KERN_INVALID_ARGUMENT;
480 break;
481 }
482
483 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
484 break;
485 }
486
487 tqos = qos_extract(info->thread_throughput_qos_tier);
488
489 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
490 TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
491
492 break;
493 }
494
495 case THREAD_LATENCY_QOS_POLICY:
496 {
497 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
498 thread_latency_qos_t lqos;
499
500 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
501 result = KERN_INVALID_ARGUMENT;
502 break;
503 }
504
505 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
506 break;
507 }
508
509 lqos = qos_extract(info->thread_latency_qos_tier);
510
511 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
512 TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
513
514 break;
515 }
516
517 case THREAD_QOS_POLICY:
518 {
519 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
520
521 if (count < THREAD_QOS_POLICY_COUNT) {
522 result = KERN_INVALID_ARGUMENT;
523 break;
524 }
525
526 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
527 result = KERN_INVALID_ARGUMENT;
528 break;
529 }
530
531 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
532 result = KERN_INVALID_ARGUMENT;
533 break;
534 }
535
536 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
537 result = KERN_INVALID_ARGUMENT;
538 break;
539 }
540
541 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
542 info->qos_tier, -info->tier_importance, &pend_token);
543
544 break;
545 }
546
547 default:
548 result = KERN_INVALID_ARGUMENT;
549 break;
550 }
551
552 thread_mtx_unlock(thread);
553
554 thread_policy_update_complete_unlocked(thread, &pend_token);
555
556 return result;
557 }
558
559 /*
560 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
561 * Both result in FIXED mode scheduling.
562 */
563 static sched_mode_t
564 convert_policy_to_sched_mode(integer_t policy)
565 {
566 switch (policy) {
567 case POLICY_TIMESHARE:
568 return TH_MODE_TIMESHARE;
569 case POLICY_RR:
570 case POLICY_FIFO:
571 return TH_MODE_FIXED;
572 default:
573 panic("unexpected sched policy: %d", policy);
574 return TH_MODE_NONE;
575 }
576 }
577
578 /*
579 * Called either with the thread mutex locked
580 * or from the pthread kext in a 'safe place'.
581 */
582 static kern_return_t
583 thread_set_mode_and_absolute_pri_internal(thread_t thread,
584 sched_mode_t mode,
585 integer_t priority,
586 task_pend_token_t pend_token)
587 {
588 kern_return_t kr = KERN_SUCCESS;
589
590 spl_t s = splsched();
591 thread_lock(thread);
592
593 /* This path isn't allowed to change a thread out of realtime. */
594 if ((thread->sched_mode == TH_MODE_REALTIME) ||
595 (thread->saved_mode == TH_MODE_REALTIME)) {
596 kr = KERN_FAILURE;
597 goto unlock;
598 }
599
600 if (thread->policy_reset) {
601 kr = KERN_SUCCESS;
602 goto unlock;
603 }
604
605 sched_mode_t old_mode = thread->sched_mode;
606
607 /*
608 * Reverse engineer and apply the correct importance value
609 * from the requested absolute priority value.
610 *
611 * TODO: Store the absolute priority value instead
612 */
613
614 if (priority >= thread->max_priority) {
615 priority = thread->max_priority - thread->task_priority;
616 } else if (priority >= MINPRI_KERNEL) {
617 priority -= MINPRI_KERNEL;
618 } else if (priority >= MINPRI_RESERVED) {
619 priority -= MINPRI_RESERVED;
620 } else {
621 priority -= BASEPRI_DEFAULT;
622 }
623
624 priority += thread->task_priority;
625
626 if (priority > thread->max_priority) {
627 priority = thread->max_priority;
628 } else if (priority < MINPRI) {
629 priority = MINPRI;
630 }
631
632 thread->importance = priority - thread->task_priority;
633
634 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
635
636 if (mode != old_mode) {
637 pend_token->tpt_update_thread_sfi = 1;
638 }
639
640 unlock:
641 thread_unlock(thread);
642 splx(s);
643
644 return kr;
645 }
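/*
 * Worked example (illustrative, assuming MINPRI_RESERVED == 64,
 * MINPRI_KERNEL == 80 and BASEPRI_DEFAULT == 31): a user thread with
 * task_priority == 31 and max_priority == 63 asking for absolute priority 40
 * takes the final branch above: 40 - 31 = 9 becomes the importance, and the
 * recomputed base priority is 31 + 9 = 40, within [MINPRI, max_priority].
 */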
646
647 uint8_t
648 thread_workq_pri_for_qos(thread_qos_t qos)
649 {
650 assert(qos < THREAD_QOS_LAST);
651 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
652 }
653
654 thread_qos_t
655 thread_workq_qos_for_pri(int priority)
656 {
657 int qos;
658 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
659 // indicate that workq should map >UI threads to workq's
660 // internal notation for above-UI work.
661 return THREAD_QOS_UNSPECIFIED;
662 }
663 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
664 // map a given priority up to the next nearest qos band.
665 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
666 return qos;
667 }
668 }
669 return THREAD_QOS_MAINTENANCE;
670 }
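/*
 * Illustrative mapping (assuming the sched.h values BASEPRI_USER_INITIATED == 37
 * and BASEPRI_DEFAULT == 31): priority 35 is above the LEGACY base but not
 * above the USER_INITIATED base, so it maps up to THREAD_QOS_USER_INITIATED;
 * priority 48 exceeds the USER_INTERACTIVE base (46) and returns
 * THREAD_QOS_UNSPECIFIED to denote above-UI work.
 */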
671
672 /*
673 * private interface for pthread workqueues
674 *
675 * Set scheduling policy & absolute priority for thread
676 * May be called with spinlocks held
677 * Thread mutex lock is not held
678 */
679 void
680 thread_reset_workq_qos(thread_t thread, uint32_t qos)
681 {
682 struct task_pend_token pend_token = {};
683
684 assert(qos < THREAD_QOS_LAST);
685
686 spl_t s = splsched();
687 thread_lock(thread);
688
689 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
690 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
691 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
692 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
693 &pend_token);
694
695 assert(pend_token.tpt_update_sockets == 0);
696
697 thread_unlock(thread);
698 splx(s);
699
700 thread_policy_update_complete_unlocked(thread, &pend_token);
701 }
702
703 /*
704 * private interface for pthread workqueues
705 *
706 * Set scheduling policy & absolute priority for thread
707 * May be called with spinlocks held
708 * Thread mutex lock is held
709 */
710 void
711 thread_set_workq_override(thread_t thread, uint32_t qos)
712 {
713 struct task_pend_token pend_token = {};
714
715 assert(qos < THREAD_QOS_LAST);
716
717 spl_t s = splsched();
718 thread_lock(thread);
719
720 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
721 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
722
723 assert(pend_token.tpt_update_sockets == 0);
724
725 thread_unlock(thread);
726 splx(s);
727
728 thread_policy_update_complete_unlocked(thread, &pend_token);
729 }
730
731 /*
732 * private interface for pthread workqueues
733 *
734 * Set scheduling policy & absolute priority for thread
735 * May be called with spinlocks held
736 * Thread mutex lock is not held
737 */
738 void
739 thread_set_workq_pri(thread_t thread,
740 thread_qos_t qos,
741 integer_t priority,
742 integer_t policy)
743 {
744 struct task_pend_token pend_token = {};
745 sched_mode_t mode = convert_policy_to_sched_mode(policy);
746
747 assert(qos < THREAD_QOS_LAST);
748 assert(thread->static_param);
749
750 if (!thread->static_param || !thread->active) {
751 return;
752 }
753
754 spl_t s = splsched();
755 thread_lock(thread);
756
757 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
758 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
759 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
760 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
761 0, &pend_token);
762
763 thread_unlock(thread);
764 splx(s);
765
766 /* Concern: this doesn't hold the mutex... */
767
768 __assert_only kern_return_t kr;
769 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
770 &pend_token);
771 assert(kr == KERN_SUCCESS);
772
773 if (pend_token.tpt_update_thread_sfi) {
774 sfi_reevaluate(thread);
775 }
776 }
777
778 /*
779 * thread_set_mode_and_absolute_pri:
780 *
781 * Set scheduling policy & absolute priority for thread, for deprecated
782 * thread_set_policy and thread_policy interfaces.
783 *
784 * Called with nothing locked.
785 */
786 kern_return_t
787 thread_set_mode_and_absolute_pri(thread_t thread,
788 integer_t policy,
789 integer_t priority)
790 {
791 kern_return_t kr = KERN_SUCCESS;
792 struct task_pend_token pend_token = {};
793
794 sched_mode_t mode = convert_policy_to_sched_mode(policy);
795
796 thread_mtx_lock(thread);
797
798 if (!thread->active) {
799 kr = KERN_TERMINATED;
800 goto unlock;
801 }
802
803 if (thread_is_static_param(thread)) {
804 kr = KERN_POLICY_STATIC;
805 goto unlock;
806 }
807
808 /* Setting legacy policies on threads kills the current QoS */
809 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
810 thread_remove_qos_policy_locked(thread, &pend_token);
811 }
812
813 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
814
815 unlock:
816 thread_mtx_unlock(thread);
817
818 thread_policy_update_complete_unlocked(thread, &pend_token);
819
820 return kr;
821 }
822
823 /*
824 * Set the thread's requested mode and recompute priority
825 * Called with thread mutex and thread locked
826 *
827 * TODO: Mitigate potential problems caused by moving thread to end of runq
828 * whenever its priority is recomputed
829 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
830 */
831 static void
832 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
833 {
834 if (thread->policy_reset) {
835 return;
836 }
837
838 boolean_t removed = thread_run_queue_remove(thread);
839
840 /*
841 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
842 * That way there's zero confusion over which the user wants
843 * and which the kernel wants.
844 */
845 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
846 thread->saved_mode = mode;
847 } else {
848 sched_set_thread_mode(thread, mode);
849 }
850
851 thread_recompute_priority(thread);
852
853 if (removed) {
854 thread_run_queue_reinsert(thread, SCHED_TAILQ);
855 }
856 }
857
858 /* called at splsched with thread lock locked */
859 static void
860 thread_update_qos_cpu_time_locked(thread_t thread)
861 {
862 task_t task = thread->task;
863 uint64_t timer_sum, timer_delta;
864
865 /*
866 * This is only as accurate as the distance between
867 * last context switch (embedded) or last user/kernel boundary transition (desktop)
868 * because user_timer and system_timer are only updated then.
869 *
870 * TODO: Consider running a timer_update operation here to update it first.
871 * Maybe doable with interrupts disabled from current thread.
872 * If the thread is on a different core, may not be easy to get right.
873 *
874 * TODO: There should be a function for this in timer.c
875 */
876
877 timer_sum = timer_grab(&thread->user_timer);
878 timer_sum += timer_grab(&thread->system_timer);
879 timer_delta = timer_sum - thread->vtimer_qos_save;
880
881 thread->vtimer_qos_save = timer_sum;
882
883 uint64_t* task_counter = NULL;
884
885 /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
886 switch (thread->effective_policy.thep_qos) {
887 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
888 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
889 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
890 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
891 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
892 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
893 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
894 default:
895 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
896 }
897
898 OSAddAtomic64(timer_delta, task_counter);
899
900 /* Update the task-level qos stats atomically, because we don't have the task lock. */
901 switch (thread->requested_policy.thrp_qos) {
902 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
903 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
904 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
905 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
906 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
907 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
908 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
909 default:
910 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
911 }
912
913 OSAddAtomic64(timer_delta, task_counter);
914 }
915
916 /*
917 * called with no thread locks held
918 * may hold task lock
919 */
920 void
921 thread_update_qos_cpu_time(thread_t thread)
922 {
923 thread_mtx_lock(thread);
924
925 spl_t s = splsched();
926 thread_lock(thread);
927
928 thread_update_qos_cpu_time_locked(thread);
929
930 thread_unlock(thread);
931 splx(s);
932
933 thread_mtx_unlock(thread);
934 }
935
936 /*
937 * Calculate base priority from thread attributes, and set it on the thread
938 *
939 * Called with thread_lock and thread mutex held.
940 */
941 void
942 thread_recompute_priority(
943 thread_t thread)
944 {
945 integer_t priority;
946
947 if (thread->policy_reset) {
948 return;
949 }
950
951 if (thread->sched_mode == TH_MODE_REALTIME) {
952 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
953 return;
954 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
955 int qos = thread->effective_policy.thep_qos;
956 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
957 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
958 int qos_scaled_relprio;
959
960 assert(qos >= 0 && qos < THREAD_QOS_LAST);
961 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
962
963 priority = thread_qos_policy_params.qos_pri[qos];
964 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
965
966 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
967 /* Bump priority 46 to 47 when in a frontmost app */
968 qos_scaled_relprio += 1;
969 }
970
971 /* TODO: factor in renice priority here? */
972
973 priority += qos_scaled_relprio;
974 } else {
975 if (thread->importance > MAXPRI) {
976 priority = MAXPRI;
977 } else if (thread->importance < -MAXPRI) {
978 priority = -MAXPRI;
979 } else {
980 priority = thread->importance;
981 }
982
983 priority += thread->task_priority;
984 }
985
986 priority = MAX(priority, thread->user_promotion_basepri);
987
988 /*
989 * Clamp priority back into the allowed range for this task.
990 * The initial priority value could be out of this range due to:
991 * Task clamped to BG or Utility (max-pri is 4, or 20)
992 * Task is user task (max-pri is 63)
993 * Task is kernel task (max-pri is 95)
994 * Note that thread->importance is user-settable to any integer
995 * via THREAD_PRECEDENCE_POLICY.
996 */
997 if (priority > thread->max_priority) {
998 priority = thread->max_priority;
999 } else if (priority < MINPRI) {
1000 priority = MINPRI;
1001 }
1002
1003 if (thread->saved_mode == TH_MODE_REALTIME &&
1004 thread->sched_flags & TH_SFLAG_FAILSAFE) {
1005 priority = DEPRESSPRI;
1006 }
1007
1008 if (thread->effective_policy.thep_terminated == TRUE) {
1009 /*
1010 * We temporarily want to override the expected priority to
1011 * ensure that the thread exits in a timely manner.
1012 * Note that this is allowed to exceed thread->max_priority
1013 * so that the thread is no longer clamped to background
1014 * during the final exit phase.
1015 */
1016 if (priority < thread->task_priority) {
1017 priority = thread->task_priority;
1018 }
1019 if (priority < BASEPRI_DEFAULT) {
1020 priority = BASEPRI_DEFAULT;
1021 }
1022 }
1023
1024 #if CONFIG_EMBEDDED
1025 /* No one can have a base priority less than MAXPRI_THROTTLE */
1026 if (priority < MAXPRI_THROTTLE) {
1027 priority = MAXPRI_THROTTLE;
1028 }
1029 #endif /* CONFIG_EMBEDDED */
1030
1031 sched_set_thread_base_priority(thread, priority);
1032 }
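/*
 * Worked example (illustrative, assuming BASEPRI_UTILITY == 20 and
 * MAXPRI_THROTTLE == 4): an effective UTILITY thread with relprio -4 starts
 * from qos_pri[UTILITY] == 20; the scaled relative priority over the (4, 20]
 * range is -(((20 - 4) * 4) >> 4) == -4, giving base priority 16 before the
 * max_priority / MINPRI clamps are applied.
 */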
1033
1034 /* Called with the task lock held, but not the thread mutex or spinlock */
1035 void
1036 thread_policy_update_tasklocked(
1037 thread_t thread,
1038 integer_t priority,
1039 integer_t max_priority,
1040 task_pend_token_t pend_token)
1041 {
1042 thread_mtx_lock(thread);
1043
1044 if (!thread->active || thread->policy_reset) {
1045 thread_mtx_unlock(thread);
1046 return;
1047 }
1048
1049 spl_t s = splsched();
1050 thread_lock(thread);
1051
1052 __unused
1053 integer_t old_max_priority = thread->max_priority;
1054
1055 thread->task_priority = priority;
1056 thread->max_priority = max_priority;
1057
1058 #if CONFIG_EMBEDDED
1059 /*
1060 * When backgrounding a thread, iOS has the semantic that
1061 * realtime and fixed priority threads should be demoted
1062 * to timeshare background threads.
1063 *
1064 * On OSX, realtime and fixed priority threads don't lose their mode.
1065 *
1066 * TODO: Do this inside the thread policy update routine in order to avoid double
1067 * remove/reinsert for a runnable thread
1068 */
1069 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1070 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1071 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1072 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1073 }
1074 #endif /* CONFIG_EMBEDDED */
1075
1076 thread_policy_update_spinlocked(thread, TRUE, pend_token);
1077
1078 thread_unlock(thread);
1079 splx(s);
1080
1081 thread_mtx_unlock(thread);
1082 }
1083
1084 /*
1085 * Reset thread to default state in preparation for termination
1086 * Called with thread mutex locked
1087 *
1088 * Always called on current thread, so we don't need a run queue remove
1089 */
1090 void
1091 thread_policy_reset(
1092 thread_t thread)
1093 {
1094 spl_t s;
1095
1096 assert(thread == current_thread());
1097
1098 s = splsched();
1099 thread_lock(thread);
1100
1101 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1102 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1103 }
1104
1105 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1106 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1107 }
1108
1109 /* At this point, the various demotions should be inactive */
1110 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1111 assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1112 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1113
1114 /* Reset thread back to task-default basepri and mode */
1115 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1116
1117 sched_set_thread_mode(thread, newmode);
1118
1119 thread->importance = 0;
1120
1121 /* Prevent further changes to thread base priority or mode */
1122 thread->policy_reset = 1;
1123
1124 sched_set_thread_base_priority(thread, thread->task_priority);
1125
1126 thread_unlock(thread);
1127 splx(s);
1128 }
1129
1130 kern_return_t
1131 thread_policy_get(
1132 thread_t thread,
1133 thread_policy_flavor_t flavor,
1134 thread_policy_t policy_info,
1135 mach_msg_type_number_t *count,
1136 boolean_t *get_default)
1137 {
1138 kern_return_t result = KERN_SUCCESS;
1139
1140 if (thread == THREAD_NULL) {
1141 return KERN_INVALID_ARGUMENT;
1142 }
1143
1144 thread_mtx_lock(thread);
1145 if (!thread->active) {
1146 thread_mtx_unlock(thread);
1147
1148 return KERN_TERMINATED;
1149 }
1150
1151 switch (flavor) {
1152 case THREAD_EXTENDED_POLICY:
1153 {
1154 boolean_t timeshare = TRUE;
1155
1156 if (!(*get_default)) {
1157 spl_t s = splsched();
1158 thread_lock(thread);
1159
1160 if ((thread->sched_mode != TH_MODE_REALTIME) &&
1161 (thread->saved_mode != TH_MODE_REALTIME)) {
1162 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
1163 timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1164 } else {
1165 timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1166 }
1167 } else {
1168 *get_default = TRUE;
1169 }
1170
1171 thread_unlock(thread);
1172 splx(s);
1173 }
1174
1175 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1176 thread_extended_policy_t info;
1177
1178 info = (thread_extended_policy_t)policy_info;
1179 info->timeshare = timeshare;
1180 }
1181
1182 break;
1183 }
1184
1185 case THREAD_TIME_CONSTRAINT_POLICY:
1186 {
1187 thread_time_constraint_policy_t info;
1188
1189 if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1190 result = KERN_INVALID_ARGUMENT;
1191 break;
1192 }
1193
1194 info = (thread_time_constraint_policy_t)policy_info;
1195
1196 if (!(*get_default)) {
1197 spl_t s = splsched();
1198 thread_lock(thread);
1199
1200 if ((thread->sched_mode == TH_MODE_REALTIME) ||
1201 (thread->saved_mode == TH_MODE_REALTIME)) {
1202 info->period = thread->realtime.period;
1203 info->computation = thread->realtime.computation;
1204 info->constraint = thread->realtime.constraint;
1205 info->preemptible = thread->realtime.preemptible;
1206 } else {
1207 *get_default = TRUE;
1208 }
1209
1210 thread_unlock(thread);
1211 splx(s);
1212 }
1213
1214 if (*get_default) {
1215 info->period = 0;
1216 info->computation = default_timeshare_computation;
1217 info->constraint = default_timeshare_constraint;
1218 info->preemptible = TRUE;
1219 }
1220
1221 break;
1222 }
1223
1224 case THREAD_PRECEDENCE_POLICY:
1225 {
1226 thread_precedence_policy_t info;
1227
1228 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1229 result = KERN_INVALID_ARGUMENT;
1230 break;
1231 }
1232
1233 info = (thread_precedence_policy_t)policy_info;
1234
1235 if (!(*get_default)) {
1236 spl_t s = splsched();
1237 thread_lock(thread);
1238
1239 info->importance = thread->importance;
1240
1241 thread_unlock(thread);
1242 splx(s);
1243 } else {
1244 info->importance = 0;
1245 }
1246
1247 break;
1248 }
1249
1250 case THREAD_AFFINITY_POLICY:
1251 {
1252 thread_affinity_policy_t info;
1253
1254 if (!thread_affinity_is_supported()) {
1255 result = KERN_NOT_SUPPORTED;
1256 break;
1257 }
1258 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1259 result = KERN_INVALID_ARGUMENT;
1260 break;
1261 }
1262
1263 info = (thread_affinity_policy_t)policy_info;
1264
1265 if (!(*get_default)) {
1266 info->affinity_tag = thread_affinity_get(thread);
1267 } else {
1268 info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1269 }
1270
1271 break;
1272 }
1273
1274 case THREAD_POLICY_STATE:
1275 {
1276 thread_policy_state_t info;
1277
1278 if (*count < THREAD_POLICY_STATE_COUNT) {
1279 result = KERN_INVALID_ARGUMENT;
1280 break;
1281 }
1282
1283 /* Only root can get this info */
1284 if (current_task()->sec_token.val[0] != 0) {
1285 result = KERN_PROTECTION_FAILURE;
1286 break;
1287 }
1288
1289 info = (thread_policy_state_t)(void*)policy_info;
1290
1291 if (!(*get_default)) {
1292 info->flags = 0;
1293
1294 spl_t s = splsched();
1295 thread_lock(thread);
1296
1297 info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1298
1299 info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1300 info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1301
1302 info->thps_user_promotions = 0;
1303 info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1304 info->thps_ipc_overrides = thread->ipc_overrides;
1305
1306 proc_get_thread_policy_bitfield(thread, info);
1307
1308 thread_unlock(thread);
1309 splx(s);
1310 } else {
1311 info->requested = 0;
1312 info->effective = 0;
1313 info->pending = 0;
1314 }
1315
1316 break;
1317 }
1318
1319 case THREAD_LATENCY_QOS_POLICY:
1320 {
1321 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1322 thread_latency_qos_t plqos;
1323
1324 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1325 result = KERN_INVALID_ARGUMENT;
1326 break;
1327 }
1328
1329 if (*get_default) {
1330 plqos = 0;
1331 } else {
1332 plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1333 }
1334
1335 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1336 }
1337 break;
1338
1339 case THREAD_THROUGHPUT_QOS_POLICY:
1340 {
1341 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1342 thread_throughput_qos_t ptqos;
1343
1344 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1345 result = KERN_INVALID_ARGUMENT;
1346 break;
1347 }
1348
1349 if (*get_default) {
1350 ptqos = 0;
1351 } else {
1352 ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1353 }
1354
1355 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1356 }
1357 break;
1358
1359 case THREAD_QOS_POLICY:
1360 {
1361 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1362
1363 if (*count < THREAD_QOS_POLICY_COUNT) {
1364 result = KERN_INVALID_ARGUMENT;
1365 break;
1366 }
1367
1368 if (!(*get_default)) {
1369 int relprio_value = 0;
1370 info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1371 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1372
1373 info->tier_importance = -relprio_value;
1374 } else {
1375 info->qos_tier = THREAD_QOS_UNSPECIFIED;
1376 info->tier_importance = 0;
1377 }
1378
1379 break;
1380 }
1381
1382 default:
1383 result = KERN_INVALID_ARGUMENT;
1384 break;
1385 }
1386
1387 thread_mtx_unlock(thread);
1388
1389 return result;
1390 }
1391
1392 void
1393 thread_policy_create(thread_t thread)
1394 {
1395 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1396 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1397 thread_tid(thread), theffective_0(thread),
1398 theffective_1(thread), thread->base_pri, 0);
1399
1400 /* We pass a pend token but ignore it */
1401 struct task_pend_token pend_token = {};
1402
1403 thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
1404
1405 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1406 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1407 thread_tid(thread), theffective_0(thread),
1408 theffective_1(thread), thread->base_pri, 0);
1409 }
1410
1411 static void
1412 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
1413 {
1414 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1415 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1416 thread_tid(thread), theffective_0(thread),
1417 theffective_1(thread), thread->base_pri, 0);
1418
1419 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1420
1421 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1422 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1423 thread_tid(thread), theffective_0(thread),
1424 theffective_1(thread), thread->base_pri, 0);
1425 }
1426
1427
1428
1429 /*
1430 * One thread state update function TO RULE THEM ALL
1431 *
1432 * This function updates the thread effective policy fields
1433 * and pushes the results to the relevant subsystems.
1434 *
1435 * Pended actions that must be run later are flagged in the caller-supplied pend_token.
1436 *
1437 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1438 */
1439 static void
1440 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
1441 task_pend_token_t pend_token)
1442 {
1443 /*
1444 * Step 1:
1445 * Gather requested policy and effective task state
1446 */
1447
1448 struct thread_requested_policy requested = thread->requested_policy;
1449 struct task_effective_policy task_effective = thread->task->effective_policy;
1450
1451 /*
1452 * Step 2:
1453 * Calculate new effective policies from requested policy, task and thread state
1454 * Rules:
1455 * Don't change requested, it won't take effect
1456 */
1457
1458 struct thread_effective_policy next = {};
1459
1460 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1461
1462 uint32_t next_qos = requested.thrp_qos;
1463
1464 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1465 next_qos = MAX(requested.thrp_qos_override, next_qos);
1466 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1467 next_qos = MAX(requested.thrp_qos_ipc_override, next_qos);
1468 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1469 }
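/*
 * Illustrative case: a requested LEGACY thread with a workqueue override of
 * USER_INITIATED resolves to next_qos == USER_INITIATED here, while a thread
 * whose requested QoS is UNSPECIFIED ignores the overrides above entirely.
 */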
1470
1471 next.thep_qos = next_qos;
1472
1473 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1474 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1475 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1476 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1477 } else {
1478 next.thep_qos = task_effective.tep_qos_clamp;
1479 }
1480 }
1481
1482 /*
1483 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1484 * This allows QoS promotions to work properly even after the process is unclamped.
1485 */
1486 next.thep_qos_promote = next.thep_qos;
1487
1488 /* The ceiling only applies to threads that are in the QoS world */
1489 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1490 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1491 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1492 }
1493
1494 /* The requested sync ipc qos override is expected to never be set here */
1495 assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
1496
1497 /*
1498 * The QoS relative priority is only applicable when the original programmer's
1499 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1500 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1501 * since otherwise it would be lower than unclamped threads. Similarly, in the
1502 * presence of boosting, the programmer doesn't know what other actors
1503 * are boosting the thread.
1504 */
1505 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1506 (requested.thrp_qos == next.thep_qos) &&
1507 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1508 next.thep_qos_relprio = requested.thrp_qos_relprio;
1509 } else {
1510 next.thep_qos_relprio = 0;
1511 }
1512
1513 /* Calculate DARWIN_BG */
1514 boolean_t wants_darwinbg = FALSE;
1515 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
1516
1517 /*
1518 * If DARWIN_BG has been requested at either level, it's engaged.
1519 * darwinbg threads always create bg sockets,
1520 * but only some types of darwinbg change the sockets
1521 * after they're created
1522 */
1523 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1524 wants_all_sockets_bg = wants_darwinbg = TRUE;
1525 }
1526
1527 if (requested.thrp_pidbind_bg) {
1528 wants_all_sockets_bg = wants_darwinbg = TRUE;
1529 }
1530
1531 if (task_effective.tep_darwinbg) {
1532 wants_darwinbg = TRUE;
1533 }
1534
1535 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1536 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1537 wants_darwinbg = TRUE;
1538 }
1539
1540 /* Calculate side effects of DARWIN_BG */
1541
1542 if (wants_darwinbg) {
1543 next.thep_darwinbg = 1;
1544 }
1545
1546 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1547 next.thep_new_sockets_bg = 1;
1548 }
1549
1550 /* Don't use task_effective.tep_all_sockets_bg here */
1551 if (wants_all_sockets_bg) {
1552 next.thep_all_sockets_bg = 1;
1553 }
1554
1555 /* darwinbg implies background QOS (or lower) */
1556 if (next.thep_darwinbg &&
1557 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1558 next.thep_qos = THREAD_QOS_BACKGROUND;
1559 next.thep_qos_relprio = 0;
1560 }
1561
1562 /* Calculate IO policy */
1563
1564 int iopol = THROTTLE_LEVEL_TIER0;
1565
1566 /* Factor in the task's IO policy */
1567 if (next.thep_darwinbg) {
1568 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1569 }
1570
1571 iopol = MAX(iopol, task_effective.tep_io_tier);
1572
1573 /* Look up the associated IO tier value for the QoS class */
1574 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1575
1576 iopol = MAX(iopol, requested.thrp_int_iotier);
1577 iopol = MAX(iopol, requested.thrp_ext_iotier);
1578
1579 next.thep_io_tier = iopol;
1580
1581 /*
1582 * If a QoS override is causing IO to go into a lower tier, we also set
1583 * the passive bit so that a thread doesn't end up stuck in its own throttle
1584 * window when the override goes away.
1585 */
1586 boolean_t qos_io_override_active = FALSE;
1587 if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
1588 thread_qos_policy_params.qos_iotier[requested.thrp_qos]) {
1589 qos_io_override_active = TRUE;
1590 }
1591
1592 /* Calculate Passive IO policy */
1593 if (requested.thrp_ext_iopassive ||
1594 requested.thrp_int_iopassive ||
1595 qos_io_override_active ||
1596 task_effective.tep_io_passive) {
1597 next.thep_io_passive = 1;
1598 }
1599
1600 /* Calculate timer QOS */
1601 uint32_t latency_qos = requested.thrp_latency_qos;
1602
1603 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1604 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1605
1606 next.thep_latency_qos = latency_qos;
1607
1608 /* Calculate throughput QOS */
1609 uint32_t through_qos = requested.thrp_through_qos;
1610
1611 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1612 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1613
1614 next.thep_through_qos = through_qos;
1615
1616 if (task_effective.tep_terminated || requested.thrp_terminated) {
1617 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1618 next.thep_terminated = 1;
1619 next.thep_darwinbg = 0;
1620 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1621 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1622 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1623 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1624 }
1625
1626 /*
1627 * Step 3:
1628 * Swap out old policy for new policy
1629 */
1630
1631 struct thread_effective_policy prev = thread->effective_policy;
1632
1633 thread_update_qos_cpu_time_locked(thread);
1634
1635 /* This is the point where the new values become visible to other threads */
1636 thread->effective_policy = next;
1637
1638 /*
1639 * Step 4:
1640 * Pend updates that can't be done while holding the thread lock
1641 */
1642
1643 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1644 pend_token->tpt_update_sockets = 1;
1645 }
1646
1647 /* TODO: Doesn't this only need to be done if the throttle went up? */
1648 if (prev.thep_io_tier != next.thep_io_tier) {
1649 pend_token->tpt_update_throttle = 1;
1650 }
1651
1652 /*
1653 * Check for the attributes that sfi_thread_classify() consults,
1654 * and trigger SFI re-evaluation.
1655 */
1656 if (prev.thep_qos != next.thep_qos ||
1657 prev.thep_darwinbg != next.thep_darwinbg) {
1658 pend_token->tpt_update_thread_sfi = 1;
1659 }
1660
1661 /*
1662 * Step 5:
1663 * Update other subsystems as necessary if something has changed
1664 */
1665
1666 /* Check for the attributes that thread_recompute_priority() consults */
1667 if (prev.thep_qos != next.thep_qos ||
1668 prev.thep_qos_relprio != next.thep_qos_relprio ||
1669 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1670 prev.thep_terminated != next.thep_terminated ||
1671 pend_token->tpt_force_recompute_pri == 1 ||
1672 recompute_priority) {
1673 thread_recompute_priority(thread);
1674 }
1675 }
1676
1677
1678 /*
1679 * Initiate a thread policy state transition on a thread with its TID
1680 * Useful if you cannot guarantee the thread won't get terminated
1681 * Precondition: No locks are held
1682 * Will take task lock - using the non-tid variant is faster
1683 * if you already have a thread ref.
1684 */
1685 void
1686 proc_set_thread_policy_with_tid(task_t task,
1687 uint64_t tid,
1688 int category,
1689 int flavor,
1690 int value)
1691 {
1692 /* takes task lock, returns ref'ed thread or NULL */
1693 thread_t thread = task_findtid(task, tid);
1694
1695 if (thread == THREAD_NULL) {
1696 return;
1697 }
1698
1699 proc_set_thread_policy(thread, category, flavor, value);
1700
1701 thread_deallocate(thread);
1702 }
1703
1704 /*
1705 * Initiate a thread policy transition on a thread
1706 * This path supports networking transitions (i.e. darwinbg transitions)
1707 * Precondition: No locks are held
1708 */
1709 void
1710 proc_set_thread_policy(thread_t thread,
1711 int category,
1712 int flavor,
1713 int value)
1714 {
1715 struct task_pend_token pend_token = {};
1716
1717 thread_mtx_lock(thread);
1718
1719 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1720
1721 thread_mtx_unlock(thread);
1722
1723 thread_policy_update_complete_unlocked(thread, &pend_token);
1724 }
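/*
 * Illustrative in-kernel caller (hypothetical): marking a thread as
 * terminated so the exit path can shed BG throttles would look like
 *
 *     proc_set_thread_policy(thread, TASK_POLICY_ATTRIBUTE,
 *         TASK_POLICY_TERMINATED, TASK_POLICY_ENABLE);
 */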
1725
1726 /*
1727 * Do the things that can't be done while holding a thread mutex.
1728 * These are set up to call back into thread policy to get the latest value,
1729 * so they don't have to be synchronized with the update.
1730 * The only required semantic is 'call this sometime after updating effective policy'
1731 *
1732 * Precondition: Thread mutex is not held
1733 *
1734 * This may be called with the task lock held, but in that case it won't be
1735 * called with tpt_update_sockets set.
1736 */
1737 void
1738 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1739 {
1740 #ifdef MACH_BSD
1741 if (pend_token->tpt_update_sockets) {
1742 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1743 }
1744 #endif /* MACH_BSD */
1745
1746 if (pend_token->tpt_update_throttle) {
1747 rethrottle_thread(thread->uthread);
1748 }
1749
1750 if (pend_token->tpt_update_thread_sfi) {
1751 sfi_reevaluate(thread);
1752 }
1753 }
1754
1755 /*
1756 * Set and update thread policy
1757 * Thread mutex might be held
1758 */
1759 static void
1760 proc_set_thread_policy_locked(thread_t thread,
1761 int category,
1762 int flavor,
1763 int value,
1764 int value2,
1765 task_pend_token_t pend_token)
1766 {
1767 spl_t s = splsched();
1768 thread_lock(thread);
1769
1770 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1771
1772 thread_unlock(thread);
1773 splx(s);
1774 }
1775
1776 /*
1777 * Set and update thread policy
1778 * Thread spinlock is held
1779 */
1780 static void
1781 proc_set_thread_policy_spinlocked(thread_t thread,
1782 int category,
1783 int flavor,
1784 int value,
1785 int value2,
1786 task_pend_token_t pend_token)
1787 {
1788 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1789 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1790 thread_tid(thread), threquested_0(thread),
1791 threquested_1(thread), value, 0);
1792
1793 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2);
1794
1795 thread_policy_update_spinlocked(thread, FALSE, pend_token);
1796
1797 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1798 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1799 thread_tid(thread), threquested_0(thread),
1800 threquested_1(thread), tpending(pend_token), 0);
1801 }
1802
1803 /*
1804 * Set the requested state for a specific flavor to a specific value.
1805 */
1806 static void
1807 thread_set_requested_policy_spinlocked(thread_t thread,
1808 int category,
1809 int flavor,
1810 int value,
1811 int value2)
1812 {
1813 int tier, passive;
1814
1815 struct thread_requested_policy requested = thread->requested_policy;
1816
1817 switch (flavor) {
1818 /* Category: EXTERNAL and INTERNAL, thread and task */
1819
1820 case TASK_POLICY_DARWIN_BG:
1821 if (category == TASK_POLICY_EXTERNAL) {
1822 requested.thrp_ext_darwinbg = value;
1823 } else {
1824 requested.thrp_int_darwinbg = value;
1825 }
1826 break;
1827
1828 case TASK_POLICY_IOPOL:
1829 proc_iopol_to_tier(value, &tier, &passive);
1830 if (category == TASK_POLICY_EXTERNAL) {
1831 requested.thrp_ext_iotier = tier;
1832 requested.thrp_ext_iopassive = passive;
1833 } else {
1834 requested.thrp_int_iotier = tier;
1835 requested.thrp_int_iopassive = passive;
1836 }
1837 break;
1838
1839 case TASK_POLICY_IO:
1840 if (category == TASK_POLICY_EXTERNAL) {
1841 requested.thrp_ext_iotier = value;
1842 } else {
1843 requested.thrp_int_iotier = value;
1844 }
1845 break;
1846
1847 case TASK_POLICY_PASSIVE_IO:
1848 if (category == TASK_POLICY_EXTERNAL) {
1849 requested.thrp_ext_iopassive = value;
1850 } else {
1851 requested.thrp_int_iopassive = value;
1852 }
1853 break;
1854
1855 /* Category: ATTRIBUTE, thread only */
1856
1857 case TASK_POLICY_PIDBIND_BG:
1858 assert(category == TASK_POLICY_ATTRIBUTE);
1859 requested.thrp_pidbind_bg = value;
1860 break;
1861
1862 case TASK_POLICY_LATENCY_QOS:
1863 assert(category == TASK_POLICY_ATTRIBUTE);
1864 requested.thrp_latency_qos = value;
1865 break;
1866
1867 case TASK_POLICY_THROUGH_QOS:
1868 assert(category == TASK_POLICY_ATTRIBUTE);
1869 requested.thrp_through_qos = value;
1870 break;
1871
1872 case TASK_POLICY_QOS:
1873 assert(category == TASK_POLICY_ATTRIBUTE);
1874 requested.thrp_qos = value;
1875 break;
1876
1877 case TASK_POLICY_QOS_OVERRIDE:
1878 assert(category == TASK_POLICY_ATTRIBUTE);
1879 requested.thrp_qos_override = value;
1880 break;
1881
1882 case TASK_POLICY_QOS_AND_RELPRIO:
1883 assert(category == TASK_POLICY_ATTRIBUTE);
1884 requested.thrp_qos = value;
1885 requested.thrp_qos_relprio = value2;
1886 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1887 break;
1888
1889 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1890 assert(category == TASK_POLICY_ATTRIBUTE);
1891 requested.thrp_qos_workq_override = value;
1892 break;
1893
1894 case TASK_POLICY_QOS_PROMOTE:
1895 assert(category == TASK_POLICY_ATTRIBUTE);
1896 requested.thrp_qos_promote = value;
1897 break;
1898
1899 case TASK_POLICY_QOS_IPC_OVERRIDE:
1900 assert(category == TASK_POLICY_ATTRIBUTE);
1901 requested.thrp_qos_ipc_override = value;
1902 break;
1903
1904 case TASK_POLICY_TERMINATED:
1905 assert(category == TASK_POLICY_ATTRIBUTE);
1906 requested.thrp_terminated = value;
1907 break;
1908
1909 default:
1910 panic("unknown task policy: %d %d %d", category, flavor, value);
1911 break;
1912 }
1913
1914 thread->requested_policy = requested;
1915 }
1916
1917 /*
1918 * Gets what you set. Effective values may be different.
1919 * Precondition: No locks are held
1920 */
1921 int
1922 proc_get_thread_policy(thread_t thread,
1923 int category,
1924 int flavor)
1925 {
1926 int value = 0;
1927 thread_mtx_lock(thread);
1928 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
1929 thread_mtx_unlock(thread);
1930 return value;
1931 }
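
/*
 * Illustrative sketch (not part of the original source): how an in-kernel
 * caller might read back a requested thread-policy attribute using the
 * getter above. Kept under #if 0; the choice of category and flavor below
 * is only an example.
 */
#if 0 /* example sketch only */
static void
example_read_requested_policy(thread_t thread)
{
	/* Requested (not effective) externally-applied DARWIN_BG state: 1 or 0 */
	int ext_bg = proc_get_thread_policy(thread, TASK_POLICY_EXTERNAL, TASK_POLICY_DARWIN_BG);

	/* Requested QoS class for the thread */
	int qos = proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS);

	(void)ext_bg;
	(void)qos;
}
#endif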
1932
1933 static int
1934 proc_get_thread_policy_locked(thread_t thread,
1935 int category,
1936 int flavor,
1937 int* value2)
1938 {
1939 int value = 0;
1940
1941 spl_t s = splsched();
1942 thread_lock(thread);
1943
1944 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
1945
1946 thread_unlock(thread);
1947 splx(s);
1948
1949 return value;
1950 }
1951
1952 /*
1953 * Gets what you set. Effective values may be different.
1954 */
1955 static int
1956 thread_get_requested_policy_spinlocked(thread_t thread,
1957 int category,
1958 int flavor,
1959 int* value2)
1960 {
1961 int value = 0;
1962
1963 struct thread_requested_policy requested = thread->requested_policy;
1964
1965 switch (flavor) {
1966 case TASK_POLICY_DARWIN_BG:
1967 if (category == TASK_POLICY_EXTERNAL) {
1968 value = requested.thrp_ext_darwinbg;
1969 } else {
1970 value = requested.thrp_int_darwinbg;
1971 }
1972 break;
1973 case TASK_POLICY_IOPOL:
1974 if (category == TASK_POLICY_EXTERNAL) {
1975 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
1976 requested.thrp_ext_iopassive);
1977 } else {
1978 value = proc_tier_to_iopol(requested.thrp_int_iotier,
1979 requested.thrp_int_iopassive);
1980 }
1981 break;
1982 case TASK_POLICY_IO:
1983 if (category == TASK_POLICY_EXTERNAL) {
1984 value = requested.thrp_ext_iotier;
1985 } else {
1986 value = requested.thrp_int_iotier;
1987 }
1988 break;
1989 case TASK_POLICY_PASSIVE_IO:
1990 if (category == TASK_POLICY_EXTERNAL) {
1991 value = requested.thrp_ext_iopassive;
1992 } else {
1993 value = requested.thrp_int_iopassive;
1994 }
1995 break;
1996 case TASK_POLICY_QOS:
1997 assert(category == TASK_POLICY_ATTRIBUTE);
1998 value = requested.thrp_qos;
1999 break;
2000 case TASK_POLICY_QOS_OVERRIDE:
2001 assert(category == TASK_POLICY_ATTRIBUTE);
2002 value = requested.thrp_qos_override;
2003 break;
2004 case TASK_POLICY_LATENCY_QOS:
2005 assert(category == TASK_POLICY_ATTRIBUTE);
2006 value = requested.thrp_latency_qos;
2007 break;
2008 case TASK_POLICY_THROUGH_QOS:
2009 assert(category == TASK_POLICY_ATTRIBUTE);
2010 value = requested.thrp_through_qos;
2011 break;
2012 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2013 assert(category == TASK_POLICY_ATTRIBUTE);
2014 value = requested.thrp_qos_workq_override;
2015 break;
2016 case TASK_POLICY_QOS_AND_RELPRIO:
2017 assert(category == TASK_POLICY_ATTRIBUTE);
2018 assert(value2 != NULL);
2019 value = requested.thrp_qos;
2020 *value2 = requested.thrp_qos_relprio;
2021 break;
2022 case TASK_POLICY_QOS_PROMOTE:
2023 assert(category == TASK_POLICY_ATTRIBUTE);
2024 value = requested.thrp_qos_promote;
2025 break;
2026 case TASK_POLICY_QOS_IPC_OVERRIDE:
2027 assert(category == TASK_POLICY_ATTRIBUTE);
2028 value = requested.thrp_qos_ipc_override;
2029 break;
2030 case TASK_POLICY_TERMINATED:
2031 assert(category == TASK_POLICY_ATTRIBUTE);
2032 value = requested.thrp_terminated;
2033 break;
2034
2035 default:
2036 panic("unknown policy_flavor %d", flavor);
2037 break;
2038 }
2039
2040 return value;
2041 }
2042
2043 /*
2044 * Gets what is actually in effect, for subsystems which pull policy instead of receiving updates.
2045 *
2046 * NOTE: This accessor does not take the task or thread lock.
2047 * Notifications of state updates need to be externally synchronized with state queries.
2048 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2049 * within the context of a timer interrupt.
2050 *
2051 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2052 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2053 * I don't think that cost is worth not having the right answer.
2054 */
2055 int
2056 proc_get_effective_thread_policy(thread_t thread,
2057 int flavor)
2058 {
2059 int value = 0;
2060
2061 switch (flavor) {
2062 case TASK_POLICY_DARWIN_BG:
2063 /*
2064 * This call is used within the timer layer, as well as for
2065 * prioritizing requests to the graphics system.
2066 * It also informs SFI and originator-bg-state.
2067 * Returns 1 for background mode, 0 for normal mode
2068 */
2069
2070 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2071 break;
2072 case TASK_POLICY_IO:
2073 /*
2074 * The I/O system calls here to find out what throttling tier to apply to an operation.
2075 * Returns THROTTLE_LEVEL_* values
2076 */
2077 value = thread->effective_policy.thep_io_tier;
2078 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2079 value = MIN(value, thread->iotier_override);
2080 }
2081 break;
2082 case TASK_POLICY_PASSIVE_IO:
2083 /*
2084 * The I/O system calls here to find out whether an operation should be passive.
2085 * (i.e. not cause operations with lower throttle tiers to be throttled)
2086 * Returns 1 for passive mode, 0 for normal mode
2087 *
2088 * If an override is causing IO to go into a lower tier, we also set
2089 * the passive bit so that a thread doesn't end up stuck in its own throttle
2090 * window when the override goes away.
2091 */
2092 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2093 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2094 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2095 value = 1;
2096 }
2097 break;
2098 case TASK_POLICY_ALL_SOCKETS_BG:
2099 /*
2100 * do_background_socket() calls this to determine whether
2101 * it should change the thread's sockets
2102 * Returns 1 for background mode, 0 for normal mode
2103 * This consults both thread and task so un-DBGing a thread while the task is BG
2104 * doesn't get you out of the network throttle.
2105 */
2106 value = (thread->effective_policy.thep_all_sockets_bg ||
2107 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2108 break;
2109 case TASK_POLICY_NEW_SOCKETS_BG:
2110 /*
2111 * socreate() calls this to determine if it should mark a new socket as background
2112 * Returns 1 for background mode, 0 for normal mode
2113 */
2114 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2115 break;
2116 case TASK_POLICY_LATENCY_QOS:
2117 /*
2118 * timer arming calls into here to find out the timer coalescing level
2119 * Returns a latency QoS tier (0-6)
2120 */
2121 value = thread->effective_policy.thep_latency_qos;
2122 break;
2123 case TASK_POLICY_THROUGH_QOS:
2124 /*
2125 * This value is passed into the urgency callout from the scheduler
2126 * to the performance management subsystem.
2127 *
2128 * Returns a throughput QoS tier (0-6)
2129 */
2130 value = thread->effective_policy.thep_through_qos;
2131 break;
2132 case TASK_POLICY_QOS:
2133 /*
2134 * This is communicated to the performance management layer and SFI.
2135 *
2136 * Returns a QoS policy tier
2137 */
2138 value = thread->effective_policy.thep_qos;
2139 break;
2140 default:
2141 panic("unknown thread policy flavor %d", flavor);
2142 break;
2143 }
2144
2145 return value;
2146 }
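
/*
 * Illustrative sketch (not part of the original source): how a subsystem
 * that pulls policy, e.g. an I/O issuing path, might query the effective
 * throttle tier and passivity for the current thread. No locks are taken,
 * mirroring the lock-free contract documented above.
 */
#if 0 /* example sketch only */
static void
example_query_effective_io_policy(void)
{
	thread_t self = current_thread();

	/* THROTTLE_LEVEL_* tier to apply to the operation */
	int tier = proc_get_effective_thread_policy(self, TASK_POLICY_IO);

	/* 1 if the I/O should be issued passively, 0 otherwise */
	int passive = proc_get_effective_thread_policy(self, TASK_POLICY_PASSIVE_IO);

	(void)tier;
	(void)passive;
}
#endif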
2147
2148
2149 /*
2150 * (integer_t) casts limit the number of bits we can fit here.
2151 * This interface is deprecated and presumably superseded by the _EXT struct.
2152 */
2153 static void
2154 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2155 {
2156 uint64_t bits = 0;
2157 struct thread_requested_policy requested = thread->requested_policy;
2158
2159 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2160 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2161 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2162 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2163 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2164 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2165
2166 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2167 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2168
2169 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2170
2171 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2172 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2173
2174 info->requested = (integer_t) bits;
2175 bits = 0;
2176
2177 struct thread_effective_policy effective = thread->effective_policy;
2178
2179 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2180
2181 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2182 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2183 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2184 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2185
2186 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2187
2188 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2189 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2190
2191 info->effective = (integer_t)bits;
2192 bits = 0;
2193
2194 info->pending = 0;
2195 }
2196
2197 /*
2198 * Sneakily trace either the task and thread requested
2199 * or just the thread requested, depending on whether we have enough room.
2200 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2201 *
2202 * LP32 LP64
2203 * threquested_0(thread) thread[0] task[0]
2204 * threquested_1(thread) thread[1] thread[0]
2205 *
2206 */
2207
2208 uintptr_t
2209 threquested_0(thread_t thread)
2210 {
2211 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2212
2213 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2214
2215 return raw[0];
2216 }
2217
2218 uintptr_t
2219 threquested_1(thread_t thread)
2220 {
2221 #if defined __LP64__
2222 return *(uintptr_t*)&thread->task->requested_policy;
2223 #else
2224 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2225 return raw[1];
2226 #endif
2227 }
2228
2229 uintptr_t
2230 theffective_0(thread_t thread)
2231 {
2232 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2233
2234 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2235 return raw[0];
2236 }
2237
2238 uintptr_t
2239 theffective_1(thread_t thread)
2240 {
2241 #if defined __LP64__
2242 return *(uintptr_t*)&thread->task->effective_policy;
2243 #else
2244 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2245 return raw[1];
2246 #endif
2247 }
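
/*
 * Illustrative sketch (not part of the original source): these accessors are
 * intended as raw tracepoint arguments, mirroring the existing
 * KERNEL_DEBUG_CONSTANT_IST call sites earlier in this file. The debug code
 * of 0 below is a placeholder, not a real trace code.
 */
#if 0 /* example sketch only */
static void
example_trace_policy_words(thread_t thread)
{
	KERNEL_DEBUG_CONSTANT(0 /* placeholder trace code */ | DBG_FUNC_NONE,
	    thread_tid(thread),
	    threquested_0(thread), threquested_1(thread),
	    theffective_0(thread), 0);
}
#endif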
2248
2249
2250 /*
2251 * Set an override on the thread which takes precedence
2252 * over the task/thread policy. This should
2253 * only be set for temporary grants until the thread
2254 * returns to the userspace boundary
2255 *
2256 * We use atomic operations to swap in the override, with
2257 * the assumption that the thread itself can
2258 * read the override and clear it on return to userspace.
2259 *
2260 * No locking is performed, since it is acceptable to see
2261 * a stale override for one loop through throttle_lowpri_io().
2262 * However a thread reference must be held on the thread.
2263 */
2264
2265 void
2266 set_thread_iotier_override(thread_t thread, int policy)
2267 {
2268 int current_override;
2269
2270 /* Let most aggressive I/O policy win until user boundary */
2271 do {
2272 current_override = thread->iotier_override;
2273
2274 if (current_override != THROTTLE_LEVEL_NONE) {
2275 policy = MIN(current_override, policy);
2276 }
2277
2278 if (current_override == policy) {
2279 /* no effective change */
2280 return;
2281 }
2282 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2283
2284 /*
2285 * Since the thread may be currently throttled,
2286 * re-evaluate tiers and potentially break out
2287 * of an msleep
2288 */
2289 rethrottle_thread(thread->uthread);
2290 }
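
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * kernel path granting a temporary I/O tier boost to a thread it holds a
 * reference on, as described above. The override is expected to be cleared
 * by the thread on its way back to userspace.
 */
#if 0 /* example sketch only */
static void
example_grant_iotier_override(thread_t thread)
{
	/* Most aggressive tier wins; THROTTLE_LEVEL_TIER0 is the highest */
	set_thread_iotier_override(thread, THROTTLE_LEVEL_TIER0);
}
#endif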
2291
2292 /*
2293 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2294 * semaphores, dispatch_sync) may result in priority inversions where a higher priority thread
2295 * (i.e. in scheduler priority, I/O tier, or QoS tier) is waiting on a resource owned by a lower
2296 * priority thread. In these cases, we attempt to propagate the priority token, as long
2297 * as the subsystem informs us of the relationships between the threads. The userspace
2298 * synchronization subsystem should maintain the information of owner->resource and
2299 * resource->waiters itself.
2300 */
2301
2302 /*
2303 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2304 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2305 * to be handled specially in the future, but for now it's fine to slam
2306 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2307 */
2308 static void
2309 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2310 {
2311 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2312 /* Map all input resource/type to a single one */
2313 *resource = USER_ADDR_NULL;
2314 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2315 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2316 /* no transform */
2317 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2318 /* Map all mutex overrides to a single one, to avoid memory overhead */
2319 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2320 *resource = USER_ADDR_NULL;
2321 }
2322 }
2323 }
2324
2325 /* This helper routine finds an existing override, if any. Locking should be done by the caller */
2326 static struct thread_qos_override *
2327 find_qos_override(thread_t thread,
2328 user_addr_t resource,
2329 int resource_type)
2330 {
2331 struct thread_qos_override *override;
2332
2333 override = thread->overrides;
2334 while (override) {
2335 if (override->override_resource == resource &&
2336 override->override_resource_type == resource_type) {
2337 return override;
2338 }
2339
2340 override = override->override_next;
2341 }
2342
2343 return NULL;
2344 }
2345
2346 static void
2347 find_and_decrement_qos_override(thread_t thread,
2348 user_addr_t resource,
2349 int resource_type,
2350 boolean_t reset,
2351 struct thread_qos_override **free_override_list)
2352 {
2353 struct thread_qos_override *override, *override_prev;
2354
2355 override_prev = NULL;
2356 override = thread->overrides;
2357 while (override) {
2358 struct thread_qos_override *override_next = override->override_next;
2359
2360 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2361 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2362 if (reset) {
2363 override->override_contended_resource_count = 0;
2364 } else {
2365 override->override_contended_resource_count--;
2366 }
2367
2368 if (override->override_contended_resource_count == 0) {
2369 if (override_prev == NULL) {
2370 thread->overrides = override_next;
2371 } else {
2372 override_prev->override_next = override_next;
2373 }
2374
2375 /* Add to out-param for later zfree */
2376 override->override_next = *free_override_list;
2377 *free_override_list = override;
2378 } else {
2379 override_prev = override;
2380 }
2381
2382 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2383 return;
2384 }
2385 } else {
2386 override_prev = override;
2387 }
2388
2389 override = override_next;
2390 }
2391 }
2392
2393 /* This helper recalculates the current requested override using the policy selected at boot */
2394 static int
2395 calculate_requested_qos_override(thread_t thread)
2396 {
2397 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2398 return THREAD_QOS_UNSPECIFIED;
2399 }
2400
2401 /* iterate over all overrides and calculate MAX */
2402 struct thread_qos_override *override;
2403 int qos_override = THREAD_QOS_UNSPECIFIED;
2404
2405 override = thread->overrides;
2406 while (override) {
2407 qos_override = MAX(qos_override, override->override_qos);
2408 override = override->override_next;
2409 }
2410
2411 return qos_override;
2412 }
2413
2414 /*
2415 * Returns:
2416 * - 0 on success
2417 * - EINVAL if some invalid input was passed
2418 */
2419 static int
2420 proc_thread_qos_add_override_internal(thread_t thread,
2421 int override_qos,
2422 boolean_t first_override_for_resource,
2423 user_addr_t resource,
2424 int resource_type)
2425 {
2426 struct task_pend_token pend_token = {};
2427 int rc = 0;
2428
2429 thread_mtx_lock(thread);
2430
2431 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2432 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2433
2434 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2435 uint64_t, thread->requested_policy.thrp_qos,
2436 uint64_t, thread->effective_policy.thep_qos,
2437 int, override_qos, boolean_t, first_override_for_resource);
2438
2439 struct thread_qos_override *override;
2440 struct thread_qos_override *override_new = NULL;
2441 int new_qos_override, prev_qos_override;
2442 int new_effective_qos;
2443
2444 canonicalize_resource_and_type(&resource, &resource_type);
2445
2446 override = find_qos_override(thread, resource, resource_type);
2447 if (first_override_for_resource && !override) {
2448 /* We need to allocate a new object. Drop the thread mutex and
2449 * recheck afterwards in case someone else added the override
2450 */
2451 thread_mtx_unlock(thread);
2452 override_new = zalloc(thread_qos_override_zone);
2453 thread_mtx_lock(thread);
2454 override = find_qos_override(thread, resource, resource_type);
2455 }
2456 if (first_override_for_resource && override) {
2457 /* Someone else already allocated while the thread mutex was dropped */
2458 override->override_contended_resource_count++;
2459 } else if (!override && override_new) {
2460 override = override_new;
2461 override_new = NULL;
2462 override->override_next = thread->overrides;
2463 /* since first_override_for_resource was TRUE */
2464 override->override_contended_resource_count = 1;
2465 override->override_resource = resource;
2466 override->override_resource_type = resource_type;
2467 override->override_qos = THREAD_QOS_UNSPECIFIED;
2468 thread->overrides = override;
2469 }
2470
2471 if (override) {
2472 if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2473 override->override_qos = override_qos;
2474 } else {
2475 override->override_qos = MAX(override->override_qos, override_qos);
2476 }
2477 }
2478
2479 /* Determine how to combine the various overrides into a single current
2480 * requested override
2481 */
2482 new_qos_override = calculate_requested_qos_override(thread);
2483
2484 prev_qos_override = proc_get_thread_policy_locked(thread,
2485 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2486
2487 if (new_qos_override != prev_qos_override) {
2488 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2489 TASK_POLICY_QOS_OVERRIDE,
2490 new_qos_override, 0, &pend_token);
2491 }
2492
2493 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2494
2495 thread_mtx_unlock(thread);
2496
2497 thread_policy_update_complete_unlocked(thread, &pend_token);
2498
2499 if (override_new) {
2500 zfree(thread_qos_override_zone, override_new);
2501 }
2502
2503 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2504 int, new_qos_override, int, new_effective_qos, int, rc);
2505
2506 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2507 new_qos_override, resource, resource_type, 0, 0);
2508
2509 return rc;
2510 }
2511
2512 int
2513 proc_thread_qos_add_override(task_t task,
2514 thread_t thread,
2515 uint64_t tid,
2516 int override_qos,
2517 boolean_t first_override_for_resource,
2518 user_addr_t resource,
2519 int resource_type)
2520 {
2521 boolean_t has_thread_reference = FALSE;
2522 int rc = 0;
2523
2524 if (thread == THREAD_NULL) {
2525 thread = task_findtid(task, tid);
2526 /* returns referenced thread */
2527
2528 if (thread == THREAD_NULL) {
2529 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2530 tid, 0, 0xdead, 0, 0);
2531 return ESRCH;
2532 }
2533 has_thread_reference = TRUE;
2534 } else {
2535 assert(thread->task == task);
2536 }
2537 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2538 first_override_for_resource, resource, resource_type);
2539 if (has_thread_reference) {
2540 thread_deallocate(thread);
2541 }
2542
2543 return rc;
2544 }
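
/*
 * Illustrative sketch (not part of the original source): how the userspace
 * synchronization support path might boost the owner of a contended pthread
 * mutex. The owner tid and mutex address are assumed to be supplied by the
 * caller; first_override_for_resource is TRUE only for the first waiter.
 */
#if 0 /* example sketch only */
static int
example_boost_mutex_owner(task_t task, uint64_t owner_tid, user_addr_t mutex_addr)
{
	return proc_thread_qos_add_override(task, THREAD_NULL, owner_tid,
	    THREAD_QOS_USER_INITIATED, TRUE,
	    mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
}
#endif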
2545
2546 static void
2547 proc_thread_qos_remove_override_internal(thread_t thread,
2548 user_addr_t resource,
2549 int resource_type,
2550 boolean_t reset)
2551 {
2552 struct task_pend_token pend_token = {};
2553
2554 struct thread_qos_override *deferred_free_override_list = NULL;
2555 int new_qos_override, prev_qos_override, new_effective_qos;
2556
2557 thread_mtx_lock(thread);
2558
2559 canonicalize_resource_and_type(&resource, &resource_type);
2560
2561 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2562
2563 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2564 thread_tid(thread), resource, reset, 0, 0);
2565
2566 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2567 uint64_t, thread->requested_policy.thrp_qos,
2568 uint64_t, thread->effective_policy.thep_qos);
2569
2570 /* Determine how to combine the various overrides into a single current requested override */
2571 new_qos_override = calculate_requested_qos_override(thread);
2572
2573 spl_t s = splsched();
2574 thread_lock(thread);
2575
2576 /*
2577 * The override chain, and therefore the value of the current override, is protected by the thread mutex,
2578 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2579 * This means you can't change the current override from a spinlock-only setter.
2580 */
2581 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2582
2583 if (new_qos_override != prev_qos_override) {
2584 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2585 }
2586
2587 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2588
2589 thread_unlock(thread);
2590 splx(s);
2591
2592 thread_mtx_unlock(thread);
2593
2594 thread_policy_update_complete_unlocked(thread, &pend_token);
2595
2596 while (deferred_free_override_list) {
2597 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2598
2599 zfree(thread_qos_override_zone, deferred_free_override_list);
2600 deferred_free_override_list = override_next;
2601 }
2602
2603 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2604 int, new_qos_override, int, new_effective_qos);
2605
2606 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2607 thread_tid(thread), 0, 0, 0, 0);
2608 }
2609
2610 int
2611 proc_thread_qos_remove_override(task_t task,
2612 thread_t thread,
2613 uint64_t tid,
2614 user_addr_t resource,
2615 int resource_type)
2616 {
2617 boolean_t has_thread_reference = FALSE;
2618
2619 if (thread == THREAD_NULL) {
2620 thread = task_findtid(task, tid);
2621 /* returns referenced thread */
2622
2623 if (thread == THREAD_NULL) {
2624 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2625 tid, 0, 0xdead, 0, 0);
2626 return ESRCH;
2627 }
2628 has_thread_reference = TRUE;
2629 } else {
2630 assert(task == thread->task);
2631 }
2632
2633 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2634
2635 if (has_thread_reference) {
2636 thread_deallocate(thread);
2637 }
2638
2639 return 0;
2640 }
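
/*
 * Illustrative sketch (not part of the original source): the matching
 * removal once the contended resource is released. Passing the wildcard
 * resource/type would instead match every override on the thread.
 */
#if 0 /* example sketch only */
static int
example_unboost_mutex_owner(task_t task, uint64_t owner_tid, user_addr_t mutex_addr)
{
	return proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid,
	    mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
}
#endif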
2641
2642 /* Deallocate before thread termination */
2643 void
2644 proc_thread_qos_deallocate(thread_t thread)
2645 {
2646 /* This thread must have no more IPC overrides. */
2647 assert(thread->ipc_overrides == 0);
2648 assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
2649 assert(thread->sync_ipc_overrides == 0);
2650 assert(thread->requested_policy.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
2651
2652 /*
2653 * Clear out any lingering override objects.
2654 */
2655 struct thread_qos_override *override;
2656
2657 thread_mtx_lock(thread);
2658 override = thread->overrides;
2659 thread->overrides = NULL;
2660 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2661 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2662 thread_mtx_unlock(thread);
2663
2664 while (override) {
2665 struct thread_qos_override *override_next = override->override_next;
2666
2667 zfree(thread_qos_override_zone, override);
2668 override = override_next;
2669 }
2670 }
2671
2672 /*
2673 * Set up the primordial thread's QoS
2674 */
2675 void
2676 task_set_main_thread_qos(task_t task, thread_t thread)
2677 {
2678 struct task_pend_token pend_token = {};
2679
2680 assert(thread->task == task);
2681
2682 thread_mtx_lock(thread);
2683
2684 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2685 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2686 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2687 thread->requested_policy.thrp_qos, 0);
2688
2689 int primordial_qos = task_compute_main_thread_qos(task);
2690
2691 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
2692 primordial_qos, 0, &pend_token);
2693
2694 thread_mtx_unlock(thread);
2695
2696 thread_policy_update_complete_unlocked(thread, &pend_token);
2697
2698 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2699 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2700 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2701 primordial_qos, 0);
2702 }
2703
2704 /*
2705 * KPI for pthread kext
2706 *
2707 * Return a good guess at what the initial manager QoS will be
2708 * Dispatch can override this in userspace if it so chooses
2709 */
2710 int
2711 task_get_default_manager_qos(task_t task)
2712 {
2713 int primordial_qos = task_compute_main_thread_qos(task);
2714
2715 if (primordial_qos == THREAD_QOS_LEGACY) {
2716 primordial_qos = THREAD_QOS_USER_INITIATED;
2717 }
2718
2719 return primordial_qos;
2720 }
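
/*
 * Illustrative sketch (not part of the original source): how the pthread
 * kext side of this KPI might consume the hint when bringing up a workqueue
 * manager thread for a task.
 */
#if 0 /* example sketch only */
static int
example_default_manager_qos(task_t task)
{
	/* THREAD_QOS_LEGACY is promoted to USER_INITIATED by the KPI above */
	return task_get_default_manager_qos(task);
}
#endif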
2721
2722 /*
2723 * Check if the user promotion on the thread has changed
2724 * and apply it.
2725 *
2726 * thread locked on entry, might drop the thread lock
2727 * and reacquire it.
2728 */
2729 boolean_t
2730 thread_recompute_user_promotion_locked(thread_t thread)
2731 {
2732 boolean_t needs_update = FALSE;
2733 struct task_pend_token pend_token = {};
2734 int user_promotion_basepri = MIN(thread_get_inheritor_turnstile_priority(thread), MAXPRI_USER);
2735 int old_base_pri = thread->base_pri;
2736 thread_qos_t qos_promotion;
2737
2738 /* Check if user promotion has changed */
2739 if (thread->user_promotion_basepri == user_promotion_basepri) {
2740 return needs_update;
2741 } else {
2742 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2743 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2744 thread_tid(thread),
2745 user_promotion_basepri,
2746 thread->user_promotion_basepri,
2747 0, 0);
2748 }
2749
2750 /* Update the user promotion base pri */
2751 thread->user_promotion_basepri = user_promotion_basepri;
2752 pend_token.tpt_force_recompute_pri = 1;
2753
2754 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2755 qos_promotion = THREAD_QOS_UNSPECIFIED;
2756 } else {
2757 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2758 }
2759
2760 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2761 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2762
2763 if (thread_get_waiting_turnstile(thread) &&
2764 thread->base_pri != old_base_pri) {
2765 needs_update = TRUE;
2766 }
2767
2768 thread_unlock(thread);
2769
2770 thread_policy_update_complete_unlocked(thread, &pend_token);
2771
2772 thread_lock(thread);
2773
2774 return needs_update;
2775 }
2776
2777 /*
2778 * Convert the thread's user promotion base priority to a QoS class, for threads in the QoS world.
2779 * For priorities above the UI QoS base priority, the QoS is clamped to UI.
2780 */
2781 thread_qos_t
2782 thread_user_promotion_qos_for_pri(int priority)
2783 {
2784 int qos;
2785 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2786 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2787 return qos;
2788 }
2789 }
2790 return THREAD_QOS_MAINTENANCE;
2791 }
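
/*
 * Illustrative sketch (not part of the original source): given the qos_pri
 * table at the top of this file, a turnstile inheritor priority of
 * BASEPRI_DEFAULT maps to THREAD_QOS_LEGACY, the highest QoS class whose
 * base priority does not exceed it.
 */
#if 0 /* example sketch only */
static void
example_promotion_qos_mapping(void)
{
	thread_qos_t qos = thread_user_promotion_qos_for_pri(BASEPRI_DEFAULT);
	assert(qos == THREAD_QOS_LEGACY);
}
#endif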
2792
2793 /*
2794 * Set the thread's QoS IPC override
2795 * Owned by the IPC subsystem
2796 *
2797 * May be called with spinlocks held, but not spinlocks
2798 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2799 *
2800 * One 'add' must be balanced by one 'drop'.
2801 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
2802 * Before the thread is deallocated, there must be 0 remaining overrides.
2803 */
2804 static void
2805 thread_ipc_override(thread_t thread,
2806 uint32_t qos_override,
2807 boolean_t is_new_override)
2808 {
2809 struct task_pend_token pend_token = {};
2810 boolean_t needs_update;
2811
2812 spl_t s = splsched();
2813 thread_lock(thread);
2814
2815 uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;
2816
2817 assert(qos_override > THREAD_QOS_UNSPECIFIED);
2818 assert(qos_override < THREAD_QOS_LAST);
2819
2820 if (is_new_override) {
2821 if (thread->ipc_overrides++ == 0) {
2822 /* This add is the first override for this thread */
2823 assert(old_override == THREAD_QOS_UNSPECIFIED);
2824 } else {
2825 /* There are already other overrides in effect for this thread */
2826 assert(old_override > THREAD_QOS_UNSPECIFIED);
2827 }
2828 } else {
2829 /* There must be at least one override (the previous add call) in effect */
2830 assert(thread->ipc_overrides > 0);
2831 assert(old_override > THREAD_QOS_UNSPECIFIED);
2832 }
2833
2834 /*
2835 * We can't allow lowering if there are several IPC overrides because
2836 * the caller can't possibly know the whole truth
2837 */
2838 if (thread->ipc_overrides == 1) {
2839 needs_update = qos_override != old_override;
2840 } else {
2841 needs_update = qos_override > old_override;
2842 }
2843
2844 if (needs_update) {
2845 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2846 TASK_POLICY_QOS_IPC_OVERRIDE,
2847 qos_override, 0, &pend_token);
2848 assert(pend_token.tpt_update_sockets == 0);
2849 }
2850
2851 thread_unlock(thread);
2852 splx(s);
2853
2854 thread_policy_update_complete_unlocked(thread, &pend_token);
2855 }
2856
2857 void
2858 thread_add_ipc_override(thread_t thread,
2859 uint32_t qos_override)
2860 {
2861 thread_ipc_override(thread, qos_override, TRUE);
2862 }
2863
2864 void
2865 thread_update_ipc_override(thread_t thread,
2866 uint32_t qos_override)
2867 {
2868 thread_ipc_override(thread, qos_override, FALSE);
2869 }
2870
2871 void
2872 thread_drop_ipc_override(thread_t thread)
2873 {
2874 struct task_pend_token pend_token = {};
2875
2876 spl_t s = splsched();
2877 thread_lock(thread);
2878
2879 assert(thread->ipc_overrides > 0);
2880
2881 if (--thread->ipc_overrides == 0) {
2882 /*
2883 * There are no more overrides for this thread, so we should
2884 * clear out the saturated override value
2885 */
2886
2887 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2888 TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
2889 0, &pend_token);
2890 }
2891
2892 thread_unlock(thread);
2893 splx(s);
2894
2895 thread_policy_update_complete_unlocked(thread, &pend_token);
2896 }
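
/*
 * Illustrative sketch (not part of the original source): the add/update/drop
 * lifecycle described above, as the IPC subsystem might drive it while a
 * boosting message is in flight for a thread.
 */
#if 0 /* example sketch only */
static void
example_ipc_override_lifecycle(thread_t thread)
{
	/* First boost for this thread: 'add' */
	thread_add_ipc_override(thread, THREAD_QOS_USER_INITIATED);

	/* A later, higher-QoS message may raise the saturated value: 'update' */
	thread_update_ipc_override(thread, THREAD_QOS_USER_INTERACTIVE);

	/* Balanced by exactly one 'drop' per 'add' */
	thread_drop_ipc_override(thread);
}
#endif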
2897
2898 /* Get current requested qos / relpri, may be called from spinlock context */
2899 thread_qos_t
2900 thread_get_requested_qos(thread_t thread, int *relpri)
2901 {
2902 int relprio_value = 0;
2903 thread_qos_t qos;
2904
2905 qos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2906 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
2907 if (relpri) {
2908 *relpri = -relprio_value;
2909 }
2910 return qos;
2911 }
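
/*
 * Illustrative sketch (not part of the original source): reading the
 * requested QoS and relative priority for the current thread. Per the
 * accessor above, the relative priority is returned negated from the
 * stored thrp_qos_relprio value.
 */
#if 0 /* example sketch only */
static void
example_read_requested_qos(void)
{
	int relpri = 0;
	thread_qos_t qos = thread_get_requested_qos(current_thread(), &relpri);

	(void)qos;
	(void)relpri;
}
#endif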
2912
2913 /*
2914 * This function will promote the thread priority
2915 * since exec could block other threads calling
2916 * proc_find on the proc. This boost must be removed
2917 * via a call to thread_clear_exec_promotion.
2918 *
2919 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
2920 */
2921 void
2922 thread_set_exec_promotion(thread_t thread)
2923 {
2924 spl_t s = splsched();
2925 thread_lock(thread);
2926
2927 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
2928
2929 thread_unlock(thread);
2930 splx(s);
2931 }
2932
2933 /*
2934 * This function will clear the exec thread
2935 * promotion set on the thread by thread_set_exec_promotion.
2936 */
2937 void
2938 thread_clear_exec_promotion(thread_t thread)
2939 {
2940 spl_t s = splsched();
2941 thread_lock(thread);
2942
2943 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
2944
2945 thread_unlock(thread);
2946 splx(s);
2947 }
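
/*
 * Illustrative sketch (not part of the original source): the promotion is
 * expected to bracket the exec critical section; do_exec_work() below is a
 * hypothetical placeholder for the real exec path.
 */
#if 0 /* example sketch only */
static void
example_exec_promotion(thread_t self)
{
	thread_set_exec_promotion(self);
	/* ... do_exec_work() would run here (hypothetical placeholder) ... */
	thread_clear_exec_promotion(self);
}
#endif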