]> git.saurik.com Git - apple/xnu.git/blob - osfmk/kern/thread_policy.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / osfmk / kern / thread_policy.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41
42 #include <mach/machine/sdt.h>
43
44 #ifdef MACH_BSD
45 extern int proc_selfpid(void);
46 extern char * proc_name_address(void *p);
47 extern void rethrottle_thread(void * uthread);
48 #endif /* MACH_BSD */
49
50 #define QOS_EXTRACT(q) ((q) & 0xff)
51
52 uint32_t qos_override_mode;
53 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
54 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
55 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
56 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
57
58 extern zone_t thread_qos_override_zone;
59
60 static void
61 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
62
63 /*
64 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
65 * to threads that don't have a QoS class set.
66 */
67 const qos_policy_params_t thread_qos_policy_params = {
68 /*
69 * This table defines the starting base priority of the thread,
70 * which will be modified by the thread importance and the task max priority
71 * before being applied.
72 */
73 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
74 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
75 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
76 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
77 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
78 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
79 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
80
81 /*
82 * This table defines the highest IO priority that a thread marked with this
83 * QoS class can have.
84 */
85 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
86 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
87 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
88 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
89 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
90 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
91 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
92
93 /*
94 * This table defines the highest QoS level that
95 * a thread marked with this QoS class can have.
96 */
97
98 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
99 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
100 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
101 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
102 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
103 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
104 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
105
106 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
107 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
108 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
109 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
110 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
111 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
112 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
113 };
114
115 static void
116 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
117
118 static int
119 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
120
121 static void
122 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
123
124 static void
125 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
126
127 static void
128 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
129
130 static void
131 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
132
133 static int
134 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
135
136 static int
137 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
138
139 static void
140 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
141
142 static void
143 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token);
144
145 void
146 thread_policy_init(void)
147 {
148 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
149 printf("QOS override mode: 0x%08x\n", qos_override_mode);
150 } else {
151 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
152 }
153 }
154
155 boolean_t
156 thread_has_qos_policy(thread_t thread)
157 {
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160
161
162 static void
163 thread_remove_qos_policy_locked(thread_t thread,
164 task_pend_token_t pend_token)
165 {
166 __unused int prev_qos = thread->requested_policy.thrp_qos;
167
168 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169
170 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173
174 kern_return_t
175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210 static int
211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) {
217 return 0;
218 }
219
220 switch (qos) {
221 case THREAD_QOS_USER_INTERACTIVE:
222 next_lower_qos = THREAD_QOS_USER_INITIATED;
223 break;
224 case THREAD_QOS_USER_INITIATED:
225 next_lower_qos = THREAD_QOS_LEGACY;
226 break;
227 case THREAD_QOS_LEGACY:
228 next_lower_qos = THREAD_QOS_UTILITY;
229 break;
230 case THREAD_QOS_UTILITY:
231 next_lower_qos = THREAD_QOS_BACKGROUND;
232 break;
233 case THREAD_QOS_MAINTENANCE:
234 case THREAD_QOS_BACKGROUND:
235 next_lower_qos = 0;
236 break;
237 default:
238 panic("Unrecognized QoS %d", qos);
239 return 0;
240 }
241
242 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244
245 /*
246 * We now have the valid range that the scaled relative priority can map to. Note
247 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 * remainder.
251 */
252 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253
254 return scaled_relprio;
255 }
256
257 /*
258 * flag set by -qos-policy-allow boot-arg to allow
259 * testing thread qos policy from userspace
260 */
261 static TUNABLE(bool, allow_qos_policy_set, "-qos-policy-allow", false);
262
263 kern_return_t
264 thread_policy_set(
265 thread_t thread,
266 thread_policy_flavor_t flavor,
267 thread_policy_t policy_info,
268 mach_msg_type_number_t count)
269 {
270 thread_qos_policy_data_t req_qos;
271 kern_return_t kr;
272
273 req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
274
275 if (thread == THREAD_NULL) {
276 return KERN_INVALID_ARGUMENT;
277 }
278
279 if (!allow_qos_policy_set) {
280 if (thread_is_static_param(thread)) {
281 return KERN_POLICY_STATIC;
282 }
283
284 if (flavor == THREAD_QOS_POLICY) {
285 return KERN_INVALID_ARGUMENT;
286 }
287 }
288
289 /* Threads without static_param set reset their QoS when other policies are applied. */
290 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
291 /* Store the existing tier, if we fail this call it is used to reset back. */
292 req_qos.qos_tier = thread->requested_policy.thrp_qos;
293 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
294
295 kr = thread_remove_qos_policy(thread);
296 if (kr != KERN_SUCCESS) {
297 return kr;
298 }
299 }
300
301 kr = thread_policy_set_internal(thread, flavor, policy_info, count);
302
303 /* Return KERN_QOS_REMOVED instead of KERN_SUCCESS if we succeeded. */
304 if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
305 if (kr != KERN_SUCCESS) {
306 /* Reset back to our original tier as the set failed. */
307 (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
308 }
309 }
310
311 return kr;
312 }
313
314 kern_return_t
315 thread_policy_set_internal(
316 thread_t thread,
317 thread_policy_flavor_t flavor,
318 thread_policy_t policy_info,
319 mach_msg_type_number_t count)
320 {
321 kern_return_t result = KERN_SUCCESS;
322 struct task_pend_token pend_token = {};
323
324 thread_mtx_lock(thread);
325 if (!thread->active) {
326 thread_mtx_unlock(thread);
327
328 return KERN_TERMINATED;
329 }
330
331 switch (flavor) {
332 case THREAD_EXTENDED_POLICY:
333 {
334 boolean_t timeshare = TRUE;
335
336 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
337 thread_extended_policy_t info;
338
339 info = (thread_extended_policy_t)policy_info;
340 timeshare = info->timeshare;
341 }
342
343 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
344
345 spl_t s = splsched();
346 thread_lock(thread);
347
348 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
349
350 thread_unlock(thread);
351 splx(s);
352
353 pend_token.tpt_update_thread_sfi = 1;
354
355 break;
356 }
357
358 case THREAD_TIME_CONSTRAINT_POLICY:
359 {
360 thread_time_constraint_policy_t info;
361
362 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
363 result = KERN_INVALID_ARGUMENT;
364 break;
365 }
366
367 info = (thread_time_constraint_policy_t)policy_info;
368
369
370 if (info->constraint < info->computation ||
371 info->computation > max_rt_quantum ||
372 info->computation < min_rt_quantum) {
373 result = KERN_INVALID_ARGUMENT;
374 break;
375 }
376
377 spl_t s = splsched();
378 thread_lock(thread);
379
380 thread->realtime.period = info->period;
381 thread->realtime.computation = info->computation;
382 thread->realtime.constraint = info->constraint;
383 thread->realtime.preemptible = info->preemptible;
384
385 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
386
387 thread_unlock(thread);
388 splx(s);
389
390 pend_token.tpt_update_thread_sfi = 1;
391
392 break;
393 }
394
395 case THREAD_PRECEDENCE_POLICY:
396 {
397 thread_precedence_policy_t info;
398
399 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
400 result = KERN_INVALID_ARGUMENT;
401 break;
402 }
403 info = (thread_precedence_policy_t)policy_info;
404
405 spl_t s = splsched();
406 thread_lock(thread);
407
408 thread->importance = info->importance;
409
410 thread_recompute_priority(thread);
411
412 thread_unlock(thread);
413 splx(s);
414
415 break;
416 }
417
418 case THREAD_AFFINITY_POLICY:
419 {
420 thread_affinity_policy_t info;
421
422 if (!thread_affinity_is_supported()) {
423 result = KERN_NOT_SUPPORTED;
424 break;
425 }
426 if (count < THREAD_AFFINITY_POLICY_COUNT) {
427 result = KERN_INVALID_ARGUMENT;
428 break;
429 }
430
431 info = (thread_affinity_policy_t) policy_info;
432 /*
433 * Unlock the thread mutex here and
434 * return directly after calling thread_affinity_set().
435 * This is necessary for correct lock ordering because
436 * thread_affinity_set() takes the task lock.
437 */
438 thread_mtx_unlock(thread);
439 return thread_affinity_set(thread, info->affinity_tag);
440 }
441
442 #if !defined(XNU_TARGET_OS_OSX)
443 case THREAD_BACKGROUND_POLICY:
444 {
445 thread_background_policy_t info;
446
447 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
448 result = KERN_INVALID_ARGUMENT;
449 break;
450 }
451
452 if (thread->task != current_task()) {
453 result = KERN_PROTECTION_FAILURE;
454 break;
455 }
456
457 info = (thread_background_policy_t) policy_info;
458
459 int enable;
460
461 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
462 enable = TASK_POLICY_ENABLE;
463 } else {
464 enable = TASK_POLICY_DISABLE;
465 }
466
467 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
468
469 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
470
471 break;
472 }
473 #endif /* !defined(XNU_TARGET_OS_OSX) */
474
475 case THREAD_THROUGHPUT_QOS_POLICY:
476 {
477 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
478 thread_throughput_qos_t tqos;
479
480 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
481 result = KERN_INVALID_ARGUMENT;
482 break;
483 }
484
485 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
486 break;
487 }
488
489 tqos = qos_extract(info->thread_throughput_qos_tier);
490
491 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
492 TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
493
494 break;
495 }
496
497 case THREAD_LATENCY_QOS_POLICY:
498 {
499 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
500 thread_latency_qos_t lqos;
501
502 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
503 result = KERN_INVALID_ARGUMENT;
504 break;
505 }
506
507 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
508 break;
509 }
510
511 lqos = qos_extract(info->thread_latency_qos_tier);
512
513 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
514 TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
515
516 break;
517 }
518
519 case THREAD_QOS_POLICY:
520 {
521 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
522
523 if (count < THREAD_QOS_POLICY_COUNT) {
524 result = KERN_INVALID_ARGUMENT;
525 break;
526 }
527
528 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
529 result = KERN_INVALID_ARGUMENT;
530 break;
531 }
532
533 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
534 result = KERN_INVALID_ARGUMENT;
535 break;
536 }
537
538 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
539 result = KERN_INVALID_ARGUMENT;
540 break;
541 }
542
543 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
544 info->qos_tier, -info->tier_importance, &pend_token);
545
546 break;
547 }
548
549 default:
550 result = KERN_INVALID_ARGUMENT;
551 break;
552 }
553
554 thread_mtx_unlock(thread);
555
556 thread_policy_update_complete_unlocked(thread, &pend_token);
557
558 return result;
559 }
560
561 /*
562 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
563 * Both result in FIXED mode scheduling.
564 */
565 static sched_mode_t
566 convert_policy_to_sched_mode(integer_t policy)
567 {
568 switch (policy) {
569 case POLICY_TIMESHARE:
570 return TH_MODE_TIMESHARE;
571 case POLICY_RR:
572 case POLICY_FIFO:
573 return TH_MODE_FIXED;
574 default:
575 panic("unexpected sched policy: %d", policy);
576 return TH_MODE_NONE;
577 }
578 }
579
580 /*
581 * Called either with the thread mutex locked
582 * or from the pthread kext in a 'safe place'.
583 */
584 static kern_return_t
585 thread_set_mode_and_absolute_pri_internal(thread_t thread,
586 sched_mode_t mode,
587 integer_t priority,
588 task_pend_token_t pend_token)
589 {
590 kern_return_t kr = KERN_SUCCESS;
591
592 spl_t s = splsched();
593 thread_lock(thread);
594
595 /* This path isn't allowed to change a thread out of realtime. */
596 if ((thread->sched_mode == TH_MODE_REALTIME) ||
597 (thread->saved_mode == TH_MODE_REALTIME)) {
598 kr = KERN_FAILURE;
599 goto unlock;
600 }
601
602 if (thread->policy_reset) {
603 kr = KERN_SUCCESS;
604 goto unlock;
605 }
606
607 sched_mode_t old_mode = thread->sched_mode;
608
609 /*
610 * Reverse engineer and apply the correct importance value
611 * from the requested absolute priority value.
612 *
613 * TODO: Store the absolute priority value instead
614 */
615
616 if (priority >= thread->max_priority) {
617 priority = thread->max_priority - thread->task_priority;
618 } else if (priority >= MINPRI_KERNEL) {
619 priority -= MINPRI_KERNEL;
620 } else if (priority >= MINPRI_RESERVED) {
621 priority -= MINPRI_RESERVED;
622 } else {
623 priority -= BASEPRI_DEFAULT;
624 }
625
626 priority += thread->task_priority;
627
628 if (priority > thread->max_priority) {
629 priority = thread->max_priority;
630 } else if (priority < MINPRI) {
631 priority = MINPRI;
632 }
633
634 thread->importance = priority - thread->task_priority;
635
636 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
637
638 if (mode != old_mode) {
639 pend_token->tpt_update_thread_sfi = 1;
640 }
641
642 unlock:
643 thread_unlock(thread);
644 splx(s);
645
646 return kr;
647 }
648
649 void
650 thread_freeze_base_pri(thread_t thread)
651 {
652 assert(thread == current_thread());
653
654 spl_t s = splsched();
655 thread_lock(thread);
656
657 assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
658 thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
659
660 thread_unlock(thread);
661 splx(s);
662 }
663
664 bool
665 thread_unfreeze_base_pri(thread_t thread)
666 {
667 assert(thread == current_thread());
668 integer_t base_pri;
669 ast_t ast = 0;
670
671 spl_t s = splsched();
672 thread_lock(thread);
673
674 assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
675 thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
676
677 base_pri = thread->req_base_pri;
678 if (base_pri != thread->base_pri) {
679 /*
680 * This function returns "true" if the base pri change
681 * is the most likely cause for the preemption.
682 */
683 sched_set_thread_base_priority(thread, base_pri);
684 ast = ast_peek(AST_PREEMPT);
685 }
686
687 thread_unlock(thread);
688 splx(s);
689
690 return ast != 0;
691 }
692
693 uint8_t
694 thread_workq_pri_for_qos(thread_qos_t qos)
695 {
696 assert(qos < THREAD_QOS_LAST);
697 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
698 }
699
700 thread_qos_t
701 thread_workq_qos_for_pri(int priority)
702 {
703 thread_qos_t qos;
704 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
705 // indicate that workq should map >UI threads to workq's
706 // internal notation for above-UI work.
707 return THREAD_QOS_UNSPECIFIED;
708 }
709 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
710 // map a given priority up to the next nearest qos band.
711 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
712 return qos;
713 }
714 }
715 return THREAD_QOS_MAINTENANCE;
716 }
717
718 /*
719 * private interface for pthread workqueues
720 *
721 * Set scheduling policy & absolute priority for thread
722 * May be called with spinlocks held
723 * Thread mutex lock is not held
724 */
725 void
726 thread_reset_workq_qos(thread_t thread, uint32_t qos)
727 {
728 struct task_pend_token pend_token = {};
729
730 assert(qos < THREAD_QOS_LAST);
731
732 spl_t s = splsched();
733 thread_lock(thread);
734
735 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
736 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
737 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
738 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
739 &pend_token);
740
741 assert(pend_token.tpt_update_sockets == 0);
742
743 thread_unlock(thread);
744 splx(s);
745
746 thread_policy_update_complete_unlocked(thread, &pend_token);
747 }
748
749 /*
750 * private interface for pthread workqueues
751 *
752 * Set scheduling policy & absolute priority for thread
753 * May be called with spinlocks held
754 * Thread mutex lock is held
755 */
756 void
757 thread_set_workq_override(thread_t thread, uint32_t qos)
758 {
759 struct task_pend_token pend_token = {};
760
761 assert(qos < THREAD_QOS_LAST);
762
763 spl_t s = splsched();
764 thread_lock(thread);
765
766 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
767 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
768
769 assert(pend_token.tpt_update_sockets == 0);
770
771 thread_unlock(thread);
772 splx(s);
773
774 thread_policy_update_complete_unlocked(thread, &pend_token);
775 }
776
777 /*
778 * private interface for pthread workqueues
779 *
780 * Set scheduling policy & absolute priority for thread
781 * May be called with spinlocks held
782 * Thread mutex lock is not held
783 */
784 void
785 thread_set_workq_pri(thread_t thread,
786 thread_qos_t qos,
787 integer_t priority,
788 integer_t policy)
789 {
790 struct task_pend_token pend_token = {};
791 sched_mode_t mode = convert_policy_to_sched_mode(policy);
792
793 assert(qos < THREAD_QOS_LAST);
794 assert(thread->static_param);
795
796 if (!thread->static_param || !thread->active) {
797 return;
798 }
799
800 spl_t s = splsched();
801 thread_lock(thread);
802
803 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
804 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
805 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
806 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
807 0, &pend_token);
808
809 thread_unlock(thread);
810 splx(s);
811
812 /* Concern: this doesn't hold the mutex... */
813
814 __assert_only kern_return_t kr;
815 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
816 &pend_token);
817 assert(kr == KERN_SUCCESS);
818
819 if (pend_token.tpt_update_thread_sfi) {
820 sfi_reevaluate(thread);
821 }
822 }
823
824 /*
825 * thread_set_mode_and_absolute_pri:
826 *
827 * Set scheduling policy & absolute priority for thread, for deprecated
828 * thread_set_policy and thread_policy interfaces.
829 *
830 * Called with nothing locked.
831 */
832 kern_return_t
833 thread_set_mode_and_absolute_pri(thread_t thread,
834 integer_t policy,
835 integer_t priority)
836 {
837 kern_return_t kr = KERN_SUCCESS;
838 struct task_pend_token pend_token = {};
839
840 sched_mode_t mode = convert_policy_to_sched_mode(policy);
841
842 thread_mtx_lock(thread);
843
844 if (!thread->active) {
845 kr = KERN_TERMINATED;
846 goto unlock;
847 }
848
849 if (thread_is_static_param(thread)) {
850 kr = KERN_POLICY_STATIC;
851 goto unlock;
852 }
853
854 /* Setting legacy policies on threads kills the current QoS */
855 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
856 thread_remove_qos_policy_locked(thread, &pend_token);
857 }
858
859 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
860
861 unlock:
862 thread_mtx_unlock(thread);
863
864 thread_policy_update_complete_unlocked(thread, &pend_token);
865
866 return kr;
867 }
868
869 /*
870 * Set the thread's requested mode and recompute priority
871 * Called with thread mutex and thread locked
872 *
873 * TODO: Mitigate potential problems caused by moving thread to end of runq
874 * whenever its priority is recomputed
875 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
876 */
877 static void
878 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
879 {
880 if (thread->policy_reset) {
881 return;
882 }
883
884 boolean_t removed = thread_run_queue_remove(thread);
885
886 /*
887 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
888 * That way there's zero confusion over which the user wants
889 * and which the kernel wants.
890 */
891 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
892 thread->saved_mode = mode;
893 } else {
894 sched_set_thread_mode(thread, mode);
895 }
896
897 thread_recompute_priority(thread);
898
899 if (removed) {
900 thread_run_queue_reinsert(thread, SCHED_TAILQ);
901 }
902 }
903
904 /* called at splsched with thread lock locked */
905 static void
906 thread_update_qos_cpu_time_locked(thread_t thread)
907 {
908 task_t task = thread->task;
909 uint64_t timer_sum, timer_delta;
910
911 /*
912 * This is only as accurate as the distance between
913 * last context switch (embedded) or last user/kernel boundary transition (desktop)
914 * because user_timer and system_timer are only updated then.
915 *
916 * TODO: Consider running a timer_update operation here to update it first.
917 * Maybe doable with interrupts disabled from current thread.
918 * If the thread is on a different core, may not be easy to get right.
919 *
920 * TODO: There should be a function for this in timer.c
921 */
922
923 timer_sum = timer_grab(&thread->user_timer);
924 timer_sum += timer_grab(&thread->system_timer);
925 timer_delta = timer_sum - thread->vtimer_qos_save;
926
927 thread->vtimer_qos_save = timer_sum;
928
929 uint64_t* task_counter = NULL;
930
931 /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
932 switch (thread->effective_policy.thep_qos) {
933 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
934 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
935 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
936 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
937 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
938 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
939 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
940 default:
941 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
942 }
943
944 OSAddAtomic64(timer_delta, task_counter);
945
946 /* Update the task-level qos stats atomically, because we don't have the task lock. */
947 switch (thread->requested_policy.thrp_qos) {
948 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
949 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
950 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
951 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
952 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
953 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
954 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
955 default:
956 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
957 }
958
959 OSAddAtomic64(timer_delta, task_counter);
960 }
961
962 /*
963 * called with no thread locks held
964 * may hold task lock
965 */
966 void
967 thread_update_qos_cpu_time(thread_t thread)
968 {
969 thread_mtx_lock(thread);
970
971 spl_t s = splsched();
972 thread_lock(thread);
973
974 thread_update_qos_cpu_time_locked(thread);
975
976 thread_unlock(thread);
977 splx(s);
978
979 thread_mtx_unlock(thread);
980 }
981
982 /*
983 * Calculate base priority from thread attributes, and set it on the thread
984 *
985 * Called with thread_lock and thread mutex held.
986 */
987 extern thread_t vm_pageout_scan_thread;
988 extern boolean_t vps_dynamic_priority_enabled;
989
990 void
991 thread_recompute_priority(
992 thread_t thread)
993 {
994 integer_t priority;
995
996 if (thread->policy_reset) {
997 return;
998 }
999
1000 if (thread->sched_mode == TH_MODE_REALTIME) {
1001 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
1002 return;
1003 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1004 int qos = thread->effective_policy.thep_qos;
1005 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1006 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1007 int qos_scaled_relprio;
1008
1009 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1010 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1011
1012 priority = thread_qos_policy_params.qos_pri[qos];
1013 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1014
1015 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1016 /* Bump priority 46 to 47 when in a frontmost app */
1017 qos_scaled_relprio += 1;
1018 }
1019
1020 /* TODO: factor in renice priority here? */
1021
1022 priority += qos_scaled_relprio;
1023 } else {
1024 if (thread->importance > MAXPRI) {
1025 priority = MAXPRI;
1026 } else if (thread->importance < -MAXPRI) {
1027 priority = -MAXPRI;
1028 } else {
1029 priority = thread->importance;
1030 }
1031
1032 priority += thread->task_priority;
1033 }
1034
1035 priority = MAX(priority, thread->user_promotion_basepri);
1036
1037 /*
1038 * Clamp priority back into the allowed range for this task.
1039 * The initial priority value could be out of this range due to:
1040 * Task clamped to BG or Utility (max-pri is 4, or 20)
1041 * Task is user task (max-pri is 63)
1042 * Task is kernel task (max-pri is 95)
1043 * Note that thread->importance is user-settable to any integer
1044 * via THREAD_PRECEDENCE_POLICY.
1045 */
1046 if (priority > thread->max_priority) {
1047 if (thread->effective_policy.thep_promote_above_task) {
1048 priority = MAX(thread->max_priority, thread->user_promotion_basepri);
1049 } else {
1050 priority = thread->max_priority;
1051 }
1052 } else if (priority < MINPRI) {
1053 priority = MINPRI;
1054 }
1055
1056 if (thread->saved_mode == TH_MODE_REALTIME &&
1057 thread->sched_flags & TH_SFLAG_FAILSAFE) {
1058 priority = DEPRESSPRI;
1059 }
1060
1061 if (thread->effective_policy.thep_terminated == TRUE) {
1062 /*
1063 * We temporarily want to override the expected priority to
1064 * ensure that the thread exits in a timely manner.
1065 * Note that this is allowed to exceed thread->max_priority
1066 * so that the thread is no longer clamped to background
1067 * during the final exit phase.
1068 */
1069 if (priority < thread->task_priority) {
1070 priority = thread->task_priority;
1071 }
1072 if (priority < BASEPRI_DEFAULT) {
1073 priority = BASEPRI_DEFAULT;
1074 }
1075 }
1076
1077 #if !defined(XNU_TARGET_OS_OSX)
1078 /* No one can have a base priority less than MAXPRI_THROTTLE */
1079 if (priority < MAXPRI_THROTTLE) {
1080 priority = MAXPRI_THROTTLE;
1081 }
1082 #endif /* !defined(XNU_TARGET_OS_OSX) */
1083
1084 sched_set_thread_base_priority(thread, priority);
1085 }
1086
1087 /* Called with the task lock held, but not the thread mutex or spinlock */
1088 void
1089 thread_policy_update_tasklocked(
1090 thread_t thread,
1091 integer_t priority,
1092 integer_t max_priority,
1093 task_pend_token_t pend_token)
1094 {
1095 thread_mtx_lock(thread);
1096
1097 if (!thread->active || thread->policy_reset) {
1098 thread_mtx_unlock(thread);
1099 return;
1100 }
1101
1102 spl_t s = splsched();
1103 thread_lock(thread);
1104
1105 __unused
1106 integer_t old_max_priority = thread->max_priority;
1107
1108 assert(priority >= INT16_MIN && priority <= INT16_MAX);
1109 thread->task_priority = (int16_t)priority;
1110
1111 assert(max_priority >= INT16_MIN && max_priority <= INT16_MAX);
1112 thread->max_priority = (int16_t)max_priority;
1113
1114 /*
1115 * When backgrounding a thread, realtime and fixed priority threads
1116 * should be demoted to timeshare background threads.
1117 *
1118 * TODO: Do this inside the thread policy update routine in order to avoid double
1119 * remove/reinsert for a runnable thread
1120 */
1121 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1122 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1123 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1124 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1125 }
1126
1127 thread_policy_update_spinlocked(thread, true, pend_token);
1128
1129 thread_unlock(thread);
1130 splx(s);
1131
1132 thread_mtx_unlock(thread);
1133 }
1134
1135 /*
1136 * Reset thread to default state in preparation for termination
1137 * Called with thread mutex locked
1138 *
1139 * Always called on current thread, so we don't need a run queue remove
1140 */
1141 void
1142 thread_policy_reset(
1143 thread_t thread)
1144 {
1145 spl_t s;
1146
1147 assert(thread == current_thread());
1148
1149 s = splsched();
1150 thread_lock(thread);
1151
1152 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1153 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1154 }
1155
1156 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1157 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1158 }
1159
1160 /* At this point, the various demotions should be inactive */
1161 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1162 assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1163 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1164
1165 /* Reset thread back to task-default basepri and mode */
1166 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1167
1168 sched_set_thread_mode(thread, newmode);
1169
1170 thread->importance = 0;
1171
1172 /* Prevent further changes to thread base priority or mode */
1173 thread->policy_reset = 1;
1174
1175 sched_set_thread_base_priority(thread, thread->task_priority);
1176
1177 thread_unlock(thread);
1178 splx(s);
1179 }
1180
1181 kern_return_t
1182 thread_policy_get(
1183 thread_t thread,
1184 thread_policy_flavor_t flavor,
1185 thread_policy_t policy_info,
1186 mach_msg_type_number_t *count,
1187 boolean_t *get_default)
1188 {
1189 kern_return_t result = KERN_SUCCESS;
1190
1191 if (thread == THREAD_NULL) {
1192 return KERN_INVALID_ARGUMENT;
1193 }
1194
1195 thread_mtx_lock(thread);
1196 if (!thread->active) {
1197 thread_mtx_unlock(thread);
1198
1199 return KERN_TERMINATED;
1200 }
1201
1202 switch (flavor) {
1203 case THREAD_EXTENDED_POLICY:
1204 {
1205 boolean_t timeshare = TRUE;
1206
1207 if (!(*get_default)) {
1208 spl_t s = splsched();
1209 thread_lock(thread);
1210
1211 if ((thread->sched_mode != TH_MODE_REALTIME) &&
1212 (thread->saved_mode != TH_MODE_REALTIME)) {
1213 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
1214 timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1215 } else {
1216 timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1217 }
1218 } else {
1219 *get_default = TRUE;
1220 }
1221
1222 thread_unlock(thread);
1223 splx(s);
1224 }
1225
1226 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1227 thread_extended_policy_t info;
1228
1229 info = (thread_extended_policy_t)policy_info;
1230 info->timeshare = timeshare;
1231 }
1232
1233 break;
1234 }
1235
1236 case THREAD_TIME_CONSTRAINT_POLICY:
1237 {
1238 thread_time_constraint_policy_t info;
1239
1240 if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1241 result = KERN_INVALID_ARGUMENT;
1242 break;
1243 }
1244
1245 info = (thread_time_constraint_policy_t)policy_info;
1246
1247 if (!(*get_default)) {
1248 spl_t s = splsched();
1249 thread_lock(thread);
1250
1251 if ((thread->sched_mode == TH_MODE_REALTIME) ||
1252 (thread->saved_mode == TH_MODE_REALTIME)) {
1253 info->period = thread->realtime.period;
1254 info->computation = thread->realtime.computation;
1255 info->constraint = thread->realtime.constraint;
1256 info->preemptible = thread->realtime.preemptible;
1257 } else {
1258 *get_default = TRUE;
1259 }
1260
1261 thread_unlock(thread);
1262 splx(s);
1263 }
1264
1265 if (*get_default) {
1266 info->period = 0;
1267 info->computation = default_timeshare_computation;
1268 info->constraint = default_timeshare_constraint;
1269 info->preemptible = TRUE;
1270 }
1271
1272
1273 break;
1274 }
1275
1276 case THREAD_PRECEDENCE_POLICY:
1277 {
1278 thread_precedence_policy_t info;
1279
1280 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1281 result = KERN_INVALID_ARGUMENT;
1282 break;
1283 }
1284
1285 info = (thread_precedence_policy_t)policy_info;
1286
1287 if (!(*get_default)) {
1288 spl_t s = splsched();
1289 thread_lock(thread);
1290
1291 info->importance = thread->importance;
1292
1293 thread_unlock(thread);
1294 splx(s);
1295 } else {
1296 info->importance = 0;
1297 }
1298
1299 break;
1300 }
1301
1302 case THREAD_AFFINITY_POLICY:
1303 {
1304 thread_affinity_policy_t info;
1305
1306 if (!thread_affinity_is_supported()) {
1307 result = KERN_NOT_SUPPORTED;
1308 break;
1309 }
1310 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1311 result = KERN_INVALID_ARGUMENT;
1312 break;
1313 }
1314
1315 info = (thread_affinity_policy_t)policy_info;
1316
1317 if (!(*get_default)) {
1318 info->affinity_tag = thread_affinity_get(thread);
1319 } else {
1320 info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1321 }
1322
1323 break;
1324 }
1325
1326 case THREAD_POLICY_STATE:
1327 {
1328 thread_policy_state_t info;
1329
1330 if (*count < THREAD_POLICY_STATE_COUNT) {
1331 result = KERN_INVALID_ARGUMENT;
1332 break;
1333 }
1334
1335 /* Only root can get this info */
1336 if (current_task()->sec_token.val[0] != 0) {
1337 result = KERN_PROTECTION_FAILURE;
1338 break;
1339 }
1340
1341 info = (thread_policy_state_t)(void*)policy_info;
1342
1343 if (!(*get_default)) {
1344 info->flags = 0;
1345
1346 spl_t s = splsched();
1347 thread_lock(thread);
1348
1349 info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1350
1351 info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1352 info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1353
1354 info->thps_user_promotions = 0;
1355 info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1356 info->thps_ipc_overrides = thread->kevent_overrides;
1357
1358 proc_get_thread_policy_bitfield(thread, info);
1359
1360 thread_unlock(thread);
1361 splx(s);
1362 } else {
1363 info->requested = 0;
1364 info->effective = 0;
1365 info->pending = 0;
1366 }
1367
1368 break;
1369 }
1370
1371 case THREAD_LATENCY_QOS_POLICY:
1372 {
1373 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1374 thread_latency_qos_t plqos;
1375
1376 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1377 result = KERN_INVALID_ARGUMENT;
1378 break;
1379 }
1380
1381 if (*get_default) {
1382 plqos = 0;
1383 } else {
1384 plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1385 }
1386
1387 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1388 }
1389 break;
1390
1391 case THREAD_THROUGHPUT_QOS_POLICY:
1392 {
1393 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1394 thread_throughput_qos_t ptqos;
1395
1396 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1397 result = KERN_INVALID_ARGUMENT;
1398 break;
1399 }
1400
1401 if (*get_default) {
1402 ptqos = 0;
1403 } else {
1404 ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1405 }
1406
1407 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1408 }
1409 break;
1410
1411 case THREAD_QOS_POLICY:
1412 {
1413 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1414
1415 if (*count < THREAD_QOS_POLICY_COUNT) {
1416 result = KERN_INVALID_ARGUMENT;
1417 break;
1418 }
1419
1420 if (!(*get_default)) {
1421 int relprio_value = 0;
1422 info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1423 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1424
1425 info->tier_importance = -relprio_value;
1426 } else {
1427 info->qos_tier = THREAD_QOS_UNSPECIFIED;
1428 info->tier_importance = 0;
1429 }
1430
1431 break;
1432 }
1433
1434 default:
1435 result = KERN_INVALID_ARGUMENT;
1436 break;
1437 }
1438
1439 thread_mtx_unlock(thread);
1440
1441 return result;
1442 }
1443
1444 void
1445 thread_policy_create(thread_t thread)
1446 {
1447 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1448 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1449 thread_tid(thread), theffective_0(thread),
1450 theffective_1(thread), thread->base_pri, 0);
1451
1452 /* We pass a pend token but ignore it */
1453 struct task_pend_token pend_token = {};
1454
1455 thread_policy_update_internal_spinlocked(thread, true, &pend_token);
1456
1457 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1458 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1459 thread_tid(thread), theffective_0(thread),
1460 theffective_1(thread), thread->base_pri, 0);
1461 }
1462
1463 static void
1464 thread_policy_update_spinlocked(thread_t thread, bool recompute_priority, task_pend_token_t pend_token)
1465 {
1466 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1467 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1468 thread_tid(thread), theffective_0(thread),
1469 theffective_1(thread), thread->base_pri, 0);
1470
1471 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1472
1473 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1474 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1475 thread_tid(thread), theffective_0(thread),
1476 theffective_1(thread), thread->base_pri, 0);
1477 }
1478
1479
1480
1481 /*
1482 * One thread state update function TO RULE THEM ALL
1483 *
1484 * This function updates the thread effective policy fields
1485 * and pushes the results to the relevant subsystems.
1486 *
1487 * Returns TRUE if a pended action needs to be run.
1488 *
1489 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1490 */
1491 static void
1492 thread_policy_update_internal_spinlocked(thread_t thread, bool recompute_priority,
1493 task_pend_token_t pend_token)
1494 {
1495 /*
1496 * Step 1:
1497 * Gather requested policy and effective task state
1498 */
1499
1500 struct thread_requested_policy requested = thread->requested_policy;
1501 struct task_effective_policy task_effective = thread->task->effective_policy;
1502
1503 /*
1504 * Step 2:
1505 * Calculate new effective policies from requested policy, task and thread state
1506 * Rules:
1507 * Don't change requested, it won't take effect
1508 */
1509
1510 struct thread_effective_policy next = {};
1511
1512 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1513
1514 uint32_t next_qos = requested.thrp_qos;
1515
1516 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1517 next_qos = MAX(requested.thrp_qos_override, next_qos);
1518 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1519 next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1520 next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1521 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1522 }
1523
1524 if (task_effective.tep_darwinbg && task_effective.tep_adaptive_bg &&
1525 requested.thrp_qos_promote > THREAD_QOS_BACKGROUND) {
1526 /*
1527 * This thread is turnstile-boosted higher than the adaptive clamp
1528 * by a synchronous waiter. Allow that to override the adaptive
1529 * clamp temporarily for this thread only.
1530 */
1531 next.thep_promote_above_task = true;
1532 next_qos = requested.thrp_qos_promote;
1533 }
1534
1535 next.thep_qos = next_qos;
1536
1537 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1538 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1539 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1540 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1541 } else {
1542 next.thep_qos = task_effective.tep_qos_clamp;
1543 }
1544 }
1545
1546 /*
1547 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1548 * This allows QoS promotions to work properly even after the process is unclamped.
1549 */
1550 next.thep_qos_promote = next.thep_qos;
1551
1552 /* The ceiling only applies to threads that are in the QoS world */
1553 /* TODO: is it appropriate for this to limit a turnstile-boosted thread's QoS? */
1554 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1555 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1556 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1557 }
1558
1559 /*
1560 * The QoS relative priority is only applicable when the original programmer's
1561 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1562 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1563 * since otherwise it would be lower than unclamped threads. Similarly, in the
1564 * presence of boosting, the programmer doesn't know what other actors
1565 * are boosting the thread.
1566 */
1567 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1568 (requested.thrp_qos == next.thep_qos) &&
1569 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1570 next.thep_qos_relprio = requested.thrp_qos_relprio;
1571 } else {
1572 next.thep_qos_relprio = 0;
1573 }
1574
1575 /* Calculate DARWIN_BG */
1576 bool wants_darwinbg = false;
1577 bool wants_all_sockets_bg = false; /* Do I want my existing sockets to be bg */
1578
1579 if (task_effective.tep_darwinbg && !next.thep_promote_above_task) {
1580 wants_darwinbg = true;
1581 }
1582
1583 /*
1584 * If DARWIN_BG has been requested at either level, it's engaged.
1585 * darwinbg threads always create bg sockets,
1586 * but only some types of darwinbg change the sockets
1587 * after they're created
1588 */
1589 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1590 wants_all_sockets_bg = wants_darwinbg = true;
1591 }
1592
1593 if (requested.thrp_pidbind_bg) {
1594 wants_all_sockets_bg = wants_darwinbg = true;
1595 }
1596
1597 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1598 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1599 wants_darwinbg = true;
1600 }
1601
1602 /* Calculate side effects of DARWIN_BG */
1603
1604 if (wants_darwinbg) {
1605 next.thep_darwinbg = 1;
1606 }
1607
1608 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1609 next.thep_new_sockets_bg = 1;
1610 }
1611
1612 /* Don't use task_effective.tep_all_sockets_bg here */
1613 if (wants_all_sockets_bg) {
1614 next.thep_all_sockets_bg = 1;
1615 }
1616
1617 /* darwinbg implies background QOS (or lower) */
1618 if (next.thep_darwinbg &&
1619 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1620 next.thep_qos = THREAD_QOS_BACKGROUND;
1621 next.thep_qos_relprio = 0;
1622 }
1623
1624 /* Calculate IO policy */
1625
1626 int iopol = THROTTLE_LEVEL_TIER0;
1627
1628 /* Factor in the task's IO policy */
1629 if (next.thep_darwinbg) {
1630 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1631 }
1632
1633 if (!next.thep_promote_above_task) {
1634 iopol = MAX(iopol, task_effective.tep_io_tier);
1635 }
1636
1637 /* Look up the associated IO tier value for the QoS class */
1638 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1639
1640 iopol = MAX(iopol, requested.thrp_int_iotier);
1641 iopol = MAX(iopol, requested.thrp_ext_iotier);
1642
1643 next.thep_io_tier = iopol;
1644
1645 /*
1646 * If a QoS override is causing IO to go into a lower tier, we also set
1647 * the passive bit so that a thread doesn't end up stuck in its own throttle
1648 * window when the override goes away.
1649 */
1650
1651 int next_qos_iotier = thread_qos_policy_params.qos_iotier[next.thep_qos];
1652 int req_qos_iotier = thread_qos_policy_params.qos_iotier[requested.thrp_qos];
1653 bool qos_io_override_active = (next_qos_iotier < req_qos_iotier);
1654
1655 /* Calculate Passive IO policy */
1656 if (requested.thrp_ext_iopassive ||
1657 requested.thrp_int_iopassive ||
1658 qos_io_override_active ||
1659 task_effective.tep_io_passive) {
1660 next.thep_io_passive = 1;
1661 }
1662
1663 /* Calculate timer QOS */
1664 uint32_t latency_qos = requested.thrp_latency_qos;
1665
1666 if (!next.thep_promote_above_task) {
1667 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1668 }
1669
1670 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1671
1672 next.thep_latency_qos = latency_qos;
1673
1674 /* Calculate throughput QOS */
1675 uint32_t through_qos = requested.thrp_through_qos;
1676
1677 if (!next.thep_promote_above_task) {
1678 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1679 }
1680
1681 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1682
1683 next.thep_through_qos = through_qos;
1684
1685 if (task_effective.tep_terminated || requested.thrp_terminated) {
1686 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1687 next.thep_terminated = 1;
1688 next.thep_darwinbg = 0;
1689 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1690 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1691 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1692 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1693 }
1694
1695 /*
1696 * Step 3:
1697 * Swap out old policy for new policy
1698 */
1699
1700 struct thread_effective_policy prev = thread->effective_policy;
1701
1702 thread_update_qos_cpu_time_locked(thread);
1703
1704 /* This is the point where the new values become visible to other threads */
1705 thread->effective_policy = next;
1706
1707 /*
1708 * Step 4:
1709 * Pend updates that can't be done while holding the thread lock
1710 */
1711
1712 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1713 pend_token->tpt_update_sockets = 1;
1714 }
1715
1716 /* TODO: Doesn't this only need to be done if the throttle went up? */
1717 if (prev.thep_io_tier != next.thep_io_tier) {
1718 pend_token->tpt_update_throttle = 1;
1719 }
1720
1721 /*
1722 * Check for the attributes that sfi_thread_classify() consults,
1723 * and trigger SFI re-evaluation.
1724 */
1725 if (prev.thep_qos != next.thep_qos ||
1726 prev.thep_darwinbg != next.thep_darwinbg) {
1727 pend_token->tpt_update_thread_sfi = 1;
1728 }
1729
1730 integer_t old_base_pri = thread->base_pri;
1731
1732 /*
1733 * Step 5:
1734 * Update other subsystems as necessary if something has changed
1735 */
1736
1737 /* Check for the attributes that thread_recompute_priority() consults */
1738 if (prev.thep_qos != next.thep_qos ||
1739 prev.thep_qos_relprio != next.thep_qos_relprio ||
1740 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1741 prev.thep_promote_above_task != next.thep_promote_above_task ||
1742 prev.thep_terminated != next.thep_terminated ||
1743 pend_token->tpt_force_recompute_pri == 1 ||
1744 recompute_priority) {
1745 thread_recompute_priority(thread);
1746 }
1747
1748 /*
1749 * Check if the thread is waiting on a turnstile and needs priority propagation.
1750 */
1751 if (pend_token->tpt_update_turnstile &&
1752 ((old_base_pri == thread->base_pri) ||
1753 !thread_get_waiting_turnstile(thread))) {
1754 /*
1755 * Reset update turnstile pend token since either
1756 * the thread priority did not change or thread is
1757 * not blocked on a turnstile.
1758 */
1759 pend_token->tpt_update_turnstile = 0;
1760 }
1761 }
1762
1763
1764 /*
1765 * Initiate a thread policy state transition on a thread with its TID
1766 * Useful if you cannot guarantee the thread won't get terminated
1767 * Precondition: No locks are held
1768 * Will take task lock - using the non-tid variant is faster
1769 * if you already have a thread ref.
1770 */
1771 void
1772 proc_set_thread_policy_with_tid(task_t task,
1773 uint64_t tid,
1774 int category,
1775 int flavor,
1776 int value)
1777 {
1778 /* takes task lock, returns ref'ed thread or NULL */
1779 thread_t thread = task_findtid(task, tid);
1780
1781 if (thread == THREAD_NULL) {
1782 return;
1783 }
1784
1785 proc_set_thread_policy(thread, category, flavor, value);
1786
1787 thread_deallocate(thread);
1788 }
1789
1790 /*
1791 * Initiate a thread policy transition on a thread
1792 * This path supports networking transitions (i.e. darwinbg transitions)
1793 * Precondition: No locks are held
1794 */
1795 void
1796 proc_set_thread_policy(thread_t thread,
1797 int category,
1798 int flavor,
1799 int value)
1800 {
1801 struct task_pend_token pend_token = {};
1802
1803 thread_mtx_lock(thread);
1804
1805 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1806
1807 thread_mtx_unlock(thread);
1808
1809 thread_policy_update_complete_unlocked(thread, &pend_token);
1810 }
1811
1812 /*
1813 * Do the things that can't be done while holding a thread mutex.
1814 * These are set up to call back into thread policy to get the latest value,
1815 * so they don't have to be synchronized with the update.
1816 * The only required semantic is 'call this sometime after updating effective policy'
1817 *
1818 * Precondition: Thread mutex is not held
1819 *
1820 * This may be called with the task lock held, but in that case it won't be
1821 * called with tpt_update_sockets set.
1822 */
1823 void
1824 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1825 {
1826 #ifdef MACH_BSD
1827 if (pend_token->tpt_update_sockets) {
1828 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1829 }
1830 #endif /* MACH_BSD */
1831
1832 if (pend_token->tpt_update_throttle) {
1833 rethrottle_thread(thread->uthread);
1834 }
1835
1836 if (pend_token->tpt_update_thread_sfi) {
1837 sfi_reevaluate(thread);
1838 }
1839
1840 if (pend_token->tpt_update_turnstile) {
1841 turnstile_update_thread_priority_chain(thread);
1842 }
1843 }
1844
1845 /*
1846 * Set and update thread policy
1847 * Thread mutex might be held
1848 */
1849 static void
1850 proc_set_thread_policy_locked(thread_t thread,
1851 int category,
1852 int flavor,
1853 int value,
1854 int value2,
1855 task_pend_token_t pend_token)
1856 {
1857 spl_t s = splsched();
1858 thread_lock(thread);
1859
1860 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1861
1862 thread_unlock(thread);
1863 splx(s);
1864 }
1865
1866 /*
1867 * Set and update thread policy
1868 * Thread spinlock is held
1869 */
1870 static void
1871 proc_set_thread_policy_spinlocked(thread_t thread,
1872 int category,
1873 int flavor,
1874 int value,
1875 int value2,
1876 task_pend_token_t pend_token)
1877 {
1878 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1879 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1880 thread_tid(thread), threquested_0(thread),
1881 threquested_1(thread), value, 0);
1882
1883 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1884
1885 thread_policy_update_spinlocked(thread, false, pend_token);
1886
1887 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1888 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1889 thread_tid(thread), threquested_0(thread),
1890 threquested_1(thread), tpending(pend_token), 0);
1891 }
1892
1893 /*
1894 * Set the requested state for a specific flavor to a specific value.
1895 */
1896 static void
1897 thread_set_requested_policy_spinlocked(thread_t thread,
1898 int category,
1899 int flavor,
1900 int value,
1901 int value2,
1902 task_pend_token_t pend_token)
1903 {
1904 int tier, passive;
1905
1906 struct thread_requested_policy requested = thread->requested_policy;
1907
1908 switch (flavor) {
1909 /* Category: EXTERNAL and INTERNAL, thread and task */
1910
1911 case TASK_POLICY_DARWIN_BG:
1912 if (category == TASK_POLICY_EXTERNAL) {
1913 requested.thrp_ext_darwinbg = value;
1914 } else {
1915 requested.thrp_int_darwinbg = value;
1916 }
1917 break;
1918
1919 case TASK_POLICY_IOPOL:
1920 proc_iopol_to_tier(value, &tier, &passive);
1921 if (category == TASK_POLICY_EXTERNAL) {
1922 requested.thrp_ext_iotier = tier;
1923 requested.thrp_ext_iopassive = passive;
1924 } else {
1925 requested.thrp_int_iotier = tier;
1926 requested.thrp_int_iopassive = passive;
1927 }
1928 break;
1929
1930 case TASK_POLICY_IO:
1931 if (category == TASK_POLICY_EXTERNAL) {
1932 requested.thrp_ext_iotier = value;
1933 } else {
1934 requested.thrp_int_iotier = value;
1935 }
1936 break;
1937
1938 case TASK_POLICY_PASSIVE_IO:
1939 if (category == TASK_POLICY_EXTERNAL) {
1940 requested.thrp_ext_iopassive = value;
1941 } else {
1942 requested.thrp_int_iopassive = value;
1943 }
1944 break;
1945
1946 /* Category: ATTRIBUTE, thread only */
1947
1948 case TASK_POLICY_PIDBIND_BG:
1949 assert(category == TASK_POLICY_ATTRIBUTE);
1950 requested.thrp_pidbind_bg = value;
1951 break;
1952
1953 case TASK_POLICY_LATENCY_QOS:
1954 assert(category == TASK_POLICY_ATTRIBUTE);
1955 requested.thrp_latency_qos = value;
1956 break;
1957
1958 case TASK_POLICY_THROUGH_QOS:
1959 assert(category == TASK_POLICY_ATTRIBUTE);
1960 requested.thrp_through_qos = value;
1961 break;
1962
1963 case TASK_POLICY_QOS_OVERRIDE:
1964 assert(category == TASK_POLICY_ATTRIBUTE);
1965 requested.thrp_qos_override = value;
1966 pend_token->tpt_update_turnstile = 1;
1967 break;
1968
1969 case TASK_POLICY_QOS_AND_RELPRIO:
1970 assert(category == TASK_POLICY_ATTRIBUTE);
1971 requested.thrp_qos = value;
1972 requested.thrp_qos_relprio = value2;
1973 pend_token->tpt_update_turnstile = 1;
1974 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1975 break;
1976
1977 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1978 assert(category == TASK_POLICY_ATTRIBUTE);
1979 requested.thrp_qos_workq_override = value;
1980 pend_token->tpt_update_turnstile = 1;
1981 break;
1982
1983 case TASK_POLICY_QOS_PROMOTE:
1984 assert(category == TASK_POLICY_ATTRIBUTE);
1985 requested.thrp_qos_promote = value;
1986 break;
1987
1988 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
1989 assert(category == TASK_POLICY_ATTRIBUTE);
1990 requested.thrp_qos_kevent_override = value;
1991 pend_token->tpt_update_turnstile = 1;
1992 break;
1993
1994 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
1995 assert(category == TASK_POLICY_ATTRIBUTE);
1996 requested.thrp_qos_wlsvc_override = value;
1997 pend_token->tpt_update_turnstile = 1;
1998 break;
1999
2000 case TASK_POLICY_TERMINATED:
2001 assert(category == TASK_POLICY_ATTRIBUTE);
2002 requested.thrp_terminated = value;
2003 break;
2004
2005 default:
2006 panic("unknown task policy: %d %d %d", category, flavor, value);
2007 break;
2008 }
2009
2010 thread->requested_policy = requested;
2011 }
2012
2013 /*
2014 * Gets what you set. Effective values may be different.
2015 * Precondition: No locks are held
2016 */
2017 int
2018 proc_get_thread_policy(thread_t thread,
2019 int category,
2020 int flavor)
2021 {
2022 int value = 0;
2023 thread_mtx_lock(thread);
2024 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2025 thread_mtx_unlock(thread);
2026 return value;
2027 }
2028
2029 static int
2030 proc_get_thread_policy_locked(thread_t thread,
2031 int category,
2032 int flavor,
2033 int* value2)
2034 {
2035 int value = 0;
2036
2037 spl_t s = splsched();
2038 thread_lock(thread);
2039
2040 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2041
2042 thread_unlock(thread);
2043 splx(s);
2044
2045 return value;
2046 }
2047
2048 /*
2049 * Gets what you set. Effective values may be different.
2050 */
2051 static int
2052 thread_get_requested_policy_spinlocked(thread_t thread,
2053 int category,
2054 int flavor,
2055 int* value2)
2056 {
2057 int value = 0;
2058
2059 struct thread_requested_policy requested = thread->requested_policy;
2060
2061 switch (flavor) {
2062 case TASK_POLICY_DARWIN_BG:
2063 if (category == TASK_POLICY_EXTERNAL) {
2064 value = requested.thrp_ext_darwinbg;
2065 } else {
2066 value = requested.thrp_int_darwinbg;
2067 }
2068 break;
2069 case TASK_POLICY_IOPOL:
2070 if (category == TASK_POLICY_EXTERNAL) {
2071 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2072 requested.thrp_ext_iopassive);
2073 } else {
2074 value = proc_tier_to_iopol(requested.thrp_int_iotier,
2075 requested.thrp_int_iopassive);
2076 }
2077 break;
2078 case TASK_POLICY_IO:
2079 if (category == TASK_POLICY_EXTERNAL) {
2080 value = requested.thrp_ext_iotier;
2081 } else {
2082 value = requested.thrp_int_iotier;
2083 }
2084 break;
2085 case TASK_POLICY_PASSIVE_IO:
2086 if (category == TASK_POLICY_EXTERNAL) {
2087 value = requested.thrp_ext_iopassive;
2088 } else {
2089 value = requested.thrp_int_iopassive;
2090 }
2091 break;
2092 case TASK_POLICY_QOS:
2093 assert(category == TASK_POLICY_ATTRIBUTE);
2094 value = requested.thrp_qos;
2095 break;
2096 case TASK_POLICY_QOS_OVERRIDE:
2097 assert(category == TASK_POLICY_ATTRIBUTE);
2098 value = requested.thrp_qos_override;
2099 break;
2100 case TASK_POLICY_LATENCY_QOS:
2101 assert(category == TASK_POLICY_ATTRIBUTE);
2102 value = requested.thrp_latency_qos;
2103 break;
2104 case TASK_POLICY_THROUGH_QOS:
2105 assert(category == TASK_POLICY_ATTRIBUTE);
2106 value = requested.thrp_through_qos;
2107 break;
2108 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2109 assert(category == TASK_POLICY_ATTRIBUTE);
2110 value = requested.thrp_qos_workq_override;
2111 break;
2112 case TASK_POLICY_QOS_AND_RELPRIO:
2113 assert(category == TASK_POLICY_ATTRIBUTE);
2114 assert(value2 != NULL);
2115 value = requested.thrp_qos;
2116 *value2 = requested.thrp_qos_relprio;
2117 break;
2118 case TASK_POLICY_QOS_PROMOTE:
2119 assert(category == TASK_POLICY_ATTRIBUTE);
2120 value = requested.thrp_qos_promote;
2121 break;
2122 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2123 assert(category == TASK_POLICY_ATTRIBUTE);
2124 value = requested.thrp_qos_kevent_override;
2125 break;
2126 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2127 assert(category == TASK_POLICY_ATTRIBUTE);
2128 value = requested.thrp_qos_wlsvc_override;
2129 break;
2130 case TASK_POLICY_TERMINATED:
2131 assert(category == TASK_POLICY_ATTRIBUTE);
2132 value = requested.thrp_terminated;
2133 break;
2134
2135 default:
2136 panic("unknown policy_flavor %d", flavor);
2137 break;
2138 }
2139
2140 return value;
2141 }
2142
2143 /*
2144 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2145 *
2146 * NOTE: This accessor does not take the task or thread lock.
2147 * Notifications of state updates need to be externally synchronized with state queries.
2148 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2149 * within the context of a timer interrupt.
2150 *
2151 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2152 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2153 * I don't think that cost is worth not having the right answer.
2154 */
2155 int
2156 proc_get_effective_thread_policy(thread_t thread,
2157 int flavor)
2158 {
2159 int value = 0;
2160
2161 switch (flavor) {
2162 case TASK_POLICY_DARWIN_BG:
2163 /*
2164 * This call is used within the timer layer, as well as
2165 * prioritizing requests to the graphics system.
2166 * It also informs SFI and originator-bg-state.
2167 * Returns 1 for background mode, 0 for normal mode
2168 */
2169
2170 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2171 break;
2172 case TASK_POLICY_IO:
2173 /*
2174 * The I/O system calls here to find out what throttling tier to apply to an operation.
2175 * Returns THROTTLE_LEVEL_* values
2176 */
2177 value = thread->effective_policy.thep_io_tier;
2178 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2179 value = MIN(value, thread->iotier_override);
2180 }
2181 break;
2182 case TASK_POLICY_PASSIVE_IO:
2183 /*
2184 * The I/O system calls here to find out whether an operation should be passive.
2185 * (i.e. not cause operations with lower throttle tiers to be throttled)
2186 * Returns 1 for passive mode, 0 for normal mode
2187 *
2188 * If an override is causing IO to go into a lower tier, we also set
2189 * the passive bit so that a thread doesn't end up stuck in its own throttle
2190 * window when the override goes away.
2191 */
2192 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2193 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2194 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2195 value = 1;
2196 }
2197 break;
2198 case TASK_POLICY_ALL_SOCKETS_BG:
2199 /*
2200 * do_background_socket() calls this to determine whether
2201 * it should change the thread's sockets
2202 * Returns 1 for background mode, 0 for normal mode
2203 * This consults both thread and task so un-DBGing a thread while the task is BG
2204 * doesn't get you out of the network throttle.
2205 */
2206 value = (thread->effective_policy.thep_all_sockets_bg ||
2207 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2208 break;
2209 case TASK_POLICY_NEW_SOCKETS_BG:
2210 /*
2211 * socreate() calls this to determine if it should mark a new socket as background
2212 * Returns 1 for background mode, 0 for normal mode
2213 */
2214 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2215 break;
2216 case TASK_POLICY_LATENCY_QOS:
2217 /*
2218 * timer arming calls into here to find out the timer coalescing level
2219 * Returns a latency QoS tier (0-6)
2220 */
2221 value = thread->effective_policy.thep_latency_qos;
2222 break;
2223 case TASK_POLICY_THROUGH_QOS:
2224 /*
2225 * This value is passed into the urgency callout from the scheduler
2226 * to the performance management subsystem.
2227 *
2228 * Returns a throughput QoS tier (0-6)
2229 */
2230 value = thread->effective_policy.thep_through_qos;
2231 break;
2232 case TASK_POLICY_QOS:
2233 /*
2234 * This is communicated to the performance management layer and SFI.
2235 *
2236 * Returns a QoS policy tier
2237 */
2238 value = thread->effective_policy.thep_qos;
2239 break;
2240 default:
2241 panic("unknown thread policy flavor %d", flavor);
2242 break;
2243 }
2244
2245 return value;
2246 }
2247
2248
2249 /*
2250 * (integer_t) casts limit the number of bits we can fit here
2251 * this interface is deprecated and replaced by the _EXT struct ?
2252 */
2253 static void
2254 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2255 {
2256 uint64_t bits = 0;
2257 struct thread_requested_policy requested = thread->requested_policy;
2258
2259 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2260 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2261 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2262 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2263 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2264 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2265
2266 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2267 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2268
2269 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2270
2271 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2272 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2273
2274 info->requested = (integer_t) bits;
2275 bits = 0;
2276
2277 struct thread_effective_policy effective = thread->effective_policy;
2278
2279 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2280
2281 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2282 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2283 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2284 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2285
2286 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2287
2288 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2289 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2290
2291 info->effective = (integer_t)bits;
2292 bits = 0;
2293
2294 info->pending = 0;
2295 }
2296
/*
 * Sneakily trace either the task and thread requested
 * or just the thread requested, depending on if we have enough room.
 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
 *
 *                              LP32            LP64
 * threquested_0(thread)        thread[0]       thread[0]
 * threquested_1(thread)        thread[1]       task[0]
 *
 */
2307
2308 uintptr_t
2309 threquested_0(thread_t thread)
2310 {
2311 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2312
2313 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2314
2315 return raw[0];
2316 }
2317
2318 uintptr_t
2319 threquested_1(thread_t thread)
2320 {
2321 #if defined __LP64__
2322 return *(uintptr_t*)&thread->task->requested_policy;
2323 #else
2324 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2325 return raw[1];
2326 #endif
2327 }
2328
2329 uintptr_t
2330 theffective_0(thread_t thread)
2331 {
2332 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2333
2334 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2335 return raw[0];
2336 }
2337
2338 uintptr_t
2339 theffective_1(thread_t thread)
2340 {
2341 #if defined __LP64__
2342 return *(uintptr_t*)&thread->task->effective_policy;
2343 #else
2344 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2345 return raw[1];
2346 #endif
2347 }
2348
2349
2350 /*
2351 * Set an override on the thread which is consulted with a
2352 * higher priority than the task/thread policy. This should
2353 * only be set for temporary grants until the thread
2354 * returns to the userspace boundary
2355 *
2356 * We use atomic operations to swap in the override, with
2357 * the assumption that the thread itself can
2358 * read the override and clear it on return to userspace.
2359 *
2360 * No locking is performed, since it is acceptable to see
2361 * a stale override for one loop through throttle_lowpri_io().
2362 * However a thread reference must be held on the thread.
2363 */
2364
2365 void
2366 set_thread_iotier_override(thread_t thread, int policy)
2367 {
2368 int current_override;
2369
2370 /* Let most aggressive I/O policy win until user boundary */
2371 do {
2372 current_override = thread->iotier_override;
2373
2374 if (current_override != THROTTLE_LEVEL_NONE) {
2375 policy = MIN(current_override, policy);
2376 }
2377
2378 if (current_override == policy) {
2379 /* no effective change */
2380 return;
2381 }
2382 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2383
2384 /*
2385 * Since the thread may be currently throttled,
2386 * re-evaluate tiers and potentially break out
2387 * of an msleep
2388 */
2389 rethrottle_thread(thread->uthread);
2390 }
2391
2392 /*
2393 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2394 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2395 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2396 * priority thread. In these cases, we attempt to propagate the priority token, as long
2397 * as the subsystem informs us of the relationships between the threads. The userspace
2398 * synchronization subsystem should maintain the information of owner->resource and
2399 * resource->waiters itself.
2400 */
2401
2402 /*
2403 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2404 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2405 * to be handled specially in the future, but for now it's fine to slam
2406 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2407 */
2408 static void
2409 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2410 {
2411 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2412 /* Map all input resource/type to a single one */
2413 *resource = USER_ADDR_NULL;
2414 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2415 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2416 /* no transform */
2417 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2418 /* Map all mutex overrides to a single one, to avoid memory overhead */
2419 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2420 *resource = USER_ADDR_NULL;
2421 }
2422 }
2423 }
2424
2425 /* This helper routine finds an existing override if known. Locking should be done by caller */
2426 static struct thread_qos_override *
2427 find_qos_override(thread_t thread,
2428 user_addr_t resource,
2429 int resource_type)
2430 {
2431 struct thread_qos_override *override;
2432
2433 override = thread->overrides;
2434 while (override) {
2435 if (override->override_resource == resource &&
2436 override->override_resource_type == resource_type) {
2437 return override;
2438 }
2439
2440 override = override->override_next;
2441 }
2442
2443 return NULL;
2444 }
2445
2446 static void
2447 find_and_decrement_qos_override(thread_t thread,
2448 user_addr_t resource,
2449 int resource_type,
2450 boolean_t reset,
2451 struct thread_qos_override **free_override_list)
2452 {
2453 struct thread_qos_override *override, *override_prev;
2454
2455 override_prev = NULL;
2456 override = thread->overrides;
2457 while (override) {
2458 struct thread_qos_override *override_next = override->override_next;
2459
2460 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2461 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2462 if (reset) {
2463 override->override_contended_resource_count = 0;
2464 } else {
2465 override->override_contended_resource_count--;
2466 }
2467
2468 if (override->override_contended_resource_count == 0) {
2469 if (override_prev == NULL) {
2470 thread->overrides = override_next;
2471 } else {
2472 override_prev->override_next = override_next;
2473 }
2474
2475 /* Add to out-param for later zfree */
2476 override->override_next = *free_override_list;
2477 *free_override_list = override;
2478 } else {
2479 override_prev = override;
2480 }
2481
2482 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2483 return;
2484 }
2485 } else {
2486 override_prev = override;
2487 }
2488
2489 override = override_next;
2490 }
2491 }
2492
2493 /* This helper recalculates the current requested override using the policy selected at boot */
2494 static int
2495 calculate_requested_qos_override(thread_t thread)
2496 {
2497 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2498 return THREAD_QOS_UNSPECIFIED;
2499 }
2500
2501 /* iterate over all overrides and calculate MAX */
2502 struct thread_qos_override *override;
2503 int qos_override = THREAD_QOS_UNSPECIFIED;
2504
2505 override = thread->overrides;
2506 while (override) {
2507 qos_override = MAX(qos_override, override->override_qos);
2508 override = override->override_next;
2509 }
2510
2511 return qos_override;
2512 }
2513
2514 /*
2515 * Returns:
2516 * - 0 on success
2517 * - EINVAL if some invalid input was passed
2518 */
2519 static int
2520 proc_thread_qos_add_override_internal(thread_t thread,
2521 int override_qos,
2522 boolean_t first_override_for_resource,
2523 user_addr_t resource,
2524 int resource_type)
2525 {
2526 struct task_pend_token pend_token = {};
2527 int rc = 0;
2528
2529 thread_mtx_lock(thread);
2530
2531 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2532 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2533
2534 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2535 uint64_t, thread->requested_policy.thrp_qos,
2536 uint64_t, thread->effective_policy.thep_qos,
2537 int, override_qos, boolean_t, first_override_for_resource);
2538
2539 struct thread_qos_override *override;
2540 struct thread_qos_override *override_new = NULL;
2541 int new_qos_override, prev_qos_override;
2542 int new_effective_qos;
2543
2544 canonicalize_resource_and_type(&resource, &resource_type);
2545
2546 override = find_qos_override(thread, resource, resource_type);
2547 if (first_override_for_resource && !override) {
2548 /* We need to allocate a new object. Drop the thread lock and
2549 * recheck afterwards in case someone else added the override
2550 */
2551 thread_mtx_unlock(thread);
2552 override_new = zalloc(thread_qos_override_zone);
2553 thread_mtx_lock(thread);
2554 override = find_qos_override(thread, resource, resource_type);
2555 }
2556 if (first_override_for_resource && override) {
2557 /* Someone else already allocated while the thread lock was dropped */
2558 override->override_contended_resource_count++;
2559 } else if (!override && override_new) {
2560 override = override_new;
2561 override_new = NULL;
2562 override->override_next = thread->overrides;
2563 /* since first_override_for_resource was TRUE */
2564 override->override_contended_resource_count = 1;
2565 override->override_resource = resource;
2566 override->override_resource_type = (int16_t)resource_type;
2567 override->override_qos = THREAD_QOS_UNSPECIFIED;
2568 thread->overrides = override;
2569 }
2570
2571 if (override) {
2572 if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2573 override->override_qos = (int16_t)override_qos;
2574 } else {
2575 override->override_qos = MAX(override->override_qos, (int16_t)override_qos);
2576 }
2577 }
2578
2579 /* Determine how to combine the various overrides into a single current
2580 * requested override
2581 */
2582 new_qos_override = calculate_requested_qos_override(thread);
2583
2584 prev_qos_override = proc_get_thread_policy_locked(thread,
2585 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2586
2587 if (new_qos_override != prev_qos_override) {
2588 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2589 TASK_POLICY_QOS_OVERRIDE,
2590 new_qos_override, 0, &pend_token);
2591 }
2592
2593 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2594
2595 thread_mtx_unlock(thread);
2596
2597 thread_policy_update_complete_unlocked(thread, &pend_token);
2598
2599 if (override_new) {
2600 zfree(thread_qos_override_zone, override_new);
2601 }
2602
2603 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2604 int, new_qos_override, int, new_effective_qos, int, rc);
2605
2606 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2607 new_qos_override, resource, resource_type, 0, 0);
2608
2609 return rc;
2610 }
2611
2612 int
2613 proc_thread_qos_add_override(task_t task,
2614 thread_t thread,
2615 uint64_t tid,
2616 int override_qos,
2617 boolean_t first_override_for_resource,
2618 user_addr_t resource,
2619 int resource_type)
2620 {
2621 boolean_t has_thread_reference = FALSE;
2622 int rc = 0;
2623
2624 if (thread == THREAD_NULL) {
2625 thread = task_findtid(task, tid);
2626 /* returns referenced thread */
2627
2628 if (thread == THREAD_NULL) {
2629 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2630 tid, 0, 0xdead, 0, 0);
2631 return ESRCH;
2632 }
2633 has_thread_reference = TRUE;
2634 } else {
2635 assert(thread->task == task);
2636 }
2637 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2638 first_override_for_resource, resource, resource_type);
2639 if (has_thread_reference) {
2640 thread_deallocate(thread);
2641 }
2642
2643 return rc;
2644 }
2645
2646 static void
2647 proc_thread_qos_remove_override_internal(thread_t thread,
2648 user_addr_t resource,
2649 int resource_type,
2650 boolean_t reset)
2651 {
2652 struct task_pend_token pend_token = {};
2653
2654 struct thread_qos_override *deferred_free_override_list = NULL;
2655 int new_qos_override, prev_qos_override, new_effective_qos;
2656
2657 thread_mtx_lock(thread);
2658
2659 canonicalize_resource_and_type(&resource, &resource_type);
2660
2661 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2662
2663 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2664 thread_tid(thread), resource, reset, 0, 0);
2665
2666 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2667 uint64_t, thread->requested_policy.thrp_qos,
2668 uint64_t, thread->effective_policy.thep_qos);
2669
2670 /* Determine how to combine the various overrides into a single current requested override */
2671 new_qos_override = calculate_requested_qos_override(thread);
2672
2673 spl_t s = splsched();
2674 thread_lock(thread);
2675
2676 /*
2677 * The override chain and therefore the value of the current override is locked with thread mutex,
2678 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2679 * This means you can't change the current override from a spinlock-only setter.
2680 */
2681 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2682
2683 if (new_qos_override != prev_qos_override) {
2684 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2685 }
2686
2687 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2688
2689 thread_unlock(thread);
2690 splx(s);
2691
2692 thread_mtx_unlock(thread);
2693
2694 thread_policy_update_complete_unlocked(thread, &pend_token);
2695
2696 while (deferred_free_override_list) {
2697 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2698
2699 zfree(thread_qos_override_zone, deferred_free_override_list);
2700 deferred_free_override_list = override_next;
2701 }
2702
2703 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2704 int, new_qos_override, int, new_effective_qos);
2705
2706 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2707 thread_tid(thread), 0, 0, 0, 0);
2708 }
2709
2710 int
2711 proc_thread_qos_remove_override(task_t task,
2712 thread_t thread,
2713 uint64_t tid,
2714 user_addr_t resource,
2715 int resource_type)
2716 {
2717 boolean_t has_thread_reference = FALSE;
2718
2719 if (thread == THREAD_NULL) {
2720 thread = task_findtid(task, tid);
2721 /* returns referenced thread */
2722
2723 if (thread == THREAD_NULL) {
2724 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2725 tid, 0, 0xdead, 0, 0);
2726 return ESRCH;
2727 }
2728 has_thread_reference = TRUE;
2729 } else {
2730 assert(task == thread->task);
2731 }
2732
2733 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2734
2735 if (has_thread_reference) {
2736 thread_deallocate(thread);
2737 }
2738
2739 return 0;
2740 }
2741
2742 /* Deallocate before thread termination */
2743 void
2744 proc_thread_qos_deallocate(thread_t thread)
2745 {
2746 /* This thread must have no more IPC overrides. */
2747 assert(thread->kevent_overrides == 0);
2748 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2749 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2750
2751 /*
2752 * Clear out any lingering override objects.
2753 */
2754 struct thread_qos_override *override;
2755
2756 thread_mtx_lock(thread);
2757 override = thread->overrides;
2758 thread->overrides = NULL;
2759 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2760 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2761 thread_mtx_unlock(thread);
2762
2763 while (override) {
2764 struct thread_qos_override *override_next = override->override_next;
2765
2766 zfree(thread_qos_override_zone, override);
2767 override = override_next;
2768 }
2769 }
2770
2771 /*
2772 * Set up the primordial thread's QoS
2773 */
2774 void
2775 task_set_main_thread_qos(task_t task, thread_t thread)
2776 {
2777 struct task_pend_token pend_token = {};
2778
2779 assert(thread->task == task);
2780
2781 thread_mtx_lock(thread);
2782
2783 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2784 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2785 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2786 thread->requested_policy.thrp_qos, 0);
2787
2788 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2789
2790 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2791 primordial_qos, 0, &pend_token);
2792
2793 thread_mtx_unlock(thread);
2794
2795 thread_policy_update_complete_unlocked(thread, &pend_token);
2796
2797 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2798 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2799 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2800 primordial_qos, 0);
2801 }
2802
2803 /*
2804 * KPI for pthread kext
2805 *
2806 * Return a good guess at what the initial manager QoS will be
2807 * Dispatch can override this in userspace if it so chooses
2808 */
2809 thread_qos_t
2810 task_get_default_manager_qos(task_t task)
2811 {
2812 thread_qos_t primordial_qos = task_compute_main_thread_qos(task);
2813
2814 if (primordial_qos == THREAD_QOS_LEGACY) {
2815 primordial_qos = THREAD_QOS_USER_INITIATED;
2816 }
2817
2818 return primordial_qos;
2819 }
2820
2821 /*
2822 * Check if the kernel promotion on thread has changed
2823 * and apply it.
2824 *
2825 * thread locked on entry and exit
2826 */
2827 boolean_t
2828 thread_recompute_kernel_promotion_locked(thread_t thread)
2829 {
2830 boolean_t needs_update = FALSE;
2831 uint8_t kern_promotion_schedpri = (uint8_t)thread_get_inheritor_turnstile_sched_priority(thread);
2832
2833 /*
2834 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
2835 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
2836 * and propagates the priority through the chain with the same cap, because as of now it does
2837 * not differenciate on the kernel primitive.
2838 *
2839 * If this assumption will change with the adoption of a kernel primitive that does not
2840 * cap the when adding/propagating,
2841 * then here is the place to put the generic cap for all kernel primitives
2842 * (converts the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE))
2843 */
2844 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
2845
2846 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
2847 KDBG(MACHDBG_CODE(
2848 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
2849 thread_tid(thread),
2850 kern_promotion_schedpri,
2851 thread->kern_promotion_schedpri);
2852
2853 needs_update = TRUE;
2854 thread->kern_promotion_schedpri = kern_promotion_schedpri;
2855 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
2856 }
2857
2858 return needs_update;
2859 }
2860
2861 /*
2862 * Check if the user promotion on thread has changed
2863 * and apply it.
2864 *
2865 * thread locked on entry, might drop the thread lock
2866 * and reacquire it.
2867 */
2868 boolean_t
2869 thread_recompute_user_promotion_locked(thread_t thread)
2870 {
2871 boolean_t needs_update = FALSE;
2872 struct task_pend_token pend_token = {};
2873 uint8_t user_promotion_basepri = MIN((uint8_t)thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
2874 int old_base_pri = thread->base_pri;
2875 thread_qos_t qos_promotion;
2876
2877 /* Check if user promotion has changed */
2878 if (thread->user_promotion_basepri == user_promotion_basepri) {
2879 return needs_update;
2880 } else {
2881 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2882 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2883 thread_tid(thread),
2884 user_promotion_basepri,
2885 thread->user_promotion_basepri,
2886 0, 0);
2887 KDBG(MACHDBG_CODE(
2888 DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
2889 thread_tid(thread),
2890 user_promotion_basepri,
2891 thread->user_promotion_basepri);
2892 }
2893
2894 /* Update the user promotion base pri */
2895 thread->user_promotion_basepri = user_promotion_basepri;
2896 pend_token.tpt_force_recompute_pri = 1;
2897
2898 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2899 qos_promotion = THREAD_QOS_UNSPECIFIED;
2900 } else {
2901 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2902 }
2903
2904 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2905 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2906
2907 if (thread_get_waiting_turnstile(thread) &&
2908 thread->base_pri != old_base_pri) {
2909 needs_update = TRUE;
2910 }
2911
2912 thread_unlock(thread);
2913
2914 thread_policy_update_complete_unlocked(thread, &pend_token);
2915
2916 thread_lock(thread);
2917
2918 return needs_update;
2919 }
2920
2921 /*
2922 * Convert the thread user promotion base pri to qos for threads in qos world.
2923 * For priority above UI qos, the qos would be set to UI.
2924 */
2925 thread_qos_t
2926 thread_user_promotion_qos_for_pri(int priority)
2927 {
2928 thread_qos_t qos;
2929 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2930 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2931 return qos;
2932 }
2933 }
2934 return THREAD_QOS_MAINTENANCE;
2935 }
2936
2937 /*
2938 * Set the thread's QoS Kevent override
2939 * Owned by the Kevent subsystem
2940 *
2941 * May be called with spinlocks held, but not spinlocks
2942 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2943 *
2944 * One 'add' must be balanced by one 'drop'.
2945 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
2946 * Before the thread is deallocated, there must be 0 remaining overrides.
2947 */
2948 static void
2949 thread_kevent_override(thread_t thread,
2950 uint32_t qos_override,
2951 boolean_t is_new_override)
2952 {
2953 struct task_pend_token pend_token = {};
2954 boolean_t needs_update;
2955
2956 spl_t s = splsched();
2957 thread_lock(thread);
2958
2959 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
2960
2961 assert(qos_override > THREAD_QOS_UNSPECIFIED);
2962 assert(qos_override < THREAD_QOS_LAST);
2963
2964 if (is_new_override) {
2965 if (thread->kevent_overrides++ == 0) {
2966 /* This add is the first override for this thread */
2967 assert(old_override == THREAD_QOS_UNSPECIFIED);
2968 } else {
2969 /* There are already other overrides in effect for this thread */
2970 assert(old_override > THREAD_QOS_UNSPECIFIED);
2971 }
2972 } else {
2973 /* There must be at least one override (the previous add call) in effect */
2974 assert(thread->kevent_overrides > 0);
2975 assert(old_override > THREAD_QOS_UNSPECIFIED);
2976 }
2977
2978 /*
2979 * We can't allow lowering if there are several IPC overrides because
2980 * the caller can't possibly know the whole truth
2981 */
2982 if (thread->kevent_overrides == 1) {
2983 needs_update = qos_override != old_override;
2984 } else {
2985 needs_update = qos_override > old_override;
2986 }
2987
2988 if (needs_update) {
2989 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2990 TASK_POLICY_QOS_KEVENT_OVERRIDE,
2991 qos_override, 0, &pend_token);
2992 assert(pend_token.tpt_update_sockets == 0);
2993 }
2994
2995 thread_unlock(thread);
2996 splx(s);
2997
2998 thread_policy_update_complete_unlocked(thread, &pend_token);
2999 }
3000
3001 void
3002 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
3003 {
3004 thread_kevent_override(thread, qos_override, TRUE);
3005 }
3006
3007 void
3008 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
3009 {
3010 thread_kevent_override(thread, qos_override, FALSE);
3011 }
3012
3013 void
3014 thread_drop_kevent_override(thread_t thread)
3015 {
3016 struct task_pend_token pend_token = {};
3017
3018 spl_t s = splsched();
3019 thread_lock(thread);
3020
3021 assert(thread->kevent_overrides > 0);
3022
3023 if (--thread->kevent_overrides == 0) {
3024 /*
3025 * There are no more overrides for this thread, so we should
3026 * clear out the saturated override value
3027 */
3028
3029 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3030 TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3031 0, &pend_token);
3032 }
3033
3034 thread_unlock(thread);
3035 splx(s);
3036
3037 thread_policy_update_complete_unlocked(thread, &pend_token);
3038 }
3039
3040 /*
3041 * Set the thread's QoS Workloop Servicer override
3042 * Owned by the Kevent subsystem
3043 *
3044 * May be called with spinlocks held, but not spinlocks
3045 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3046 *
3047 * One 'add' must be balanced by one 'drop'.
3048 * Between 'add' and 'drop', the overide QoS value may be updated with an 'update'.
3049 * Before the thread is deallocated, there must be 0 remaining overrides.
3050 */
3051 static void
3052 thread_servicer_override(thread_t thread,
3053 uint32_t qos_override,
3054 boolean_t is_new_override)
3055 {
3056 struct task_pend_token pend_token = {};
3057
3058 spl_t s = splsched();
3059 thread_lock(thread);
3060
3061 if (is_new_override) {
3062 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3063 } else {
3064 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3065 }
3066
3067 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3068 TASK_POLICY_QOS_SERVICER_OVERRIDE,
3069 qos_override, 0, &pend_token);
3070
3071 thread_unlock(thread);
3072 splx(s);
3073
3074 assert(pend_token.tpt_update_sockets == 0);
3075 thread_policy_update_complete_unlocked(thread, &pend_token);
3076 }
3077
3078 void
3079 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3080 {
3081 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3082 assert(qos_override < THREAD_QOS_LAST);
3083
3084 thread_servicer_override(thread, qos_override, TRUE);
3085 }
3086
3087 void
3088 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3089 {
3090 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3091 assert(qos_override < THREAD_QOS_LAST);
3092
3093 thread_servicer_override(thread, qos_override, FALSE);
3094 }
3095
3096 void
3097 thread_drop_servicer_override(thread_t thread)
3098 {
3099 thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3100 }
3101
3102
3103 /* Get current requested qos / relpri, may be called from spinlock context */
3104 thread_qos_t
3105 thread_get_requested_qos(thread_t thread, int *relpri)
3106 {
3107 int relprio_value = 0;
3108 thread_qos_t qos;
3109
3110 qos = (thread_qos_t)proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3111 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3112 if (relpri) {
3113 *relpri = -relprio_value;
3114 }
3115 return qos;
3116 }
3117
3118 /*
3119 * This function will promote the thread priority
3120 * since exec could block other threads calling
3121 * proc_find on the proc. This boost must be removed
3122 * via call to thread_clear_exec_promotion.
3123 *
3124 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3125 */
3126 void
3127 thread_set_exec_promotion(thread_t thread)
3128 {
3129 spl_t s = splsched();
3130 thread_lock(thread);
3131
3132 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3133
3134 thread_unlock(thread);
3135 splx(s);
3136 }
3137
3138 /*
3139 * This function will clear the exec thread
3140 * promotion set on the thread by thread_set_exec_promotion.
3141 */
3142 void
3143 thread_clear_exec_promotion(thread_t thread)
3144 {
3145 spl_t s = splsched();
3146 thread_lock(thread);
3147
3148 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3149
3150 thread_unlock(thread);
3151 splx(s);
3152 }