osfmk/kern/thread_policy.c (xnu-6153.141.1)
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41
42 #include <mach/machine/sdt.h>
43
44 #ifdef MACH_BSD
45 extern int proc_selfpid(void);
46 extern char * proc_name_address(void *p);
47 extern void rethrottle_thread(void * uthread);
48 #endif /* MACH_BSD */
49
50 #define QOS_EXTRACT(q) ((q) & 0xff)
51
52 uint32_t qos_override_mode;
53 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
54 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
55 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
56 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 3
57
58 extern zone_t thread_qos_override_zone;
59
60 static void
61 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset);
62
63 /*
64 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
65 * to threads that don't have a QoS class set.
66 */
67 const qos_policy_params_t thread_qos_policy_params = {
68 /*
69 * This table defines the starting base priority of the thread,
70 * which will be modified by the thread importance and the task max priority
71 * before being applied.
72 */
73 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
74 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
75 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
76 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
77 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
78 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
79 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
80
81 /*
82 * This table defines the highest IO priority that a thread marked with this
83 * QoS class can have.
84 */
85 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
86 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
87 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
88 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
89 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
90 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
91 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
92
93 /*
94 * These tables define the throughput and latency QoS tiers
95 * that correspond to each thread QoS class.
96 */
97
98 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
99 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
100 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
101 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
102 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
103 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
104 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
105
106 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
107 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
108 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
109 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
110 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
111 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
112 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
113 };
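/*
 * Worked example (editor's illustration, not part of the build): for a
 * THREAD_QOS_UTILITY thread these tables yield a starting base priority of
 * BASEPRI_UTILITY (20 on current kernels), an IO ceiling of
 * THROTTLE_LEVEL_TIER1, throughput tier 2 and latency tier 3. A hypothetical
 * helper that gathers one row could look like the sketch below; the struct
 * name and helper are assumptions for illustration only.
 *
 *	struct qos_row {
 *		int      pri;      // starting base priority
 *		int      iotier;   // max IO throttle tier
 *		uint32_t through;  // throughput QoS tier
 *		uint32_t latency;  // latency QoS tier
 *	};
 *
 *	static struct qos_row
 *	qos_row_for_class(int qos)
 *	{
 *		assert(qos >= 0 && qos < THREAD_QOS_LAST);
 *		return (struct qos_row){
 *			.pri     = thread_qos_policy_params.qos_pri[qos],
 *			.iotier  = thread_qos_policy_params.qos_iotier[qos],
 *			.through = thread_qos_policy_params.qos_through_qos[qos],
 *			.latency = thread_qos_policy_params.qos_latency_qos[qos],
 *		};
 *	}
 */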
114
115 static void
116 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
117
118 static int
119 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
120
121 static void
122 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
123
124 static void
125 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
126
127 static void
128 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
129
130 static void
131 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
132
133 static int
134 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
135
136 static int
137 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
138
139 static void
140 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
141
142 static void
143 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
144
145 void
146 thread_policy_init(void)
147 {
148 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
149 printf("QOS override mode: 0x%08x\n", qos_override_mode);
150 } else {
151 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
152 }
153 }
154
155 boolean_t
156 thread_has_qos_policy(thread_t thread)
157 {
158 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
159 }
160
161
162 static void
163 thread_remove_qos_policy_locked(thread_t thread,
164 task_pend_token_t pend_token)
165 {
166 __unused int prev_qos = thread->requested_policy.thrp_qos;
167
168 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169
170 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173
174 kern_return_t
175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210 static int
211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) {
217 return 0;
218 }
219
220 switch (qos) {
221 case THREAD_QOS_USER_INTERACTIVE:
222 next_lower_qos = THREAD_QOS_USER_INITIATED;
223 break;
224 case THREAD_QOS_USER_INITIATED:
225 next_lower_qos = THREAD_QOS_LEGACY;
226 break;
227 case THREAD_QOS_LEGACY:
228 next_lower_qos = THREAD_QOS_UTILITY;
229 break;
230 case THREAD_QOS_UTILITY:
231 next_lower_qos = THREAD_QOS_BACKGROUND;
232 break;
233 case THREAD_QOS_MAINTENANCE:
234 case THREAD_QOS_BACKGROUND:
235 next_lower_qos = 0;
236 break;
237 default:
238 panic("Unrecognized QoS %d", qos);
239 return 0;
240 }
241
242 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
243 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
244
245 /*
246 * We now have the valid range that the scaled relative priority can map to. Note
247 * that the lower bound is exclusive, but the upper bound is inclusive. If the
248 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
249 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
250 * remainder.
251 */
252 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
253
254 return scaled_relprio;
255 }
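/*
 * Worked example (editor's illustration): for THREAD_QOS_LEGACY the range is
 * (qos_pri[UTILITY], qos_pri[LEGACY]] = (20, 31], so the span is 11.
 *	-15REL -> -((11 * 15) >> 4) = -10, giving base 31 - 10 = 21
 *	 -8REL -> -((11 *  8) >> 4) =  -5, giving base 31 -  5 = 26
 *	 -1REL -> -((11 *  1) >> 4) =   0, i.e. small offsets can round to zero
 * Every result stays above the next lower band's base priority of 20.
 */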
256
257 /*
258 * flag set by -qos-policy-allow boot-arg to allow
259 * testing thread qos policy from userspace
260 */
261 boolean_t allow_qos_policy_set = FALSE;
262
263 kern_return_t
264 thread_policy_set(
265 thread_t thread,
266 thread_policy_flavor_t flavor,
267 thread_policy_t policy_info,
268 mach_msg_type_number_t count)
269 {
270 thread_qos_policy_data_t req_qos;
271 kern_return_t kr;
272
273 req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
274
275 if (thread == THREAD_NULL) {
276 return KERN_INVALID_ARGUMENT;
277 }
278
279 if (allow_qos_policy_set == FALSE) {
280 if (thread_is_static_param(thread)) {
281 return KERN_POLICY_STATIC;
282 }
283
284 if (flavor == THREAD_QOS_POLICY) {
285 return KERN_INVALID_ARGUMENT;
286 }
287 }
288
289 /* Threads without static_param set reset their QoS when other policies are applied. */
290 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
291 /* Store the existing tier, if we fail this call it is used to reset back. */
292 req_qos.qos_tier = thread->requested_policy.thrp_qos;
293 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
294
295 kr = thread_remove_qos_policy(thread);
296 if (kr != KERN_SUCCESS) {
297 return kr;
298 }
299 }
300
301 kr = thread_policy_set_internal(thread, flavor, policy_info, count);
302
303 /* If we stripped an existing QoS policy above and the new set failed, restore the original tier. */
304 if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
305 if (kr != KERN_SUCCESS) {
306 /* Reset back to our original tier as the set failed. */
307 (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
308 }
309 }
310
311 return kr;
312 }
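/*
 * Usage sketch (editor's illustration, user-space caller): applying a legacy
 * policy through this entry point implicitly strips any existing QoS policy,
 * as described above. Assuming the standard Mach headers, and a thread whose
 * parameters are not marked static, a caller might do:
 *
 *	#include <mach/mach.h>
 *	#include <mach/thread_policy.h>
 *
 *	thread_precedence_policy_data_t prec = { .importance = 5 };
 *	kern_return_t kr = thread_policy_set(mach_thread_self(),
 *	    THREAD_PRECEDENCE_POLICY, (thread_policy_t)&prec,
 *	    THREAD_PRECEDENCE_POLICY_COUNT);
 *
 * If the thread previously had a QoS set, the QoS is removed first; if the
 * set then fails, the original QoS tier is restored.
 */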
313
314 kern_return_t
315 thread_policy_set_internal(
316 thread_t thread,
317 thread_policy_flavor_t flavor,
318 thread_policy_t policy_info,
319 mach_msg_type_number_t count)
320 {
321 kern_return_t result = KERN_SUCCESS;
322 struct task_pend_token pend_token = {};
323
324 thread_mtx_lock(thread);
325 if (!thread->active) {
326 thread_mtx_unlock(thread);
327
328 return KERN_TERMINATED;
329 }
330
331 switch (flavor) {
332 case THREAD_EXTENDED_POLICY:
333 {
334 boolean_t timeshare = TRUE;
335
336 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
337 thread_extended_policy_t info;
338
339 info = (thread_extended_policy_t)policy_info;
340 timeshare = info->timeshare;
341 }
342
343 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
344
345 spl_t s = splsched();
346 thread_lock(thread);
347
348 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
349
350 thread_unlock(thread);
351 splx(s);
352
353 pend_token.tpt_update_thread_sfi = 1;
354
355 break;
356 }
357
358 case THREAD_TIME_CONSTRAINT_POLICY:
359 {
360 thread_time_constraint_policy_t info;
361
362 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
363 result = KERN_INVALID_ARGUMENT;
364 break;
365 }
366
367 info = (thread_time_constraint_policy_t)policy_info;
368 if (info->constraint < info->computation ||
369 info->computation > max_rt_quantum ||
370 info->computation < min_rt_quantum) {
371 result = KERN_INVALID_ARGUMENT;
372 break;
373 }
374
375 spl_t s = splsched();
376 thread_lock(thread);
377
378 thread->realtime.period = info->period;
379 thread->realtime.computation = info->computation;
380 thread->realtime.constraint = info->constraint;
381 thread->realtime.preemptible = info->preemptible;
382
383 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
384
385 thread_unlock(thread);
386 splx(s);
387
388 pend_token.tpt_update_thread_sfi = 1;
389
390 break;
391 }
392
393 case THREAD_PRECEDENCE_POLICY:
394 {
395 thread_precedence_policy_t info;
396
397 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
398 result = KERN_INVALID_ARGUMENT;
399 break;
400 }
401 info = (thread_precedence_policy_t)policy_info;
402
403 spl_t s = splsched();
404 thread_lock(thread);
405
406 thread->importance = info->importance;
407
408 thread_recompute_priority(thread);
409
410 thread_unlock(thread);
411 splx(s);
412
413 break;
414 }
415
416 case THREAD_AFFINITY_POLICY:
417 {
418 thread_affinity_policy_t info;
419
420 if (!thread_affinity_is_supported()) {
421 result = KERN_NOT_SUPPORTED;
422 break;
423 }
424 if (count < THREAD_AFFINITY_POLICY_COUNT) {
425 result = KERN_INVALID_ARGUMENT;
426 break;
427 }
428
429 info = (thread_affinity_policy_t) policy_info;
430 /*
431 * Unlock the thread mutex here and
432 * return directly after calling thread_affinity_set().
433 * This is necessary for correct lock ordering because
434 * thread_affinity_set() takes the task lock.
435 */
436 thread_mtx_unlock(thread);
437 return thread_affinity_set(thread, info->affinity_tag);
438 }
439
440 #if CONFIG_EMBEDDED
441 case THREAD_BACKGROUND_POLICY:
442 {
443 thread_background_policy_t info;
444
445 if (count < THREAD_BACKGROUND_POLICY_COUNT) {
446 result = KERN_INVALID_ARGUMENT;
447 break;
448 }
449
450 if (thread->task != current_task()) {
451 result = KERN_PROTECTION_FAILURE;
452 break;
453 }
454
455 info = (thread_background_policy_t) policy_info;
456
457 int enable;
458
459 if (info->priority == THREAD_BACKGROUND_POLICY_DARWIN_BG) {
460 enable = TASK_POLICY_ENABLE;
461 } else {
462 enable = TASK_POLICY_DISABLE;
463 }
464
465 int category = (current_thread() == thread) ? TASK_POLICY_INTERNAL : TASK_POLICY_EXTERNAL;
466
467 proc_set_thread_policy_locked(thread, category, TASK_POLICY_DARWIN_BG, enable, 0, &pend_token);
468
469 break;
470 }
471 #endif /* CONFIG_EMBEDDED */
472
473 case THREAD_THROUGHPUT_QOS_POLICY:
474 {
475 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
476 thread_throughput_qos_t tqos;
477
478 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
479 result = KERN_INVALID_ARGUMENT;
480 break;
481 }
482
483 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS) {
484 break;
485 }
486
487 tqos = qos_extract(info->thread_throughput_qos_tier);
488
489 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
490 TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
491
492 break;
493 }
494
495 case THREAD_LATENCY_QOS_POLICY:
496 {
497 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
498 thread_latency_qos_t lqos;
499
500 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
501 result = KERN_INVALID_ARGUMENT;
502 break;
503 }
504
505 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS) {
506 break;
507 }
508
509 lqos = qos_extract(info->thread_latency_qos_tier);
510
511 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
512 TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
513
514 break;
515 }
516
517 case THREAD_QOS_POLICY:
518 {
519 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
520
521 if (count < THREAD_QOS_POLICY_COUNT) {
522 result = KERN_INVALID_ARGUMENT;
523 break;
524 }
525
526 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
527 result = KERN_INVALID_ARGUMENT;
528 break;
529 }
530
531 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
532 result = KERN_INVALID_ARGUMENT;
533 break;
534 }
535
536 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
537 result = KERN_INVALID_ARGUMENT;
538 break;
539 }
540
541 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
542 info->qos_tier, -info->tier_importance, &pend_token);
543
544 break;
545 }
546
547 default:
548 result = KERN_INVALID_ARGUMENT;
549 break;
550 }
551
552 thread_mtx_unlock(thread);
553
554 thread_policy_update_complete_unlocked(thread, &pend_token);
555
556 return result;
557 }
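/*
 * Usage sketch (editor's illustration, user-space caller): the realtime
 * parameters handled above are expressed in mach absolute time units, so
 * callers normally convert from nanoseconds first. Assuming
 * <mach/mach_time.h>, with illustrative values of a 10ms period, 5ms
 * computation and 10ms constraint:
 *
 *	mach_timebase_info_data_t tb;
 *	mach_timebase_info(&tb);
 *	uint64_t ms = (1000000ull * tb.denom) / tb.numer;  // 1ms in abs time
 *
 *	thread_time_constraint_policy_data_t rt = {
 *		.period      = (uint32_t)(10 * ms),
 *		.computation = (uint32_t)(5 * ms),
 *		.constraint  = (uint32_t)(10 * ms),
 *		.preemptible = TRUE,
 *	};
 *	kern_return_t kr = thread_policy_set(mach_thread_self(),
 *	    THREAD_TIME_CONSTRAINT_POLICY, (thread_policy_t)&rt,
 *	    THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 *
 * The kernel rejects computations outside [min_rt_quantum, max_rt_quantum]
 * and constraints smaller than the computation, as checked above.
 */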
558
559 /*
560 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
561 * Both result in FIXED mode scheduling.
562 */
563 static sched_mode_t
564 convert_policy_to_sched_mode(integer_t policy)
565 {
566 switch (policy) {
567 case POLICY_TIMESHARE:
568 return TH_MODE_TIMESHARE;
569 case POLICY_RR:
570 case POLICY_FIFO:
571 return TH_MODE_FIXED;
572 default:
573 panic("unexpected sched policy: %d", policy);
574 return TH_MODE_NONE;
575 }
576 }
577
578 /*
579 * Called either with the thread mutex locked
580 * or from the pthread kext in a 'safe place'.
581 */
582 static kern_return_t
583 thread_set_mode_and_absolute_pri_internal(thread_t thread,
584 sched_mode_t mode,
585 integer_t priority,
586 task_pend_token_t pend_token)
587 {
588 kern_return_t kr = KERN_SUCCESS;
589
590 spl_t s = splsched();
591 thread_lock(thread);
592
593 /* This path isn't allowed to change a thread out of realtime. */
594 if ((thread->sched_mode == TH_MODE_REALTIME) ||
595 (thread->saved_mode == TH_MODE_REALTIME)) {
596 kr = KERN_FAILURE;
597 goto unlock;
598 }
599
600 if (thread->policy_reset) {
601 kr = KERN_SUCCESS;
602 goto unlock;
603 }
604
605 sched_mode_t old_mode = thread->sched_mode;
606
607 /*
608 * Reverse engineer and apply the correct importance value
609 * from the requested absolute priority value.
610 *
611 * TODO: Store the absolute priority value instead
612 */
613
614 if (priority >= thread->max_priority) {
615 priority = thread->max_priority - thread->task_priority;
616 } else if (priority >= MINPRI_KERNEL) {
617 priority -= MINPRI_KERNEL;
618 } else if (priority >= MINPRI_RESERVED) {
619 priority -= MINPRI_RESERVED;
620 } else {
621 priority -= BASEPRI_DEFAULT;
622 }
623
624 priority += thread->task_priority;
625
626 if (priority > thread->max_priority) {
627 priority = thread->max_priority;
628 } else if (priority < MINPRI) {
629 priority = MINPRI;
630 }
631
632 thread->importance = priority - thread->task_priority;
633
634 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
635
636 if (mode != old_mode) {
637 pend_token->tpt_update_thread_sfi = 1;
638 }
639
640 unlock:
641 thread_unlock(thread);
642 splx(s);
643
644 return kr;
645 }
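/*
 * Worked example (editor's illustration): for a user thread with
 * task_priority = BASEPRI_DEFAULT (31) and max_priority = MAXPRI_USER (63),
 * a requested absolute priority of 37 is below MINPRI_RESERVED, so the code
 * computes importance = 37 - BASEPRI_DEFAULT = 6, re-adds the task priority
 * to arrive back at 37, and stores importance = 6. Requests at or above
 * max_priority simply clamp to max_priority.
 */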
646
647 void
648 thread_freeze_base_pri(thread_t thread)
649 {
650 assert(thread == current_thread());
651
652 spl_t s = splsched();
653 thread_lock(thread);
654
655 assert((thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN) == 0);
656 thread->sched_flags |= TH_SFLAG_BASE_PRI_FROZEN;
657
658 thread_unlock(thread);
659 splx(s);
660 }
661
662 bool
663 thread_unfreeze_base_pri(thread_t thread)
664 {
665 assert(thread == current_thread());
666 integer_t base_pri;
667 ast_t ast = 0;
668
669 spl_t s = splsched();
670 thread_lock(thread);
671
672 assert(thread->sched_flags & TH_SFLAG_BASE_PRI_FROZEN);
673 thread->sched_flags &= ~TH_SFLAG_BASE_PRI_FROZEN;
674
675 base_pri = thread->req_base_pri;
676 if (base_pri != thread->base_pri) {
677 /*
678 * This function returns "true" if the base pri change
679 * is the most likely cause for the preemption.
680 */
681 sched_set_thread_base_priority(thread, base_pri);
682 ast = ast_peek(AST_PREEMPT);
683 }
684
685 thread_unlock(thread);
686 splx(s);
687
688 return ast != 0;
689 }
690
691 uint8_t
692 thread_workq_pri_for_qos(thread_qos_t qos)
693 {
694 assert(qos < THREAD_QOS_LAST);
695 return (uint8_t)thread_qos_policy_params.qos_pri[qos];
696 }
697
698 thread_qos_t
699 thread_workq_qos_for_pri(int priority)
700 {
701 int qos;
702 if (priority > thread_qos_policy_params.qos_pri[THREAD_QOS_USER_INTERACTIVE]) {
703 // indicate that workq should map >UI threads to workq's
704 // internal notation for above-UI work.
705 return THREAD_QOS_UNSPECIFIED;
706 }
707 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
708 // map a given priority up to the next nearest qos band.
709 if (thread_qos_policy_params.qos_pri[qos - 1] < priority) {
710 return qos;
711 }
712 }
713 return THREAD_QOS_MAINTENANCE;
714 }
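/*
 * Worked example (editor's illustration): a priority of 25 falls between
 * qos_pri[UTILITY] (20) and qos_pri[LEGACY] (31), so the loop returns
 * THREAD_QOS_LEGACY, i.e. priorities round up to the next QoS band.
 * Anything above qos_pri[USER_INTERACTIVE] returns THREAD_QOS_UNSPECIFIED
 * so the workqueue layer can treat it as above-UI work.
 */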
715
716 /*
717 * private interface for pthread workqueues
718 *
719 * Reset the thread's requested QoS to the given value and clear any workqueue QoS override
720 * May be called with spinlocks held
721 * Thread mutex lock is not held
722 */
723 void
724 thread_reset_workq_qos(thread_t thread, uint32_t qos)
725 {
726 struct task_pend_token pend_token = {};
727
728 assert(qos < THREAD_QOS_LAST);
729
730 spl_t s = splsched();
731 thread_lock(thread);
732
733 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
734 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
735 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
736 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED, 0,
737 &pend_token);
738
739 assert(pend_token.tpt_update_sockets == 0);
740
741 thread_unlock(thread);
742 splx(s);
743
744 thread_policy_update_complete_unlocked(thread, &pend_token);
745 }
746
747 /*
748 * private interface for pthread workqueues
749 *
750 * Apply a workqueue QoS override to the thread
751 * May be called with spinlocks held
752 * Thread mutex lock is held
753 */
754 void
755 thread_set_workq_override(thread_t thread, uint32_t qos)
756 {
757 struct task_pend_token pend_token = {};
758
759 assert(qos < THREAD_QOS_LAST);
760
761 spl_t s = splsched();
762 thread_lock(thread);
763
764 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
765 TASK_POLICY_QOS_WORKQ_OVERRIDE, qos, 0, &pend_token);
766
767 assert(pend_token.tpt_update_sockets == 0);
768
769 thread_unlock(thread);
770 splx(s);
771
772 thread_policy_update_complete_unlocked(thread, &pend_token);
773 }
774
775 /*
776 * private interface for pthread workqueues
777 *
778 * Set scheduling policy & absolute priority for thread
779 * May be called with spinlocks held
780 * Thread mutex lock is not held
781 */
782 void
783 thread_set_workq_pri(thread_t thread,
784 thread_qos_t qos,
785 integer_t priority,
786 integer_t policy)
787 {
788 struct task_pend_token pend_token = {};
789 sched_mode_t mode = convert_policy_to_sched_mode(policy);
790
791 assert(qos < THREAD_QOS_LAST);
792 assert(thread->static_param);
793
794 if (!thread->static_param || !thread->active) {
795 return;
796 }
797
798 spl_t s = splsched();
799 thread_lock(thread);
800
801 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
802 TASK_POLICY_QOS_AND_RELPRIO, qos, 0, &pend_token);
803 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
804 TASK_POLICY_QOS_WORKQ_OVERRIDE, THREAD_QOS_UNSPECIFIED,
805 0, &pend_token);
806
807 thread_unlock(thread);
808 splx(s);
809
810 /* Concern: this doesn't hold the mutex... */
811
812 __assert_only kern_return_t kr;
813 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority,
814 &pend_token);
815 assert(kr == KERN_SUCCESS);
816
817 if (pend_token.tpt_update_thread_sfi) {
818 sfi_reevaluate(thread);
819 }
820 }
821
822 /*
823 * thread_set_mode_and_absolute_pri:
824 *
825 * Set scheduling policy & absolute priority for thread, for deprecated
826 * thread_set_policy and thread_policy interfaces.
827 *
828 * Called with nothing locked.
829 */
830 kern_return_t
831 thread_set_mode_and_absolute_pri(thread_t thread,
832 integer_t policy,
833 integer_t priority)
834 {
835 kern_return_t kr = KERN_SUCCESS;
836 struct task_pend_token pend_token = {};
837
838 sched_mode_t mode = convert_policy_to_sched_mode(policy);
839
840 thread_mtx_lock(thread);
841
842 if (!thread->active) {
843 kr = KERN_TERMINATED;
844 goto unlock;
845 }
846
847 if (thread_is_static_param(thread)) {
848 kr = KERN_POLICY_STATIC;
849 goto unlock;
850 }
851
852 /* Setting legacy policies on threads kills the current QoS */
853 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
854 thread_remove_qos_policy_locked(thread, &pend_token);
855 }
856
857 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
858
859 unlock:
860 thread_mtx_unlock(thread);
861
862 thread_policy_update_complete_unlocked(thread, &pend_token);
863
864 return kr;
865 }
866
867 /*
868 * Set the thread's requested mode and recompute priority
869 * Called with thread mutex and thread locked
870 *
871 * TODO: Mitigate potential problems caused by moving thread to end of runq
872 * whenever its priority is recomputed
873 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
874 */
875 static void
876 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
877 {
878 if (thread->policy_reset) {
879 return;
880 }
881
882 boolean_t removed = thread_run_queue_remove(thread);
883
884 /*
885 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
886 * That way there's zero confusion over which the user wants
887 * and which the kernel wants.
888 */
889 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK) {
890 thread->saved_mode = mode;
891 } else {
892 sched_set_thread_mode(thread, mode);
893 }
894
895 thread_recompute_priority(thread);
896
897 if (removed) {
898 thread_run_queue_reinsert(thread, SCHED_TAILQ);
899 }
900 }
901
902 /* called at splsched with thread lock locked */
903 static void
904 thread_update_qos_cpu_time_locked(thread_t thread)
905 {
906 task_t task = thread->task;
907 uint64_t timer_sum, timer_delta;
908
909 /*
910 * This is only as accurate as the distance between
911 * last context switch (embedded) or last user/kernel boundary transition (desktop)
912 * because user_timer and system_timer are only updated then.
913 *
914 * TODO: Consider running a timer_update operation here to update it first.
915 * Maybe doable with interrupts disabled from current thread.
916 * If the thread is on a different core, may not be easy to get right.
917 *
918 * TODO: There should be a function for this in timer.c
919 */
920
921 timer_sum = timer_grab(&thread->user_timer);
922 timer_sum += timer_grab(&thread->system_timer);
923 timer_delta = timer_sum - thread->vtimer_qos_save;
924
925 thread->vtimer_qos_save = timer_sum;
926
927 uint64_t* task_counter = NULL;
928
929 /* Update the task-level effective and requested qos stats atomically, because we don't have the task lock. */
930 switch (thread->effective_policy.thep_qos) {
931 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_default; break;
932 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_maintenance; break;
933 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_background; break;
934 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_utility; break;
935 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_legacy; break;
936 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_initiated; break;
937 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_eqos_stats.cpu_time_qos_user_interactive; break;
938 default:
939 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
940 }
941
942 OSAddAtomic64(timer_delta, task_counter);
943
944 /* Update the task-level qos stats atomically, because we don't have the task lock. */
945 switch (thread->requested_policy.thrp_qos) {
946 case THREAD_QOS_UNSPECIFIED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_default; break;
947 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_maintenance; break;
948 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_background; break;
949 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_utility; break;
950 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_legacy; break;
951 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_initiated; break;
952 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_rqos_stats.cpu_time_qos_user_interactive; break;
953 default:
954 panic("unknown requested QoS: %d", thread->requested_policy.thrp_qos);
955 }
956
957 OSAddAtomic64(timer_delta, task_counter);
958 }
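/*
 * Observation sketch (editor's illustration): the per-QoS counters updated
 * here surface to user space through the resource usage interface. Assuming
 * <libproc.h> and a recent rusage_info version, a monitoring tool could read
 * them roughly like this:
 *
 *	struct rusage_info_v4 ri;
 *	if (proc_pid_rusage(getpid(), RUSAGE_INFO_V4, (rusage_info_t *)&ri) == 0) {
 *		printf("utility cpu time: %llu\n", ri.ri_cpu_time_qos_utility);
 *	}
 */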
959
960 /*
961 * called with no thread locks held
962 * may hold task lock
963 */
964 void
965 thread_update_qos_cpu_time(thread_t thread)
966 {
967 thread_mtx_lock(thread);
968
969 spl_t s = splsched();
970 thread_lock(thread);
971
972 thread_update_qos_cpu_time_locked(thread);
973
974 thread_unlock(thread);
975 splx(s);
976
977 thread_mtx_unlock(thread);
978 }
979
980 /*
981 * Calculate base priority from thread attributes, and set it on the thread
982 *
983 * Called with thread_lock and thread mutex held.
984 */
985 extern thread_t vm_pageout_scan_thread;
986 extern boolean_t vps_dynamic_priority_enabled;
987
988 void
989 thread_recompute_priority(
990 thread_t thread)
991 {
992 integer_t priority;
993
994 if (thread->policy_reset) {
995 return;
996 }
997
998 if (thread->sched_mode == TH_MODE_REALTIME) {
999 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
1000 return;
1001 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
1002 int qos = thread->effective_policy.thep_qos;
1003 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
1004 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
1005 int qos_scaled_relprio;
1006
1007 assert(qos >= 0 && qos < THREAD_QOS_LAST);
1008 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1009
1010 priority = thread_qos_policy_params.qos_pri[qos];
1011 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
1012
1013 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
1014 /* Bump priority 46 to 47 when in a frontmost app */
1015 qos_scaled_relprio += 1;
1016 }
1017
1018 /* TODO: factor in renice priority here? */
1019
1020 priority += qos_scaled_relprio;
1021 } else {
1022 if (thread->importance > MAXPRI) {
1023 priority = MAXPRI;
1024 } else if (thread->importance < -MAXPRI) {
1025 priority = -MAXPRI;
1026 } else {
1027 priority = thread->importance;
1028 }
1029
1030 priority += thread->task_priority;
1031 }
1032
1033 priority = MAX(priority, thread->user_promotion_basepri);
1034
1035 /*
1036 * Clamp priority back into the allowed range for this task.
1037 * The initial priority value could be out of this range due to:
1038 * Task clamped to BG or Utility (max-pri is 4, or 20)
1039 * Task is user task (max-pri is 63)
1040 * Task is kernel task (max-pri is 95)
1041 * Note that thread->importance is user-settable to any integer
1042 * via THREAD_PRECEDENCE_POLICY.
1043 */
1044 if (priority > thread->max_priority) {
1045 priority = thread->max_priority;
1046 } else if (priority < MINPRI) {
1047 priority = MINPRI;
1048 }
1049
1050 if (thread->saved_mode == TH_MODE_REALTIME &&
1051 thread->sched_flags & TH_SFLAG_FAILSAFE) {
1052 priority = DEPRESSPRI;
1053 }
1054
1055 if (thread->effective_policy.thep_terminated == TRUE) {
1056 /*
1057 * We temporarily want to override the expected priority to
1058 * ensure that the thread exits in a timely manner.
1059 * Note that this is allowed to exceed thread->max_priority
1060 * so that the thread is no longer clamped to background
1061 * during the final exit phase.
1062 */
1063 if (priority < thread->task_priority) {
1064 priority = thread->task_priority;
1065 }
1066 if (priority < BASEPRI_DEFAULT) {
1067 priority = BASEPRI_DEFAULT;
1068 }
1069 }
1070
1071 #if CONFIG_EMBEDDED
1072 /* No one can have a base priority less than MAXPRI_THROTTLE */
1073 if (priority < MAXPRI_THROTTLE) {
1074 priority = MAXPRI_THROTTLE;
1075 }
1076 #endif /* CONFIG_EMBEDDED */
1077
1078 sched_set_thread_base_priority(thread, priority);
1079 }
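/*
 * Worked example (editor's illustration): a THREAD_QOS_UTILITY thread with
 * relative priority -2 starts from qos_pri[UTILITY] = 20; the scaled offset
 * is -(((20 - 4) * 2) >> 4) = -2, giving a base priority of 18. The result
 * is then raised to any user promotion base priority and clamped to
 * [MINPRI, max_priority] before being applied.
 */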
1080
1081 /* Called with the task lock held, but not the thread mutex or spinlock */
1082 void
1083 thread_policy_update_tasklocked(
1084 thread_t thread,
1085 integer_t priority,
1086 integer_t max_priority,
1087 task_pend_token_t pend_token)
1088 {
1089 thread_mtx_lock(thread);
1090
1091 if (!thread->active || thread->policy_reset) {
1092 thread_mtx_unlock(thread);
1093 return;
1094 }
1095
1096 spl_t s = splsched();
1097 thread_lock(thread);
1098
1099 __unused
1100 integer_t old_max_priority = thread->max_priority;
1101
1102 thread->task_priority = priority;
1103 thread->max_priority = max_priority;
1104
1105 #if CONFIG_EMBEDDED
1106 /*
1107 * When backgrounding a thread, iOS has the semantic that
1108 * realtime and fixed priority threads should be demoted
1109 * to timeshare background threads.
1110 *
1111 * On OSX, realtime and fixed priority threads don't lose their mode.
1112 *
1113 * TODO: Do this inside the thread policy update routine in order to avoid double
1114 * remove/reinsert for a runnable thread
1115 */
1116 if ((max_priority <= MAXPRI_THROTTLE) && (old_max_priority > MAXPRI_THROTTLE)) {
1117 sched_thread_mode_demote(thread, TH_SFLAG_THROTTLED);
1118 } else if ((max_priority > MAXPRI_THROTTLE) && (old_max_priority <= MAXPRI_THROTTLE)) {
1119 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1120 }
1121 #endif /* CONFIG_EMBEDDED */
1122
1123 thread_policy_update_spinlocked(thread, TRUE, pend_token);
1124
1125 thread_unlock(thread);
1126 splx(s);
1127
1128 thread_mtx_unlock(thread);
1129 }
1130
1131 /*
1132 * Reset thread to default state in preparation for termination
1133 * Called with thread mutex locked
1134 *
1135 * Always called on current thread, so we don't need a run queue remove
1136 */
1137 void
1138 thread_policy_reset(
1139 thread_t thread)
1140 {
1141 spl_t s;
1142
1143 assert(thread == current_thread());
1144
1145 s = splsched();
1146 thread_lock(thread);
1147
1148 if (thread->sched_flags & TH_SFLAG_FAILSAFE) {
1149 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
1150 }
1151
1152 if (thread->sched_flags & TH_SFLAG_THROTTLED) {
1153 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
1154 }
1155
1156 /* At this point, the various demotions should be inactive */
1157 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
1158 assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
1159 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
1160
1161 /* Reset thread back to task-default basepri and mode */
1162 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
1163
1164 sched_set_thread_mode(thread, newmode);
1165
1166 thread->importance = 0;
1167
1168 /* Prevent further changes to thread base priority or mode */
1169 thread->policy_reset = 1;
1170
1171 sched_set_thread_base_priority(thread, thread->task_priority);
1172
1173 thread_unlock(thread);
1174 splx(s);
1175 }
1176
1177 kern_return_t
1178 thread_policy_get(
1179 thread_t thread,
1180 thread_policy_flavor_t flavor,
1181 thread_policy_t policy_info,
1182 mach_msg_type_number_t *count,
1183 boolean_t *get_default)
1184 {
1185 kern_return_t result = KERN_SUCCESS;
1186
1187 if (thread == THREAD_NULL) {
1188 return KERN_INVALID_ARGUMENT;
1189 }
1190
1191 thread_mtx_lock(thread);
1192 if (!thread->active) {
1193 thread_mtx_unlock(thread);
1194
1195 return KERN_TERMINATED;
1196 }
1197
1198 switch (flavor) {
1199 case THREAD_EXTENDED_POLICY:
1200 {
1201 boolean_t timeshare = TRUE;
1202
1203 if (!(*get_default)) {
1204 spl_t s = splsched();
1205 thread_lock(thread);
1206
1207 if ((thread->sched_mode != TH_MODE_REALTIME) &&
1208 (thread->saved_mode != TH_MODE_REALTIME)) {
1209 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK)) {
1210 timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
1211 } else {
1212 timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
1213 }
1214 } else {
1215 *get_default = TRUE;
1216 }
1217
1218 thread_unlock(thread);
1219 splx(s);
1220 }
1221
1222 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
1223 thread_extended_policy_t info;
1224
1225 info = (thread_extended_policy_t)policy_info;
1226 info->timeshare = timeshare;
1227 }
1228
1229 break;
1230 }
1231
1232 case THREAD_TIME_CONSTRAINT_POLICY:
1233 {
1234 thread_time_constraint_policy_t info;
1235
1236 if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
1237 result = KERN_INVALID_ARGUMENT;
1238 break;
1239 }
1240
1241 info = (thread_time_constraint_policy_t)policy_info;
1242
1243 if (!(*get_default)) {
1244 spl_t s = splsched();
1245 thread_lock(thread);
1246
1247 if ((thread->sched_mode == TH_MODE_REALTIME) ||
1248 (thread->saved_mode == TH_MODE_REALTIME)) {
1249 info->period = thread->realtime.period;
1250 info->computation = thread->realtime.computation;
1251 info->constraint = thread->realtime.constraint;
1252 info->preemptible = thread->realtime.preemptible;
1253 } else {
1254 *get_default = TRUE;
1255 }
1256
1257 thread_unlock(thread);
1258 splx(s);
1259 }
1260
1261 if (*get_default) {
1262 info->period = 0;
1263 info->computation = default_timeshare_computation;
1264 info->constraint = default_timeshare_constraint;
1265 info->preemptible = TRUE;
1266 }
1267
1268 break;
1269 }
1270
1271 case THREAD_PRECEDENCE_POLICY:
1272 {
1273 thread_precedence_policy_t info;
1274
1275 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1276 result = KERN_INVALID_ARGUMENT;
1277 break;
1278 }
1279
1280 info = (thread_precedence_policy_t)policy_info;
1281
1282 if (!(*get_default)) {
1283 spl_t s = splsched();
1284 thread_lock(thread);
1285
1286 info->importance = thread->importance;
1287
1288 thread_unlock(thread);
1289 splx(s);
1290 } else {
1291 info->importance = 0;
1292 }
1293
1294 break;
1295 }
1296
1297 case THREAD_AFFINITY_POLICY:
1298 {
1299 thread_affinity_policy_t info;
1300
1301 if (!thread_affinity_is_supported()) {
1302 result = KERN_NOT_SUPPORTED;
1303 break;
1304 }
1305 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1306 result = KERN_INVALID_ARGUMENT;
1307 break;
1308 }
1309
1310 info = (thread_affinity_policy_t)policy_info;
1311
1312 if (!(*get_default)) {
1313 info->affinity_tag = thread_affinity_get(thread);
1314 } else {
1315 info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1316 }
1317
1318 break;
1319 }
1320
1321 case THREAD_POLICY_STATE:
1322 {
1323 thread_policy_state_t info;
1324
1325 if (*count < THREAD_POLICY_STATE_COUNT) {
1326 result = KERN_INVALID_ARGUMENT;
1327 break;
1328 }
1329
1330 /* Only root can get this info */
1331 if (current_task()->sec_token.val[0] != 0) {
1332 result = KERN_PROTECTION_FAILURE;
1333 break;
1334 }
1335
1336 info = (thread_policy_state_t)(void*)policy_info;
1337
1338 if (!(*get_default)) {
1339 info->flags = 0;
1340
1341 spl_t s = splsched();
1342 thread_lock(thread);
1343
1344 info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1345
1346 info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1347 info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1348
1349 info->thps_user_promotions = 0;
1350 info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1351 info->thps_ipc_overrides = thread->kevent_overrides;
1352
1353 proc_get_thread_policy_bitfield(thread, info);
1354
1355 thread_unlock(thread);
1356 splx(s);
1357 } else {
1358 info->requested = 0;
1359 info->effective = 0;
1360 info->pending = 0;
1361 }
1362
1363 break;
1364 }
1365
1366 case THREAD_LATENCY_QOS_POLICY:
1367 {
1368 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1369 thread_latency_qos_t plqos;
1370
1371 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1372 result = KERN_INVALID_ARGUMENT;
1373 break;
1374 }
1375
1376 if (*get_default) {
1377 plqos = 0;
1378 } else {
1379 plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1380 }
1381
1382 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1383 }
1384 break;
1385
1386 case THREAD_THROUGHPUT_QOS_POLICY:
1387 {
1388 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1389 thread_throughput_qos_t ptqos;
1390
1391 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1392 result = KERN_INVALID_ARGUMENT;
1393 break;
1394 }
1395
1396 if (*get_default) {
1397 ptqos = 0;
1398 } else {
1399 ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1400 }
1401
1402 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1403 }
1404 break;
1405
1406 case THREAD_QOS_POLICY:
1407 {
1408 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1409
1410 if (*count < THREAD_QOS_POLICY_COUNT) {
1411 result = KERN_INVALID_ARGUMENT;
1412 break;
1413 }
1414
1415 if (!(*get_default)) {
1416 int relprio_value = 0;
1417 info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1418 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1419
1420 info->tier_importance = -relprio_value;
1421 } else {
1422 info->qos_tier = THREAD_QOS_UNSPECIFIED;
1423 info->tier_importance = 0;
1424 }
1425
1426 break;
1427 }
1428
1429 default:
1430 result = KERN_INVALID_ARGUMENT;
1431 break;
1432 }
1433
1434 thread_mtx_unlock(thread);
1435
1436 return result;
1437 }
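/*
 * Usage sketch (editor's illustration, user-space caller): reading a thread's
 * QoS policy, including whether the returned value is merely the default:
 *
 *	thread_qos_policy_data_t qosinfo;
 *	mach_msg_type_number_t cnt = THREAD_QOS_POLICY_COUNT;
 *	boolean_t get_default = FALSE;
 *	kern_return_t kr = thread_policy_get(mach_thread_self(),
 *	    THREAD_QOS_POLICY, (thread_policy_t)&qosinfo, &cnt, &get_default);
 *
 * On success, qosinfo.qos_tier and qosinfo.tier_importance reflect the
 * requested (not effective) values, per the attribute accessors below.
 */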
1438
1439 void
1440 thread_policy_create(thread_t thread)
1441 {
1442 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1443 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1444 thread_tid(thread), theffective_0(thread),
1445 theffective_1(thread), thread->base_pri, 0);
1446
1447 /* We pass a pend token but ignore it */
1448 struct task_pend_token pend_token = {};
1449
1450 thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
1451
1452 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1453 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1454 thread_tid(thread), theffective_0(thread),
1455 theffective_1(thread), thread->base_pri, 0);
1456 }
1457
1458 static void
1459 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
1460 {
1461 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1462 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1463 thread_tid(thread), theffective_0(thread),
1464 theffective_1(thread), thread->base_pri, 0);
1465
1466 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1467
1468 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1469 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1470 thread_tid(thread), theffective_0(thread),
1471 theffective_1(thread), thread->base_pri, 0);
1472 }
1473
1474
1475
1476 /*
1477 * One thread state update function TO RULE THEM ALL
1478 *
1479 * This function updates the thread effective policy fields
1480 * and pushes the results to the relevant subsystems.
1481 *
1482 * Side effects that cannot run under the thread spinlock are flagged in pend_token for the caller.
1483 *
1484 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1485 */
1486 static void
1487 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
1488 task_pend_token_t pend_token)
1489 {
1490 /*
1491 * Step 1:
1492 * Gather requested policy and effective task state
1493 */
1494
1495 struct thread_requested_policy requested = thread->requested_policy;
1496 struct task_effective_policy task_effective = thread->task->effective_policy;
1497
1498 /*
1499 * Step 2:
1500 * Calculate new effective policies from requested policy, task and thread state
1501 * Rules:
1502 * Don't change requested, it won't take effect
1503 */
1504
1505 struct thread_effective_policy next = {};
1506
1507 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1508
1509 uint32_t next_qos = requested.thrp_qos;
1510
1511 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1512 next_qos = MAX(requested.thrp_qos_override, next_qos);
1513 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1514 next_qos = MAX(requested.thrp_qos_kevent_override, next_qos);
1515 next_qos = MAX(requested.thrp_qos_wlsvc_override, next_qos);
1516 next_qos = MAX(requested.thrp_qos_workq_override, next_qos);
1517 }
1518
1519 next.thep_qos = next_qos;
1520
1521 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1522 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1523 if (next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1524 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1525 } else {
1526 next.thep_qos = task_effective.tep_qos_clamp;
1527 }
1528 }
1529
1530 /*
1531 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1532 * This allows QoS promotions to work properly even after the process is unclamped.
1533 */
1534 next.thep_qos_promote = next.thep_qos;
1535
1536 /* The ceiling only applies to threads that are in the QoS world */
1537 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1538 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1539 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1540 }
1541
1542 /* Apply the sync ipc qos override */
1543 assert(requested.thrp_qos_sync_ipc_override == THREAD_QOS_UNSPECIFIED);
1544
1545 /*
1546 * The QoS relative priority is only applicable when the original programmer's
1547 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1548 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1549 * since otherwise it would be lower than unclamped threads. Similarly, in the
1550 * presence of boosting, the programmer doesn't know what other actors
1551 * are boosting the thread.
1552 */
1553 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1554 (requested.thrp_qos == next.thep_qos) &&
1555 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1556 next.thep_qos_relprio = requested.thrp_qos_relprio;
1557 } else {
1558 next.thep_qos_relprio = 0;
1559 }
1560
1561 /* Calculate DARWIN_BG */
1562 boolean_t wants_darwinbg = FALSE;
1563 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
1564
1565 /*
1566 * If DARWIN_BG has been requested at either level, it's engaged.
1567 * darwinbg threads always create bg sockets,
1568 * but only some types of darwinbg change the sockets
1569 * after they're created
1570 */
1571 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg) {
1572 wants_all_sockets_bg = wants_darwinbg = TRUE;
1573 }
1574
1575 if (requested.thrp_pidbind_bg) {
1576 wants_all_sockets_bg = wants_darwinbg = TRUE;
1577 }
1578
1579 if (task_effective.tep_darwinbg) {
1580 wants_darwinbg = TRUE;
1581 }
1582
1583 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1584 next.thep_qos == THREAD_QOS_MAINTENANCE) {
1585 wants_darwinbg = TRUE;
1586 }
1587
1588 /* Calculate side effects of DARWIN_BG */
1589
1590 if (wants_darwinbg) {
1591 next.thep_darwinbg = 1;
1592 }
1593
1594 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg) {
1595 next.thep_new_sockets_bg = 1;
1596 }
1597
1598 /* Don't use task_effective.tep_all_sockets_bg here */
1599 if (wants_all_sockets_bg) {
1600 next.thep_all_sockets_bg = 1;
1601 }
1602
1603 /* darwinbg implies background QOS (or lower) */
1604 if (next.thep_darwinbg &&
1605 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1606 next.thep_qos = THREAD_QOS_BACKGROUND;
1607 next.thep_qos_relprio = 0;
1608 }
1609
1610 /* Calculate IO policy */
1611
1612 int iopol = THROTTLE_LEVEL_TIER0;
1613
1614 /* Factor in the task's IO policy */
1615 if (next.thep_darwinbg) {
1616 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1617 }
1618
1619 iopol = MAX(iopol, task_effective.tep_io_tier);
1620
1621 /* Look up the associated IO tier value for the QoS class */
1622 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1623
1624 iopol = MAX(iopol, requested.thrp_int_iotier);
1625 iopol = MAX(iopol, requested.thrp_ext_iotier);
1626
1627 next.thep_io_tier = iopol;
1628
1629 /*
1630 * If a QoS override is causing IO to go into a lower tier, we also set
1631 * the passive bit so that a thread doesn't end up stuck in its own throttle
1632 * window when the override goes away.
1633 */
1634 boolean_t qos_io_override_active = FALSE;
1635 if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
1636 thread_qos_policy_params.qos_iotier[requested.thrp_qos]) {
1637 qos_io_override_active = TRUE;
1638 }
1639
1640 /* Calculate Passive IO policy */
1641 if (requested.thrp_ext_iopassive ||
1642 requested.thrp_int_iopassive ||
1643 qos_io_override_active ||
1644 task_effective.tep_io_passive) {
1645 next.thep_io_passive = 1;
1646 }
1647
1648 /* Calculate timer QOS */
1649 uint32_t latency_qos = requested.thrp_latency_qos;
1650
1651 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1652 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1653
1654 next.thep_latency_qos = latency_qos;
1655
1656 /* Calculate throughput QOS */
1657 uint32_t through_qos = requested.thrp_through_qos;
1658
1659 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1660 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1661
1662 next.thep_through_qos = through_qos;
1663
1664 if (task_effective.tep_terminated || requested.thrp_terminated) {
1665 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1666 next.thep_terminated = 1;
1667 next.thep_darwinbg = 0;
1668 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1669 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1670 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1671 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1672 }
1673
1674 /*
1675 * Step 3:
1676 * Swap out old policy for new policy
1677 */
1678
1679 struct thread_effective_policy prev = thread->effective_policy;
1680
1681 thread_update_qos_cpu_time_locked(thread);
1682
1683 /* This is the point where the new values become visible to other threads */
1684 thread->effective_policy = next;
1685
1686 /*
1687 * Step 4:
1688 * Pend updates that can't be done while holding the thread lock
1689 */
1690
1691 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg) {
1692 pend_token->tpt_update_sockets = 1;
1693 }
1694
1695 /* TODO: Doesn't this only need to be done if the throttle went up? */
1696 if (prev.thep_io_tier != next.thep_io_tier) {
1697 pend_token->tpt_update_throttle = 1;
1698 }
1699
1700 /*
1701 * Check for the attributes that sfi_thread_classify() consults,
1702 * and trigger SFI re-evaluation.
1703 */
1704 if (prev.thep_qos != next.thep_qos ||
1705 prev.thep_darwinbg != next.thep_darwinbg) {
1706 pend_token->tpt_update_thread_sfi = 1;
1707 }
1708
1709 integer_t old_base_pri = thread->base_pri;
1710
1711 /*
1712 * Step 5:
1713 * Update other subsystems as necessary if something has changed
1714 */
1715
1716 /* Check for the attributes that thread_recompute_priority() consults */
1717 if (prev.thep_qos != next.thep_qos ||
1718 prev.thep_qos_relprio != next.thep_qos_relprio ||
1719 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1720 prev.thep_terminated != next.thep_terminated ||
1721 pend_token->tpt_force_recompute_pri == 1 ||
1722 recompute_priority) {
1723 thread_recompute_priority(thread);
1724 }
1725
1726 /*
1727 * Check if the thread is waiting on a turnstile and needs priority propagation.
1728 */
1729 if (pend_token->tpt_update_turnstile &&
1730 ((old_base_pri == thread->base_pri) ||
1731 !thread_get_waiting_turnstile(thread))) {
1732 /*
1733 * Reset update turnstile pend token since either
1734 * the thread priority did not change or thread is
1735 * not blocked on a turnstile.
1736 */
1737 pend_token->tpt_update_turnstile = 0;
1738 }
1739 }
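/*
 * Worked example (editor's illustration): a thread that requested
 * THREAD_QOS_UTILITY with -3REL but currently holds a kevent override of
 * USER_INITIATED resolves next_qos to USER_INITIATED. Because the effective
 * QoS no longer matches the requested QoS, the relative priority is dropped
 * (thep_qos_relprio = 0), and USER_INITIATED is exported via thep_qos_promote
 * for outbound promotions. When the override is later removed, another pass
 * through this function restores UTILITY-3REL.
 */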
1740
1741
1742 /*
1743 * Initiate a thread policy state transition on a thread with its TID
1744 * Useful if you cannot guarantee the thread won't get terminated
1745 * Precondition: No locks are held
1746 * Will take task lock - using the non-tid variant is faster
1747 * if you already have a thread ref.
1748 */
1749 void
1750 proc_set_thread_policy_with_tid(task_t task,
1751 uint64_t tid,
1752 int category,
1753 int flavor,
1754 int value)
1755 {
1756 /* takes task lock, returns ref'ed thread or NULL */
1757 thread_t thread = task_findtid(task, tid);
1758
1759 if (thread == THREAD_NULL) {
1760 return;
1761 }
1762
1763 proc_set_thread_policy(thread, category, flavor, value);
1764
1765 thread_deallocate(thread);
1766 }
1767
1768 /*
1769 * Initiate a thread policy transition on a thread
1770 * This path supports networking transitions (i.e. darwinbg transitions)
1771 * Precondition: No locks are held
1772 */
1773 void
1774 proc_set_thread_policy(thread_t thread,
1775 int category,
1776 int flavor,
1777 int value)
1778 {
1779 struct task_pend_token pend_token = {};
1780
1781 thread_mtx_lock(thread);
1782
1783 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1784
1785 thread_mtx_unlock(thread);
1786
1787 thread_policy_update_complete_unlocked(thread, &pend_token);
1788 }
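/*
 * Usage sketch (editor's illustration): an in-kernel caller that wants to
 * tag a thread as darwinbg on its own behalf could, hypothetically, use:
 *
 *	proc_set_thread_policy(thread, TASK_POLICY_INTERNAL,
 *	    TASK_POLICY_DARWIN_BG, TASK_POLICY_ENABLE);
 *
 * which funnels through the locked/spinlocked setters below and then runs
 * the pended side effects (socket, throttle, SFI, turnstile updates).
 */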
1789
1790 /*
1791 * Do the things that can't be done while holding a thread mutex.
1792 * These are set up to call back into thread policy to get the latest value,
1793 * so they don't have to be synchronized with the update.
1794 * The only required semantic is 'call this sometime after updating effective policy'
1795 *
1796 * Precondition: Thread mutex is not held
1797 *
1798 * This may be called with the task lock held, but in that case it won't be
1799 * called with tpt_update_sockets set.
1800 */
1801 void
1802 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1803 {
1804 #ifdef MACH_BSD
1805 if (pend_token->tpt_update_sockets) {
1806 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1807 }
1808 #endif /* MACH_BSD */
1809
1810 if (pend_token->tpt_update_throttle) {
1811 rethrottle_thread(thread->uthread);
1812 }
1813
1814 if (pend_token->tpt_update_thread_sfi) {
1815 sfi_reevaluate(thread);
1816 }
1817
1818 if (pend_token->tpt_update_turnstile) {
1819 turnstile_update_thread_priority_chain(thread);
1820 }
1821 }
1822
1823 /*
1824 * Set and update thread policy
1825 * Thread mutex might be held
1826 */
1827 static void
1828 proc_set_thread_policy_locked(thread_t thread,
1829 int category,
1830 int flavor,
1831 int value,
1832 int value2,
1833 task_pend_token_t pend_token)
1834 {
1835 spl_t s = splsched();
1836 thread_lock(thread);
1837
1838 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1839
1840 thread_unlock(thread);
1841 splx(s);
1842 }
1843
1844 /*
1845 * Set and update thread policy
1846 * Thread spinlock is held
1847 */
1848 static void
1849 proc_set_thread_policy_spinlocked(thread_t thread,
1850 int category,
1851 int flavor,
1852 int value,
1853 int value2,
1854 task_pend_token_t pend_token)
1855 {
1856 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1857 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1858 thread_tid(thread), threquested_0(thread),
1859 threquested_1(thread), value, 0);
1860
1861 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1862
1863 thread_policy_update_spinlocked(thread, FALSE, pend_token);
1864
1865 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1866 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1867 thread_tid(thread), threquested_0(thread),
1868 threquested_1(thread), tpending(pend_token), 0);
1869 }
1870
1871 /*
1872 * Set the requested state for a specific flavor to a specific value.
1873 */
1874 static void
1875 thread_set_requested_policy_spinlocked(thread_t thread,
1876 int category,
1877 int flavor,
1878 int value,
1879 int value2,
1880 task_pend_token_t pend_token)
1881 {
1882 int tier, passive;
1883
1884 struct thread_requested_policy requested = thread->requested_policy;
1885
1886 switch (flavor) {
1887 /* Category: EXTERNAL and INTERNAL, thread and task */
1888
1889 case TASK_POLICY_DARWIN_BG:
1890 if (category == TASK_POLICY_EXTERNAL) {
1891 requested.thrp_ext_darwinbg = value;
1892 } else {
1893 requested.thrp_int_darwinbg = value;
1894 }
1895 break;
1896
1897 case TASK_POLICY_IOPOL:
1898 proc_iopol_to_tier(value, &tier, &passive);
1899 if (category == TASK_POLICY_EXTERNAL) {
1900 requested.thrp_ext_iotier = tier;
1901 requested.thrp_ext_iopassive = passive;
1902 } else {
1903 requested.thrp_int_iotier = tier;
1904 requested.thrp_int_iopassive = passive;
1905 }
1906 break;
1907
1908 case TASK_POLICY_IO:
1909 if (category == TASK_POLICY_EXTERNAL) {
1910 requested.thrp_ext_iotier = value;
1911 } else {
1912 requested.thrp_int_iotier = value;
1913 }
1914 break;
1915
1916 case TASK_POLICY_PASSIVE_IO:
1917 if (category == TASK_POLICY_EXTERNAL) {
1918 requested.thrp_ext_iopassive = value;
1919 } else {
1920 requested.thrp_int_iopassive = value;
1921 }
1922 break;
1923
1924 /* Category: ATTRIBUTE, thread only */
1925
1926 case TASK_POLICY_PIDBIND_BG:
1927 assert(category == TASK_POLICY_ATTRIBUTE);
1928 requested.thrp_pidbind_bg = value;
1929 break;
1930
1931 case TASK_POLICY_LATENCY_QOS:
1932 assert(category == TASK_POLICY_ATTRIBUTE);
1933 requested.thrp_latency_qos = value;
1934 break;
1935
1936 case TASK_POLICY_THROUGH_QOS:
1937 assert(category == TASK_POLICY_ATTRIBUTE);
1938 requested.thrp_through_qos = value;
1939 break;
1940
1941 case TASK_POLICY_QOS_OVERRIDE:
1942 assert(category == TASK_POLICY_ATTRIBUTE);
1943 requested.thrp_qos_override = value;
1944 pend_token->tpt_update_turnstile = 1;
1945 break;
1946
1947 case TASK_POLICY_QOS_AND_RELPRIO:
1948 assert(category == TASK_POLICY_ATTRIBUTE);
1949 requested.thrp_qos = value;
1950 requested.thrp_qos_relprio = value2;
1951 pend_token->tpt_update_turnstile = 1;
1952 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1953 break;
1954
1955 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
1956 assert(category == TASK_POLICY_ATTRIBUTE);
1957 requested.thrp_qos_workq_override = value;
1958 pend_token->tpt_update_turnstile = 1;
1959 break;
1960
1961 case TASK_POLICY_QOS_PROMOTE:
1962 assert(category == TASK_POLICY_ATTRIBUTE);
1963 requested.thrp_qos_promote = value;
1964 break;
1965
1966 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
1967 assert(category == TASK_POLICY_ATTRIBUTE);
1968 requested.thrp_qos_kevent_override = value;
1969 pend_token->tpt_update_turnstile = 1;
1970 break;
1971
1972 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
1973 assert(category == TASK_POLICY_ATTRIBUTE);
1974 requested.thrp_qos_wlsvc_override = value;
1975 pend_token->tpt_update_turnstile = 1;
1976 break;
1977
1978 case TASK_POLICY_TERMINATED:
1979 assert(category == TASK_POLICY_ATTRIBUTE);
1980 requested.thrp_terminated = value;
1981 break;
1982
1983 default:
1984 panic("unknown task policy: %d %d %d", category, flavor, value);
1985 break;
1986 }
1987
1988 thread->requested_policy = requested;
1989 }
1990
1991 /*
1992 * Gets what you set. Effective values may be different.
1993 * Precondition: No locks are held
1994 */
1995 int
1996 proc_get_thread_policy(thread_t thread,
1997 int category,
1998 int flavor)
1999 {
2000 int value = 0;
2001 thread_mtx_lock(thread);
2002 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
2003 thread_mtx_unlock(thread);
2004 return value;
2005 }
2006
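/*
 * Gets what you set. Effective values may be different.
 * Reads the requested value for 'flavor' under the thread spinlock; for
 * two-value flavors (e.g. TASK_POLICY_QOS_AND_RELPRIO) the second value is
 * returned through value2.
 */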
2007 static int
2008 proc_get_thread_policy_locked(thread_t thread,
2009 int category,
2010 int flavor,
2011 int* value2)
2012 {
2013 int value = 0;
2014
2015 spl_t s = splsched();
2016 thread_lock(thread);
2017
2018 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
2019
2020 thread_unlock(thread);
2021 splx(s);
2022
2023 return value;
2024 }
2025
2026 /*
2027 * Gets what you set. Effective values may be different.
2028 */
2029 static int
2030 thread_get_requested_policy_spinlocked(thread_t thread,
2031 int category,
2032 int flavor,
2033 int* value2)
2034 {
2035 int value = 0;
2036
2037 struct thread_requested_policy requested = thread->requested_policy;
2038
2039 switch (flavor) {
2040 case TASK_POLICY_DARWIN_BG:
2041 if (category == TASK_POLICY_EXTERNAL) {
2042 value = requested.thrp_ext_darwinbg;
2043 } else {
2044 value = requested.thrp_int_darwinbg;
2045 }
2046 break;
2047 case TASK_POLICY_IOPOL:
2048 if (category == TASK_POLICY_EXTERNAL) {
2049 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
2050 requested.thrp_ext_iopassive);
2051 } else {
2052 value = proc_tier_to_iopol(requested.thrp_int_iotier,
2053 requested.thrp_int_iopassive);
2054 }
2055 break;
2056 case TASK_POLICY_IO:
2057 if (category == TASK_POLICY_EXTERNAL) {
2058 value = requested.thrp_ext_iotier;
2059 } else {
2060 value = requested.thrp_int_iotier;
2061 }
2062 break;
2063 case TASK_POLICY_PASSIVE_IO:
2064 if (category == TASK_POLICY_EXTERNAL) {
2065 value = requested.thrp_ext_iopassive;
2066 } else {
2067 value = requested.thrp_int_iopassive;
2068 }
2069 break;
2070 case TASK_POLICY_QOS:
2071 assert(category == TASK_POLICY_ATTRIBUTE);
2072 value = requested.thrp_qos;
2073 break;
2074 case TASK_POLICY_QOS_OVERRIDE:
2075 assert(category == TASK_POLICY_ATTRIBUTE);
2076 value = requested.thrp_qos_override;
2077 break;
2078 case TASK_POLICY_LATENCY_QOS:
2079 assert(category == TASK_POLICY_ATTRIBUTE);
2080 value = requested.thrp_latency_qos;
2081 break;
2082 case TASK_POLICY_THROUGH_QOS:
2083 assert(category == TASK_POLICY_ATTRIBUTE);
2084 value = requested.thrp_through_qos;
2085 break;
2086 case TASK_POLICY_QOS_WORKQ_OVERRIDE:
2087 assert(category == TASK_POLICY_ATTRIBUTE);
2088 value = requested.thrp_qos_workq_override;
2089 break;
2090 case TASK_POLICY_QOS_AND_RELPRIO:
2091 assert(category == TASK_POLICY_ATTRIBUTE);
2092 assert(value2 != NULL);
2093 value = requested.thrp_qos;
2094 *value2 = requested.thrp_qos_relprio;
2095 break;
2096 case TASK_POLICY_QOS_PROMOTE:
2097 assert(category == TASK_POLICY_ATTRIBUTE);
2098 value = requested.thrp_qos_promote;
2099 break;
2100 case TASK_POLICY_QOS_KEVENT_OVERRIDE:
2101 assert(category == TASK_POLICY_ATTRIBUTE);
2102 value = requested.thrp_qos_kevent_override;
2103 break;
2104 case TASK_POLICY_QOS_SERVICER_OVERRIDE:
2105 assert(category == TASK_POLICY_ATTRIBUTE);
2106 value = requested.thrp_qos_wlsvc_override;
2107 break;
2108 case TASK_POLICY_TERMINATED:
2109 assert(category == TASK_POLICY_ATTRIBUTE);
2110 value = requested.thrp_terminated;
2111 break;
2112
2113 default:
2114 panic("unknown policy_flavor %d", flavor);
2115 break;
2116 }
2117
2118 return value;
2119 }
2120
2121 /*
2122 * Gets what is actually in effect, for subsystems which pull policy instead of receive updates.
2123 *
2124 * NOTE: This accessor does not take the task or thread lock.
2125 * Notifications of state updates need to be externally synchronized with state queries.
2126 * This routine *MUST* remain interrupt safe, as it is potentially invoked
2127 * within the context of a timer interrupt.
2128 *
2129 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
2130 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
2131 * I don't think that cost is worth not having the right answer.
2132 */
2133 int
2134 proc_get_effective_thread_policy(thread_t thread,
2135 int flavor)
2136 {
2137 int value = 0;
2138
2139 switch (flavor) {
2140 case TASK_POLICY_DARWIN_BG:
2141 /*
2142 * This call is used within the timer layer, as well as for
2143 * prioritizing requests to the graphics system.
2144 * It also informs SFI and originator-bg-state.
2145 * Returns 1 for background mode, 0 for normal mode
2146 */
2147
2148 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
2149 break;
2150 case TASK_POLICY_IO:
2151 /*
2152 * The I/O system calls here to find out what throttling tier to apply to an operation.
2153 * Returns THROTTLE_LEVEL_* values
2154 */
2155 value = thread->effective_policy.thep_io_tier;
2156 if (thread->iotier_override != THROTTLE_LEVEL_NONE) {
2157 value = MIN(value, thread->iotier_override);
2158 }
2159 break;
2160 case TASK_POLICY_PASSIVE_IO:
2161 /*
2162 * The I/O system calls here to find out whether an operation should be passive.
2163 * (i.e. not cause operations with lower throttle tiers to be throttled)
2164 * Returns 1 for passive mode, 0 for normal mode
2165 *
2166 * If an override is causing IO to go into a lower tier, we also set
2167 * the passive bit so that a thread doesn't end up stuck in its own throttle
2168 * window when the override goes away.
2169 */
2170 value = thread->effective_policy.thep_io_passive ? 1 : 0;
2171 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
2172 thread->iotier_override < thread->effective_policy.thep_io_tier) {
2173 value = 1;
2174 }
2175 break;
2176 case TASK_POLICY_ALL_SOCKETS_BG:
2177 /*
2178 * do_background_socket() calls this to determine whether
2179 * it should change the thread's sockets
2180 * Returns 1 for background mode, 0 for normal mode
2181 * This consults both thread and task so un-DBGing a thread while the task is BG
2182 * doesn't get you out of the network throttle.
2183 */
2184 value = (thread->effective_policy.thep_all_sockets_bg ||
2185 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
2186 break;
2187 case TASK_POLICY_NEW_SOCKETS_BG:
2188 /*
2189 * socreate() calls this to determine if it should mark a new socket as background
2190 * Returns 1 for background mode, 0 for normal mode
2191 */
2192 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
2193 break;
2194 case TASK_POLICY_LATENCY_QOS:
2195 /*
2196 * timer arming calls into here to find out the timer coalescing level
2197 * Returns a latency QoS tier (0-6)
2198 */
2199 value = thread->effective_policy.thep_latency_qos;
2200 break;
2201 case TASK_POLICY_THROUGH_QOS:
2202 /*
2203 * This value is passed into the urgency callout from the scheduler
2204 * to the performance management subsystem.
2205 *
2206 * Returns a throughput QoS tier (0-6)
2207 */
2208 value = thread->effective_policy.thep_through_qos;
2209 break;
2210 case TASK_POLICY_QOS:
2211 /*
2212 * This is communicated to the performance management layer and SFI.
2213 *
2214 * Returns a QoS policy tier
2215 */
2216 value = thread->effective_policy.thep_qos;
2217 break;
2218 default:
2219 panic("unknown thread policy flavor %d", flavor);
2220 break;
2221 }
2222
2223 return value;
2224 }
2225
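/*
 * Illustrative sketch: how an I/O-issuing subsystem might pull the effective
 * throttle tier and passive bit for a thread. The helper name is hypothetical;
 * the flavors queried are the ones handled above, and, per the NOTE above,
 * no task or thread lock is taken.
 */
static inline void
example_query_io_policy(thread_t thread, int *tier, int *passive)
{
	/* THROTTLE_LEVEL_* value, already folded with any iotier override */
	*tier = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
	/* 1 if the I/O should not throttle lower tiers, 0 otherwise */
	*passive = proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO);
}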
2226
2227 /*
2228 * The (integer_t) casts limit the number of bits we can fit here.
2229 * This interface is deprecated; it appears to have been replaced by the _EXT struct.
2230 */
2231 static void
2232 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2233 {
2234 uint64_t bits = 0;
2235 struct thread_requested_policy requested = thread->requested_policy;
2236
2237 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2238 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2239 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2240 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2241 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2242 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2243
2244 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2245 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2246
2247 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2248
2249 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2250 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2251
2252 info->requested = (integer_t) bits;
2253 bits = 0;
2254
2255 struct thread_effective_policy effective = thread->effective_policy;
2256
2257 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2258
2259 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2260 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2261 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2262 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2263
2264 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2265
2266 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2267 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2268
2269 info->effective = (integer_t)bits;
2270 bits = 0;
2271
2272 info->pending = 0;
2273 }
2274
2275 /*
2276 * Sneakily trace either the task and thread requested
2277 * or just the thread requested, depending on whether we have enough room.
2278 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2279 *
2280 *                          LP32            LP64
2281 * threquested_0(thread)    thread[0]       thread[0]
2282 * threquested_1(thread)    thread[1]       task[0]
2283 *
2284 */
2285
2286 uintptr_t
2287 threquested_0(thread_t thread)
2288 {
2289 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2290
2291 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2292
2293 return raw[0];
2294 }
2295
2296 uintptr_t
2297 threquested_1(thread_t thread)
2298 {
2299 #if defined __LP64__
2300 return *(uintptr_t*)&thread->task->requested_policy;
2301 #else
2302 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2303 return raw[1];
2304 #endif
2305 }
2306
2307 uintptr_t
2308 theffective_0(thread_t thread)
2309 {
2310 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2311
2312 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2313 return raw[0];
2314 }
2315
2316 uintptr_t
2317 theffective_1(thread_t thread)
2318 {
2319 #if defined __LP64__
2320 return *(uintptr_t*)&thread->task->effective_policy;
2321 #else
2322 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2323 return raw[1];
2324 #endif
2325 }
2326
2327
2328 /*
2329 * Set an override on the thread which is consulted with a
2330 * higher priority than the task/thread policy. This should
2331 * only be set for temporary grants until the thread
2332 * returns to the userspace boundary
2333 *
2334 * We use atomic operations to swap in the override, with
2335 * the assumption that the thread itself can
2336 * read the override and clear it on return to userspace.
2337 *
2338 * No locking is performed, since it is acceptable to see
2339 * a stale override for one loop through throttle_lowpri_io().
2340 * However a thread reference must be held on the thread.
2341 */
2342
2343 void
2344 set_thread_iotier_override(thread_t thread, int policy)
2345 {
2346 int current_override;
2347
2348 /* Let most aggressive I/O policy win until user boundary */
2349 do {
2350 current_override = thread->iotier_override;
2351
2352 if (current_override != THROTTLE_LEVEL_NONE) {
2353 policy = MIN(current_override, policy);
2354 }
2355
2356 if (current_override == policy) {
2357 /* no effective change */
2358 return;
2359 }
2360 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2361
2362 /*
2363 * Since the thread may be currently throttled,
2364 * re-evaluate tiers and potentially break out
2365 * of an msleep
2366 */
2367 rethrottle_thread(thread->uthread);
2368 }
2369
2370 /*
2371 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2372 * semaphores, dispatch_sync) may result in priority inversions where a higher priority
2373 * (i.e. scheduler priority, I/O tier, QoS tier) is waiting on a resource owned by a lower
2374 * priority thread. In these cases, we attempt to propagate the priority token, as long
2375 * as the subsystem informs us of the relationships between the threads. The userspace
2376 * synchronization subsystem should maintain the information of owner->resource and
2377 * resource->waiters itself.
2378 */
2379
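/*
 * Illustrative sketch of the add/remove pairing described above, assuming the
 * proc_thread_qos_add_override/remove_override prototypes from
 * kern/policy_internal.h are in scope. The helper name and the choice of a
 * pthread-mutex resource type are illustrative only.
 */
static inline void
example_boost_owner_for_resource(task_t task, uint64_t owner_tid,
    user_addr_t resource)
{
	/* A waiter boosts the owner the first time this resource is contended... */
	proc_thread_qos_add_override(task, THREAD_NULL, owner_tid,
	    THREAD_QOS_USER_INTERACTIVE, TRUE /* first_override_for_resource */,
	    resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);

	/* ...and the boost is dropped once the owner releases the resource. */
	proc_thread_qos_remove_override(task, THREAD_NULL, owner_tid,
	    resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
}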
2380 /*
2381 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2382 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2383 * to be handled specially in the future, but for now it's fine to slam
2384 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2385 */
2386 static void
2387 canonicalize_resource_and_type(user_addr_t *resource, int *resource_type)
2388 {
2389 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2390 /* Map all input resource/type to a single one */
2391 *resource = USER_ADDR_NULL;
2392 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2393 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2394 /* no transform */
2395 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2396 /* Map all mutex overrides to a single one, to avoid memory overhead */
2397 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2398 *resource = USER_ADDR_NULL;
2399 }
2400 }
2401 }
2402
2403 /* This helper routine finds an existing override, if any. Locking must be done by the caller. */
2404 static struct thread_qos_override *
2405 find_qos_override(thread_t thread,
2406 user_addr_t resource,
2407 int resource_type)
2408 {
2409 struct thread_qos_override *override;
2410
2411 override = thread->overrides;
2412 while (override) {
2413 if (override->override_resource == resource &&
2414 override->override_resource_type == resource_type) {
2415 return override;
2416 }
2417
2418 override = override->override_next;
2419 }
2420
2421 return NULL;
2422 }
2423
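/*
 * Walk the override list, decrementing (or zeroing, when 'reset' is set) the
 * contended-resource count of every entry matching resource/resource_type
 * (wildcards match all). Entries whose count reaches zero are unlinked onto
 * *free_override_list so the caller can zfree() them once locks are dropped.
 * Locking must be done by the caller.
 */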
2424 static void
2425 find_and_decrement_qos_override(thread_t thread,
2426 user_addr_t resource,
2427 int resource_type,
2428 boolean_t reset,
2429 struct thread_qos_override **free_override_list)
2430 {
2431 struct thread_qos_override *override, *override_prev;
2432
2433 override_prev = NULL;
2434 override = thread->overrides;
2435 while (override) {
2436 struct thread_qos_override *override_next = override->override_next;
2437
2438 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2439 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2440 if (reset) {
2441 override->override_contended_resource_count = 0;
2442 } else {
2443 override->override_contended_resource_count--;
2444 }
2445
2446 if (override->override_contended_resource_count == 0) {
2447 if (override_prev == NULL) {
2448 thread->overrides = override_next;
2449 } else {
2450 override_prev->override_next = override_next;
2451 }
2452
2453 /* Add to out-param for later zfree */
2454 override->override_next = *free_override_list;
2455 *free_override_list = override;
2456 } else {
2457 override_prev = override;
2458 }
2459
2460 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2461 return;
2462 }
2463 } else {
2464 override_prev = override;
2465 }
2466
2467 override = override_next;
2468 }
2469 }
2470
2471 /* This helper recalculates the current requested override using the policy selected at boot */
2472 static int
2473 calculate_requested_qos_override(thread_t thread)
2474 {
2475 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2476 return THREAD_QOS_UNSPECIFIED;
2477 }
2478
2479 /* iterate over all overrides and calculate MAX */
2480 struct thread_qos_override *override;
2481 int qos_override = THREAD_QOS_UNSPECIFIED;
2482
2483 override = thread->overrides;
2484 while (override) {
2485 qos_override = MAX(qos_override, override->override_qos);
2486 override = override->override_next;
2487 }
2488
2489 return qos_override;
2490 }
2491
2492 /*
2493 * Returns:
2494 * - 0 on success
2495 * - EINVAL if some invalid input was passed
2496 */
2497 static int
2498 proc_thread_qos_add_override_internal(thread_t thread,
2499 int override_qos,
2500 boolean_t first_override_for_resource,
2501 user_addr_t resource,
2502 int resource_type)
2503 {
2504 struct task_pend_token pend_token = {};
2505 int rc = 0;
2506
2507 thread_mtx_lock(thread);
2508
2509 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2510 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2511
2512 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2513 uint64_t, thread->requested_policy.thrp_qos,
2514 uint64_t, thread->effective_policy.thep_qos,
2515 int, override_qos, boolean_t, first_override_for_resource);
2516
2517 struct thread_qos_override *override;
2518 struct thread_qos_override *override_new = NULL;
2519 int new_qos_override, prev_qos_override;
2520 int new_effective_qos;
2521
2522 canonicalize_resource_and_type(&resource, &resource_type);
2523
2524 override = find_qos_override(thread, resource, resource_type);
2525 if (first_override_for_resource && !override) {
2526 /* We need to allocate a new object. Drop the thread mutex and
2527 * recheck afterwards in case someone else added the override
2528 */
2529 thread_mtx_unlock(thread);
2530 override_new = zalloc(thread_qos_override_zone);
2531 thread_mtx_lock(thread);
2532 override = find_qos_override(thread, resource, resource_type);
2533 }
2534 if (first_override_for_resource && override) {
2535 /* Someone else already allocated while the thread mutex was dropped */
2536 override->override_contended_resource_count++;
2537 } else if (!override && override_new) {
2538 override = override_new;
2539 override_new = NULL;
2540 override->override_next = thread->overrides;
2541 /* since first_override_for_resource was TRUE */
2542 override->override_contended_resource_count = 1;
2543 override->override_resource = resource;
2544 override->override_resource_type = resource_type;
2545 override->override_qos = THREAD_QOS_UNSPECIFIED;
2546 thread->overrides = override;
2547 }
2548
2549 if (override) {
2550 if (override->override_qos == THREAD_QOS_UNSPECIFIED) {
2551 override->override_qos = override_qos;
2552 } else {
2553 override->override_qos = MAX(override->override_qos, override_qos);
2554 }
2555 }
2556
2557 /* Determine how to combine the various overrides into a single current
2558 * requested override
2559 */
2560 new_qos_override = calculate_requested_qos_override(thread);
2561
2562 prev_qos_override = proc_get_thread_policy_locked(thread,
2563 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2564
2565 if (new_qos_override != prev_qos_override) {
2566 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2567 TASK_POLICY_QOS_OVERRIDE,
2568 new_qos_override, 0, &pend_token);
2569 }
2570
2571 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2572
2573 thread_mtx_unlock(thread);
2574
2575 thread_policy_update_complete_unlocked(thread, &pend_token);
2576
2577 if (override_new) {
2578 zfree(thread_qos_override_zone, override_new);
2579 }
2580
2581 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2582 int, new_qos_override, int, new_effective_qos, int, rc);
2583
2584 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2585 new_qos_override, resource, resource_type, 0, 0);
2586
2587 return rc;
2588 }
2589
2590 int
2591 proc_thread_qos_add_override(task_t task,
2592 thread_t thread,
2593 uint64_t tid,
2594 int override_qos,
2595 boolean_t first_override_for_resource,
2596 user_addr_t resource,
2597 int resource_type)
2598 {
2599 boolean_t has_thread_reference = FALSE;
2600 int rc = 0;
2601
2602 if (thread == THREAD_NULL) {
2603 thread = task_findtid(task, tid);
2604 /* returns referenced thread */
2605
2606 if (thread == THREAD_NULL) {
2607 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2608 tid, 0, 0xdead, 0, 0);
2609 return ESRCH;
2610 }
2611 has_thread_reference = TRUE;
2612 } else {
2613 assert(thread->task == task);
2614 }
2615 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2616 first_override_for_resource, resource, resource_type);
2617 if (has_thread_reference) {
2618 thread_deallocate(thread);
2619 }
2620
2621 return rc;
2622 }
2623
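/*
 * Remove (or, with 'reset', clear) this thread's override contribution for
 * resource/resource_type, recompute the combined requested override, and
 * apply it if it changed. Override objects released by the removal are
 * zfree()d after all locks have been dropped.
 */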
2624 static void
2625 proc_thread_qos_remove_override_internal(thread_t thread,
2626 user_addr_t resource,
2627 int resource_type,
2628 boolean_t reset)
2629 {
2630 struct task_pend_token pend_token = {};
2631
2632 struct thread_qos_override *deferred_free_override_list = NULL;
2633 int new_qos_override, prev_qos_override, new_effective_qos;
2634
2635 thread_mtx_lock(thread);
2636
2637 canonicalize_resource_and_type(&resource, &resource_type);
2638
2639 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2640
2641 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2642 thread_tid(thread), resource, reset, 0, 0);
2643
2644 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2645 uint64_t, thread->requested_policy.thrp_qos,
2646 uint64_t, thread->effective_policy.thep_qos);
2647
2648 /* Determine how to combine the various overrides into a single current requested override */
2649 new_qos_override = calculate_requested_qos_override(thread);
2650
2651 spl_t s = splsched();
2652 thread_lock(thread);
2653
2654 /*
2655 * The override chain and therefore the value of the current override is locked with thread mutex,
2656 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2657 * This means you can't change the current override from a spinlock-only setter.
2658 */
2659 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2660
2661 if (new_qos_override != prev_qos_override) {
2662 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2663 }
2664
2665 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2666
2667 thread_unlock(thread);
2668 splx(s);
2669
2670 thread_mtx_unlock(thread);
2671
2672 thread_policy_update_complete_unlocked(thread, &pend_token);
2673
2674 while (deferred_free_override_list) {
2675 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2676
2677 zfree(thread_qos_override_zone, deferred_free_override_list);
2678 deferred_free_override_list = override_next;
2679 }
2680
2681 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2682 int, new_qos_override, int, new_effective_qos);
2683
2684 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2685 thread_tid(thread), 0, 0, 0, 0);
2686 }
2687
2688 int
2689 proc_thread_qos_remove_override(task_t task,
2690 thread_t thread,
2691 uint64_t tid,
2692 user_addr_t resource,
2693 int resource_type)
2694 {
2695 boolean_t has_thread_reference = FALSE;
2696
2697 if (thread == THREAD_NULL) {
2698 thread = task_findtid(task, tid);
2699 /* returns referenced thread */
2700
2701 if (thread == THREAD_NULL) {
2702 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2703 tid, 0, 0xdead, 0, 0);
2704 return ESRCH;
2705 }
2706 has_thread_reference = TRUE;
2707 } else {
2708 assert(task == thread->task);
2709 }
2710
2711 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE);
2712
2713 if (has_thread_reference) {
2714 thread_deallocate(thread);
2715 }
2716
2717 return 0;
2718 }
2719
2720 /* Deallocate before thread termination */
2721 void
2722 proc_thread_qos_deallocate(thread_t thread)
2723 {
2724 /* This thread must have no more IPC overrides. */
2725 assert(thread->kevent_overrides == 0);
2726 assert(thread->requested_policy.thrp_qos_kevent_override == THREAD_QOS_UNSPECIFIED);
2727 assert(thread->requested_policy.thrp_qos_wlsvc_override == THREAD_QOS_UNSPECIFIED);
2728
2729 /*
2730 * Clear out any lingering override objects.
2731 */
2732 struct thread_qos_override *override;
2733
2734 thread_mtx_lock(thread);
2735 override = thread->overrides;
2736 thread->overrides = NULL;
2737 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2738 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2739 thread_mtx_unlock(thread);
2740
2741 while (override) {
2742 struct thread_qos_override *override_next = override->override_next;
2743
2744 zfree(thread_qos_override_zone, override);
2745 override = override_next;
2746 }
2747 }
2748
2749 /*
2750 * Set up the primordial thread's QoS
2751 */
2752 void
2753 task_set_main_thread_qos(task_t task, thread_t thread)
2754 {
2755 struct task_pend_token pend_token = {};
2756
2757 assert(thread->task == task);
2758
2759 thread_mtx_lock(thread);
2760
2761 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2762 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2763 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2764 thread->requested_policy.thrp_qos, 0);
2765
2766 int primordial_qos = task_compute_main_thread_qos(task);
2767
2768 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
2769 primordial_qos, 0, &pend_token);
2770
2771 thread_mtx_unlock(thread);
2772
2773 thread_policy_update_complete_unlocked(thread, &pend_token);
2774
2775 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2776 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2777 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2778 primordial_qos, 0);
2779 }
2780
2781 /*
2782 * KPI for pthread kext
2783 *
2784 * Return a good guess at what the initial manager QoS will be
2785 * Dispatch can override this in userspace if it so chooses
2786 */
2787 int
2788 task_get_default_manager_qos(task_t task)
2789 {
2790 int primordial_qos = task_compute_main_thread_qos(task);
2791
2792 if (primordial_qos == THREAD_QOS_LEGACY) {
2793 primordial_qos = THREAD_QOS_USER_INITIATED;
2794 }
2795
2796 return primordial_qos;
2797 }
2798
2799 /*
2800 * Check if the kernel promotion on thread has changed
2801 * and apply it.
2802 *
2803 * thread locked on entry and exit
2804 */
2805 boolean_t
2806 thread_recompute_kernel_promotion_locked(thread_t thread)
2807 {
2808 boolean_t needs_update = FALSE;
2809 int kern_promotion_schedpri = thread_get_inheritor_turnstile_sched_priority(thread);
2810
2811 /*
2812 * For now just assert that kern_promotion_schedpri <= MAXPRI_PROMOTE.
2813 * TURNSTILE_KERNEL_PROMOTE adds threads on the waitq already capped to MAXPRI_PROMOTE
2814 * and propagates the priority through the chain with the same cap, because as of now it does
2815 * not differentiate on the kernel primitive.
2816 *
2817 * If this assumption changes with the adoption of a kernel primitive that does not
2818 * cap the priority when adding/propagating,
2819 * then here is the place to put the generic cap for all kernel primitives
2820 * (convert the assert to kern_promotion_schedpri = MIN(priority, MAXPRI_PROMOTE)).
2821 */
2822 assert(kern_promotion_schedpri <= MAXPRI_PROMOTE);
2823
2824 if (kern_promotion_schedpri != thread->kern_promotion_schedpri) {
2825 KDBG(MACHDBG_CODE(
2826 DBG_MACH_SCHED, MACH_TURNSTILE_KERNEL_CHANGE) | DBG_FUNC_NONE,
2827 thread_tid(thread),
2828 kern_promotion_schedpri,
2829 thread->kern_promotion_schedpri);
2830
2831 needs_update = TRUE;
2832 thread->kern_promotion_schedpri = kern_promotion_schedpri;
2833 thread_recompute_sched_pri(thread, SETPRI_DEFAULT);
2834 }
2835
2836 return needs_update;
2837 }
2838
2839 /*
2840 * Check if the user promotion on thread has changed
2841 * and apply it.
2842 *
2843 * thread locked on entry, might drop the thread lock
2844 * and reacquire it.
2845 */
2846 boolean_t
2847 thread_recompute_user_promotion_locked(thread_t thread)
2848 {
2849 boolean_t needs_update = FALSE;
2850 struct task_pend_token pend_token = {};
2851 int user_promotion_basepri = MIN(thread_get_inheritor_turnstile_base_priority(thread), MAXPRI_USER);
2852 int old_base_pri = thread->base_pri;
2853 thread_qos_t qos_promotion;
2854
2855 /* Check if user promotion has changed */
2856 if (thread->user_promotion_basepri == user_promotion_basepri) {
2857 return needs_update;
2858 } else {
2859 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2860 (TURNSTILE_CODE(TURNSTILE_PRIORITY_OPERATIONS, (THREAD_USER_PROMOTION_CHANGE))) | DBG_FUNC_NONE,
2861 thread_tid(thread),
2862 user_promotion_basepri,
2863 thread->user_promotion_basepri,
2864 0, 0);
2865 KDBG(MACHDBG_CODE(
2866 DBG_MACH_SCHED, MACH_TURNSTILE_USER_CHANGE) | DBG_FUNC_NONE,
2867 thread_tid(thread),
2868 user_promotion_basepri,
2869 thread->user_promotion_basepri);
2870 }
2871
2872 /* Update the user promotion base pri */
2873 thread->user_promotion_basepri = user_promotion_basepri;
2874 pend_token.tpt_force_recompute_pri = 1;
2875
2876 if (user_promotion_basepri <= MAXPRI_THROTTLE) {
2877 qos_promotion = THREAD_QOS_UNSPECIFIED;
2878 } else {
2879 qos_promotion = thread_user_promotion_qos_for_pri(user_promotion_basepri);
2880 }
2881
2882 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2883 TASK_POLICY_QOS_PROMOTE, qos_promotion, 0, &pend_token);
2884
2885 if (thread_get_waiting_turnstile(thread) &&
2886 thread->base_pri != old_base_pri) {
2887 needs_update = TRUE;
2888 }
2889
2890 thread_unlock(thread);
2891
2892 thread_policy_update_complete_unlocked(thread, &pend_token);
2893
2894 thread_lock(thread);
2895
2896 return needs_update;
2897 }
2898
2899 /*
2900 * Convert the thread's user promotion base priority to a QoS class, for threads in the QoS world.
2901 * Priorities above the UI QoS base priority map to UI.
2902 */
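/*
 * For example: a promotion base priority at or above the USER_INTERACTIVE
 * base priority maps to THREAD_QOS_USER_INTERACTIVE, while a priority below
 * the BACKGROUND base priority falls through to THREAD_QOS_MAINTENANCE.
 */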
2903 thread_qos_t
2904 thread_user_promotion_qos_for_pri(int priority)
2905 {
2906 int qos;
2907 for (qos = THREAD_QOS_USER_INTERACTIVE; qos > THREAD_QOS_MAINTENANCE; qos--) {
2908 if (thread_qos_policy_params.qos_pri[qos] <= priority) {
2909 return qos;
2910 }
2911 }
2912 return THREAD_QOS_MAINTENANCE;
2913 }
2914
2915 /*
2916 * Set the thread's QoS Kevent override
2917 * Owned by the Kevent subsystem
2918 *
2919 * May be called with spinlocks held, but not spinlocks
2920 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2921 *
2922 * One 'add' must be balanced by one 'drop'.
2923 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
2924 * Before the thread is deallocated, there must be 0 remaining overrides.
2925 */
2926 static void
2927 thread_kevent_override(thread_t thread,
2928 uint32_t qos_override,
2929 boolean_t is_new_override)
2930 {
2931 struct task_pend_token pend_token = {};
2932 boolean_t needs_update;
2933
2934 spl_t s = splsched();
2935 thread_lock(thread);
2936
2937 uint32_t old_override = thread->requested_policy.thrp_qos_kevent_override;
2938
2939 assert(qos_override > THREAD_QOS_UNSPECIFIED);
2940 assert(qos_override < THREAD_QOS_LAST);
2941
2942 if (is_new_override) {
2943 if (thread->kevent_overrides++ == 0) {
2944 /* This add is the first override for this thread */
2945 assert(old_override == THREAD_QOS_UNSPECIFIED);
2946 } else {
2947 /* There are already other overrides in effect for this thread */
2948 assert(old_override > THREAD_QOS_UNSPECIFIED);
2949 }
2950 } else {
2951 /* There must be at least one override (the previous add call) in effect */
2952 assert(thread->kevent_overrides > 0);
2953 assert(old_override > THREAD_QOS_UNSPECIFIED);
2954 }
2955
2956 /*
2957 * We can't allow lowering if there are several IPC overrides because
2958 * the caller can't possibly know the whole truth.
2959 */
2960 if (thread->kevent_overrides == 1) {
2961 needs_update = qos_override != old_override;
2962 } else {
2963 needs_update = qos_override > old_override;
2964 }
2965
2966 if (needs_update) {
2967 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2968 TASK_POLICY_QOS_KEVENT_OVERRIDE,
2969 qos_override, 0, &pend_token);
2970 assert(pend_token.tpt_update_sockets == 0);
2971 }
2972
2973 thread_unlock(thread);
2974 splx(s);
2975
2976 thread_policy_update_complete_unlocked(thread, &pend_token);
2977 }
2978
2979 void
2980 thread_add_kevent_override(thread_t thread, uint32_t qos_override)
2981 {
2982 thread_kevent_override(thread, qos_override, TRUE);
2983 }
2984
2985 void
2986 thread_update_kevent_override(thread_t thread, uint32_t qos_override)
2987 {
2988 thread_kevent_override(thread, qos_override, FALSE);
2989 }
2990
2991 void
2992 thread_drop_kevent_override(thread_t thread)
2993 {
2994 struct task_pend_token pend_token = {};
2995
2996 spl_t s = splsched();
2997 thread_lock(thread);
2998
2999 assert(thread->kevent_overrides > 0);
3000
3001 if (--thread->kevent_overrides == 0) {
3002 /*
3003 * There are no more overrides for this thread, so we should
3004 * clear out the saturated override value
3005 */
3006
3007 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3008 TASK_POLICY_QOS_KEVENT_OVERRIDE, THREAD_QOS_UNSPECIFIED,
3009 0, &pend_token);
3010 }
3011
3012 thread_unlock(thread);
3013 splx(s);
3014
3015 thread_policy_update_complete_unlocked(thread, &pend_token);
3016 }
3017
3018 /*
3019 * Set the thread's QoS Workloop Servicer override
3020 * Owned by the Kevent subsystem
3021 *
3022 * May be called with spinlocks held, but not spinlocks
3023 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
3024 *
3025 * One 'add' must be balanced by one 'drop'.
3026 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
3027 * Before the thread is deallocated, there must be 0 remaining overrides.
3028 */
3029 static void
3030 thread_servicer_override(thread_t thread,
3031 uint32_t qos_override,
3032 boolean_t is_new_override)
3033 {
3034 struct task_pend_token pend_token = {};
3035
3036 spl_t s = splsched();
3037 thread_lock(thread);
3038
3039 if (is_new_override) {
3040 assert(!thread->requested_policy.thrp_qos_wlsvc_override);
3041 } else {
3042 assert(thread->requested_policy.thrp_qos_wlsvc_override);
3043 }
3044
3045 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
3046 TASK_POLICY_QOS_SERVICER_OVERRIDE,
3047 qos_override, 0, &pend_token);
3048
3049 thread_unlock(thread);
3050 splx(s);
3051
3052 assert(pend_token.tpt_update_sockets == 0);
3053 thread_policy_update_complete_unlocked(thread, &pend_token);
3054 }
3055
3056 void
3057 thread_add_servicer_override(thread_t thread, uint32_t qos_override)
3058 {
3059 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3060 assert(qos_override < THREAD_QOS_LAST);
3061
3062 thread_servicer_override(thread, qos_override, TRUE);
3063 }
3064
3065 void
3066 thread_update_servicer_override(thread_t thread, uint32_t qos_override)
3067 {
3068 assert(qos_override > THREAD_QOS_UNSPECIFIED);
3069 assert(qos_override < THREAD_QOS_LAST);
3070
3071 thread_servicer_override(thread, qos_override, FALSE);
3072 }
3073
3074 void
3075 thread_drop_servicer_override(thread_t thread)
3076 {
3077 thread_servicer_override(thread, THREAD_QOS_UNSPECIFIED, FALSE);
3078 }
3079
3080
3081 /* Get current requested qos / relpri, may be called from spinlock context */
3082 thread_qos_t
3083 thread_get_requested_qos(thread_t thread, int *relpri)
3084 {
3085 int relprio_value = 0;
3086 thread_qos_t qos;
3087
3088 qos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
3089 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
3090 if (relpri) {
3091 *relpri = -relprio_value;
3092 }
3093 return qos;
3094 }
3095
3096 /*
3097 * This function will promote the thread priority
3098 * since exec could block other threads calling
3099 * proc_find on the proc. This boost must be removed
3100 * via a call to thread_clear_exec_promotion.
3101 *
3102 * This should be replaced with a generic 'priority inheriting gate' mechanism (24194397)
3103 */
3104 void
3105 thread_set_exec_promotion(thread_t thread)
3106 {
3107 spl_t s = splsched();
3108 thread_lock(thread);
3109
3110 sched_thread_promote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3111
3112 thread_unlock(thread);
3113 splx(s);
3114 }
3115
3116 /*
3117 * This function will clear the exec thread
3118 * promotion set on the thread by thread_set_exec_promotion.
3119 */
3120 void
3121 thread_clear_exec_promotion(thread_t thread)
3122 {
3123 spl_t s = splsched();
3124 thread_lock(thread);
3125
3126 sched_thread_unpromote_reason(thread, TH_SFLAG_EXEC_PROMOTED, 0);
3127
3128 thread_unlock(thread);
3129 splx(s);
3130 }