apple/xnu.git (xnu-3789.41.3): osfmk/kern/thread_policy.c
1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <mach/mach_types.h>
30 #include <mach/thread_act_server.h>
31
32 #include <kern/kern_types.h>
33 #include <kern/processor.h>
34 #include <kern/thread.h>
35 #include <kern/affinity.h>
36 #include <mach/task_policy.h>
37 #include <kern/sfi.h>
38 #include <kern/policy_internal.h>
39 #include <sys/errno.h>
40 #include <sys/ulock.h>
41
42 #include <mach/machine/sdt.h>
43
44 #ifdef MACH_BSD
45 extern int proc_selfpid(void);
46 extern char * proc_name_address(void *p);
47 extern void rethrottle_thread(void * uthread);
48 #endif /* MACH_BSD */
49
50 #define QOS_EXTRACT(q) ((q) & 0xff)
51
52 uint32_t qos_override_mode;
53 #define QOS_OVERRIDE_MODE_OVERHANG_PEAK 0
54 #define QOS_OVERRIDE_MODE_IGNORE_OVERRIDE 1
55 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE 2
56 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH 3
57 #define QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE 4
58
59 extern zone_t thread_qos_override_zone;
60
61 static boolean_t
62 proc_thread_qos_remove_override_internal(thread_t thread, user_addr_t resource, int resource_type, boolean_t reset, boolean_t squash);
63
64 /*
65 * THREAD_QOS_UNSPECIFIED is assigned the highest tier available, so it does not provide a limit
66 * to threads that don't have a QoS class set.
67 */
68 const qos_policy_params_t thread_qos_policy_params = {
69 /*
70 * This table defines the starting base priority of the thread,
71 * which will be modified by the thread importance and the task max priority
72 * before being applied.
73 */
74 .qos_pri[THREAD_QOS_UNSPECIFIED] = 0, /* not consulted */
75 .qos_pri[THREAD_QOS_USER_INTERACTIVE] = BASEPRI_BACKGROUND, /* i.e. 46 */
76 .qos_pri[THREAD_QOS_USER_INITIATED] = BASEPRI_USER_INITIATED,
77 .qos_pri[THREAD_QOS_LEGACY] = BASEPRI_DEFAULT,
78 .qos_pri[THREAD_QOS_UTILITY] = BASEPRI_UTILITY,
79 .qos_pri[THREAD_QOS_BACKGROUND] = MAXPRI_THROTTLE,
80 .qos_pri[THREAD_QOS_MAINTENANCE] = MAXPRI_THROTTLE,
81
82 /*
83 * This table defines the highest IO priority that a thread marked with this
84 * QoS class can have.
85 */
86 .qos_iotier[THREAD_QOS_UNSPECIFIED] = THROTTLE_LEVEL_TIER0,
87 .qos_iotier[THREAD_QOS_USER_INTERACTIVE] = THROTTLE_LEVEL_TIER0,
88 .qos_iotier[THREAD_QOS_USER_INITIATED] = THROTTLE_LEVEL_TIER0,
89 .qos_iotier[THREAD_QOS_LEGACY] = THROTTLE_LEVEL_TIER0,
90 .qos_iotier[THREAD_QOS_UTILITY] = THROTTLE_LEVEL_TIER1,
91 .qos_iotier[THREAD_QOS_BACKGROUND] = THROTTLE_LEVEL_TIER2, /* possibly overridden by bg_iotier */
92 .qos_iotier[THREAD_QOS_MAINTENANCE] = THROTTLE_LEVEL_TIER3,
93
94 /*
95 * These tables define the tightest throughput and latency QoS tiers that
96 * a thread marked with this QoS class can have.
97 */
98
99 .qos_through_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_UNSPECIFIED),
100 .qos_through_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_0),
101 .qos_through_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
102 .qos_through_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_1),
103 .qos_through_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_2),
104 .qos_through_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
105 .qos_through_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(THROUGHPUT_QOS_TIER_5),
106
107 .qos_latency_qos[THREAD_QOS_UNSPECIFIED] = QOS_EXTRACT(LATENCY_QOS_TIER_UNSPECIFIED),
108 .qos_latency_qos[THREAD_QOS_USER_INTERACTIVE] = QOS_EXTRACT(LATENCY_QOS_TIER_0),
109 .qos_latency_qos[THREAD_QOS_USER_INITIATED] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
110 .qos_latency_qos[THREAD_QOS_LEGACY] = QOS_EXTRACT(LATENCY_QOS_TIER_1),
111 .qos_latency_qos[THREAD_QOS_UTILITY] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
112 .qos_latency_qos[THREAD_QOS_BACKGROUND] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
113 .qos_latency_qos[THREAD_QOS_MAINTENANCE] = QOS_EXTRACT(LATENCY_QOS_TIER_3),
114 };
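/*
 * Illustrative reading of the tables above (a sketch, assuming the usual
 * constant values from osfmk/kern/sched.h): a THREAD_QOS_UTILITY thread
 *
 *   - starts from qos_pri[THREAD_QOS_UTILITY]      == BASEPRI_UTILITY (20),
 *   - can issue IO no better than qos_iotier[...]  == THROTTLE_LEVEL_TIER1,
 *   - is capped at qos_through_qos[...]            == THROUGHPUT_QOS_TIER_2, and
 *   - is capped at qos_latency_qos[...]            == LATENCY_QOS_TIER_3.
 *
 * thread_recompute_priority() and thread_policy_update_internal_spinlocked()
 * below consult these entries when deriving the thread's effective policy.
 */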
115
116 static void
117 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode);
118
119 static int
120 thread_qos_scaled_relative_priority(int qos, int qos_relprio);
121
122 static void
123 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info);
124
125 static void
126 proc_set_thread_policy_locked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
127
128 static void
129 proc_set_thread_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2, task_pend_token_t pend_token);
130
131 static void
132 thread_set_requested_policy_spinlocked(thread_t thread, int category, int flavor, int value, int value2);
133
134 static int
135 thread_get_requested_policy_spinlocked(thread_t thread, int category, int flavor, int* value2);
136
137 static int
138 proc_get_thread_policy_locked(thread_t thread, int category, int flavor, int* value2);
139
140 static void
141 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
142
143 static void
144 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token);
145
146 void
147 thread_policy_init(void) {
148 if (PE_parse_boot_argn("qos_override_mode", &qos_override_mode, sizeof(qos_override_mode))) {
149 printf("QOS override mode: 0x%08x\n", qos_override_mode);
150 } else {
151 qos_override_mode = QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE;
152 }
153 }
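/*
 * Usage note (illustrative): the override mode can be selected with the
 * "qos_override_mode" boot-arg parsed above, e.g. qos_override_mode=2 for
 * QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE. Without the boot-arg the kernel
 * defaults to QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE.
 */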
154
155 boolean_t
156 thread_has_qos_policy(thread_t thread) {
157 return (proc_get_thread_policy(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS) != THREAD_QOS_UNSPECIFIED) ? TRUE : FALSE;
158 }
159
160
161 static void
162 thread_remove_qos_policy_locked(thread_t thread,
163 task_pend_token_t pend_token)
164 {
165
166 __unused int prev_qos = thread->requested_policy.thrp_qos;
167
168 DTRACE_PROC2(qos__remove, thread_t, thread, int, prev_qos);
169
170 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
171 THREAD_QOS_UNSPECIFIED, 0, pend_token);
172 }
173
174 kern_return_t
175 thread_remove_qos_policy(thread_t thread)
176 {
177 struct task_pend_token pend_token = {};
178
179 thread_mtx_lock(thread);
180 if (!thread->active) {
181 thread_mtx_unlock(thread);
182 return KERN_TERMINATED;
183 }
184
185 thread_remove_qos_policy_locked(thread, &pend_token);
186
187 thread_mtx_unlock(thread);
188
189 thread_policy_update_complete_unlocked(thread, &pend_token);
190
191 return KERN_SUCCESS;
192 }
193
194
195 boolean_t
196 thread_is_static_param(thread_t thread)
197 {
198 if (thread->static_param) {
199 DTRACE_PROC1(qos__legacy__denied, thread_t, thread);
200 return TRUE;
201 }
202 return FALSE;
203 }
204
205 /*
206 * Relative priorities can range between 0REL and -15REL. These
207 * map to QoS-specific ranges, to create non-overlapping priority
208 * ranges.
209 */
210 static int
211 thread_qos_scaled_relative_priority(int qos, int qos_relprio)
212 {
213 int next_lower_qos;
214
215 /* Fast path, since no validation or scaling is needed */
216 if (qos_relprio == 0) return 0;
217
218 switch (qos) {
219 case THREAD_QOS_USER_INTERACTIVE:
220 next_lower_qos = THREAD_QOS_USER_INITIATED;
221 break;
222 case THREAD_QOS_USER_INITIATED:
223 next_lower_qos = THREAD_QOS_LEGACY;
224 break;
225 case THREAD_QOS_LEGACY:
226 next_lower_qos = THREAD_QOS_UTILITY;
227 break;
228 case THREAD_QOS_UTILITY:
229 next_lower_qos = THREAD_QOS_BACKGROUND;
230 break;
231 case THREAD_QOS_MAINTENANCE:
232 case THREAD_QOS_BACKGROUND:
233 next_lower_qos = 0;
234 break;
235 default:
236 panic("Unrecognized QoS %d", qos);
237 return 0;
238 }
239
240 int prio_range_max = thread_qos_policy_params.qos_pri[qos];
241 int prio_range_min = next_lower_qos ? thread_qos_policy_params.qos_pri[next_lower_qos] : 0;
242
243 /*
244 * We now have the valid range that the scaled relative priority can map to. Note
245 * that the lower bound is exclusive, but the upper bound is inclusive. If the
246 * range is (21,31], 0REL should map to 31 and -15REL should map to 22. We use the
247 * fact that the max relative priority is -15 and use ">>4" to divide by 16 and discard
248 * remainder.
249 */
250 int scaled_relprio = -(((prio_range_max - prio_range_min) * (-qos_relprio)) >> 4);
251
252 return scaled_relprio;
253 }
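/*
 * Worked example of the scaling above (a sketch assuming the usual values
 * BASEPRI_DEFAULT == 31 and BASEPRI_UTILITY == 20): for THREAD_QOS_LEGACY the
 * valid range is (20, 31], so prio_range_max - prio_range_min == 11 and
 *
 *      0REL -> -((11 *  0) >> 4) ==   0   (base priority 31)
 *     -8REL -> -((11 *  8) >> 4) ==  -5   (base priority 26)
 *    -15REL -> -((11 * 15) >> 4) == -10   (base priority 21)
 *
 * so all sixteen relative-priority steps land inside the LEGACY band without
 * reaching the UTILITY base priority.
 */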
254
255 /*
256 * flag set by -qos-policy-allow boot-arg to allow
257 * testing thread qos policy from userspace
258 */
259 boolean_t allow_qos_policy_set = FALSE;
260
261 kern_return_t
262 thread_policy_set(
263 thread_t thread,
264 thread_policy_flavor_t flavor,
265 thread_policy_t policy_info,
266 mach_msg_type_number_t count)
267 {
268 thread_qos_policy_data_t req_qos;
269 kern_return_t kr;
270
271 req_qos.qos_tier = THREAD_QOS_UNSPECIFIED;
272
273 if (thread == THREAD_NULL)
274 return (KERN_INVALID_ARGUMENT);
275
276 if (allow_qos_policy_set == FALSE) {
277 if (thread_is_static_param(thread))
278 return (KERN_POLICY_STATIC);
279
280 if (flavor == THREAD_QOS_POLICY)
281 return (KERN_INVALID_ARGUMENT);
282 }
283
284 /* Threads without static_param set reset their QoS when other policies are applied. */
285 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED) {
286 /* Store the existing tier, if we fail this call it is used to reset back. */
287 req_qos.qos_tier = thread->requested_policy.thrp_qos;
288 req_qos.tier_importance = thread->requested_policy.thrp_qos_relprio;
289
290 kr = thread_remove_qos_policy(thread);
291 if (kr != KERN_SUCCESS) {
292 return kr;
293 }
294 }
295
296 kr = thread_policy_set_internal(thread, flavor, policy_info, count);
297
298 /* If we removed a QoS above and the new policy failed to apply, restore the original QoS. */
299 if (req_qos.qos_tier != THREAD_QOS_UNSPECIFIED) {
300 if (kr != KERN_SUCCESS) {
301 /* Reset back to our original tier as the set failed. */
302 (void)thread_policy_set_internal(thread, THREAD_QOS_POLICY, (thread_policy_t)&req_qos, THREAD_QOS_POLICY_COUNT);
303 }
304 }
305
306 return kr;
307 }
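/*
 * Sketch of how this MIG routine is typically reached from user space, via
 * the public thread_policy_set() wrapper (illustrative only; computation_abs
 * and constraint_abs stand in for caller-supplied values in
 * mach_absolute_time() units):
 *
 *   #include <mach/mach.h>
 *   #include <mach/thread_policy.h>
 *
 *   thread_time_constraint_policy_data_t policy = {
 *       .period      = 0,
 *       .computation = computation_abs,
 *       .constraint  = constraint_abs,   // must be >= computation
 *       .preemptible = TRUE,
 *   };
 *   kern_return_t kr = thread_policy_set(mach_thread_self(),
 *                                        THREAD_TIME_CONSTRAINT_POLICY,
 *                                        (thread_policy_t)&policy,
 *                                        THREAD_TIME_CONSTRAINT_POLICY_COUNT);
 *
 * Such a request lands in thread_policy_set_internal() below, which checks
 * computation against min_rt_quantum/max_rt_quantum before moving the thread
 * to TH_MODE_REALTIME.
 */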
308
309 kern_return_t
310 thread_policy_set_internal(
311 thread_t thread,
312 thread_policy_flavor_t flavor,
313 thread_policy_t policy_info,
314 mach_msg_type_number_t count)
315 {
316 kern_return_t result = KERN_SUCCESS;
317 struct task_pend_token pend_token = {};
318
319 thread_mtx_lock(thread);
320 if (!thread->active) {
321 thread_mtx_unlock(thread);
322
323 return (KERN_TERMINATED);
324 }
325
326 switch (flavor) {
327
328 case THREAD_EXTENDED_POLICY:
329 {
330 boolean_t timeshare = TRUE;
331
332 if (count >= THREAD_EXTENDED_POLICY_COUNT) {
333 thread_extended_policy_t info;
334
335 info = (thread_extended_policy_t)policy_info;
336 timeshare = info->timeshare;
337 }
338
339 sched_mode_t mode = (timeshare == TRUE) ? TH_MODE_TIMESHARE : TH_MODE_FIXED;
340
341 spl_t s = splsched();
342 thread_lock(thread);
343
344 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
345
346 thread_unlock(thread);
347 splx(s);
348
349 pend_token.tpt_update_thread_sfi = 1;
350
351 break;
352 }
353
354 case THREAD_TIME_CONSTRAINT_POLICY:
355 {
356 thread_time_constraint_policy_t info;
357
358 if (count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
359 result = KERN_INVALID_ARGUMENT;
360 break;
361 }
362
363 info = (thread_time_constraint_policy_t)policy_info;
364 if (info->constraint < info->computation ||
365 info->computation > max_rt_quantum ||
366 info->computation < min_rt_quantum ) {
367 result = KERN_INVALID_ARGUMENT;
368 break;
369 }
370
371 spl_t s = splsched();
372 thread_lock(thread);
373
374 thread->realtime.period = info->period;
375 thread->realtime.computation = info->computation;
376 thread->realtime.constraint = info->constraint;
377 thread->realtime.preemptible = info->preemptible;
378
379 thread_set_user_sched_mode_and_recompute_pri(thread, TH_MODE_REALTIME);
380
381 thread_unlock(thread);
382 splx(s);
383
384 pend_token.tpt_update_thread_sfi = 1;
385
386 break;
387 }
388
389 case THREAD_PRECEDENCE_POLICY:
390 {
391 thread_precedence_policy_t info;
392
393 if (count < THREAD_PRECEDENCE_POLICY_COUNT) {
394 result = KERN_INVALID_ARGUMENT;
395 break;
396 }
397 info = (thread_precedence_policy_t)policy_info;
398
399 spl_t s = splsched();
400 thread_lock(thread);
401
402 thread->importance = info->importance;
403
404 thread_recompute_priority(thread);
405
406 thread_unlock(thread);
407 splx(s);
408
409 break;
410 }
411
412 case THREAD_AFFINITY_POLICY:
413 {
414 thread_affinity_policy_t info;
415
416 if (!thread_affinity_is_supported()) {
417 result = KERN_NOT_SUPPORTED;
418 break;
419 }
420 if (count < THREAD_AFFINITY_POLICY_COUNT) {
421 result = KERN_INVALID_ARGUMENT;
422 break;
423 }
424
425 info = (thread_affinity_policy_t) policy_info;
426 /*
427 * Unlock the thread mutex here and
428 * return directly after calling thread_affinity_set().
429 * This is necessary for correct lock ordering because
430 * thread_affinity_set() takes the task lock.
431 */
432 thread_mtx_unlock(thread);
433 return thread_affinity_set(thread, info->affinity_tag);
434 }
435
436
437 case THREAD_THROUGHPUT_QOS_POLICY:
438 {
439 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
440 thread_throughput_qos_t tqos;
441
442 if (count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
443 result = KERN_INVALID_ARGUMENT;
444 break;
445 }
446
447 if ((result = qos_throughput_policy_validate(info->thread_throughput_qos_tier)) != KERN_SUCCESS)
448 break;
449
450 tqos = qos_extract(info->thread_throughput_qos_tier);
451
452 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
453 TASK_POLICY_THROUGH_QOS, tqos, 0, &pend_token);
454
455 break;
456 }
457
458 case THREAD_LATENCY_QOS_POLICY:
459 {
460 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
461 thread_latency_qos_t lqos;
462
463 if (count < THREAD_LATENCY_QOS_POLICY_COUNT) {
464 result = KERN_INVALID_ARGUMENT;
465 break;
466 }
467
468 if ((result = qos_latency_policy_validate(info->thread_latency_qos_tier)) != KERN_SUCCESS)
469 break;
470
471 lqos = qos_extract(info->thread_latency_qos_tier);
472
473 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
474 TASK_POLICY_LATENCY_QOS, lqos, 0, &pend_token);
475
476 break;
477 }
478
479 case THREAD_QOS_POLICY:
480 {
481 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
482
483 if (count < THREAD_QOS_POLICY_COUNT) {
484 result = KERN_INVALID_ARGUMENT;
485 break;
486 }
487
488 if (info->qos_tier < 0 || info->qos_tier >= THREAD_QOS_LAST) {
489 result = KERN_INVALID_ARGUMENT;
490 break;
491 }
492
493 if (info->tier_importance > 0 || info->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) {
494 result = KERN_INVALID_ARGUMENT;
495 break;
496 }
497
498 if (info->qos_tier == THREAD_QOS_UNSPECIFIED && info->tier_importance != 0) {
499 result = KERN_INVALID_ARGUMENT;
500 break;
501 }
502
503 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO,
504 info->qos_tier, -info->tier_importance, &pend_token);
505
506 break;
507 }
508
509 default:
510 result = KERN_INVALID_ARGUMENT;
511 break;
512 }
513
514 thread_mtx_unlock(thread);
515
516 thread_policy_update_complete_unlocked(thread, &pend_token);
517
518 return (result);
519 }
520
521 /*
522 * Note that there is no implemented difference between POLICY_RR and POLICY_FIFO.
523 * Both result in FIXED mode scheduling.
524 */
525 static sched_mode_t
526 convert_policy_to_sched_mode(integer_t policy) {
527 switch (policy) {
528 case POLICY_TIMESHARE:
529 return TH_MODE_TIMESHARE;
530 case POLICY_RR:
531 case POLICY_FIFO:
532 return TH_MODE_FIXED;
533 default:
534 panic("unexpected sched policy: %d", policy);
535 return TH_MODE_NONE;
536 }
537 }
538
539 /*
540 * Called either with the thread mutex locked
541 * or from the pthread kext in a 'safe place'.
542 */
543 static kern_return_t
544 thread_set_mode_and_absolute_pri_internal(thread_t thread,
545 sched_mode_t mode,
546 integer_t priority,
547 task_pend_token_t pend_token)
548 {
549 kern_return_t kr = KERN_SUCCESS;
550
551 spl_t s = splsched();
552 thread_lock(thread);
553
554 /* This path isn't allowed to change a thread out of realtime. */
555 if ((thread->sched_mode == TH_MODE_REALTIME) ||
556 (thread->saved_mode == TH_MODE_REALTIME)) {
557 kr = KERN_FAILURE;
558 goto unlock;
559 }
560
561 if (thread->policy_reset) {
562 kr = KERN_SUCCESS;
563 goto unlock;
564 }
565
566 sched_mode_t old_mode = thread->sched_mode;
567
568 /*
569 * Reverse engineer and apply the correct importance value
570 * from the requested absolute priority value.
571 *
572 * TODO: Store the absolute priority value instead
573 */
574
575 if (priority >= thread->max_priority)
576 priority = thread->max_priority - thread->task_priority;
577 else if (priority >= MINPRI_KERNEL)
578 priority -= MINPRI_KERNEL;
579 else if (priority >= MINPRI_RESERVED)
580 priority -= MINPRI_RESERVED;
581 else
582 priority -= BASEPRI_DEFAULT;
583
584 priority += thread->task_priority;
585
586 if (priority > thread->max_priority)
587 priority = thread->max_priority;
588 else if (priority < MINPRI)
589 priority = MINPRI;
590
591 thread->importance = priority - thread->task_priority;
592
593 thread_set_user_sched_mode_and_recompute_pri(thread, mode);
594
595 if (mode != old_mode)
596 pend_token->tpt_update_thread_sfi = 1;
597
598 unlock:
599 thread_unlock(thread);
600 splx(s);
601
602 return kr;
603 }
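/*
 * Worked example of the reverse-engineering above (a sketch assuming
 * BASEPRI_DEFAULT == 31 and an ordinary user task with task_priority == 31,
 * max_priority == 63):
 *
 *   requested priority 40: 40 - BASEPRI_DEFAULT == 9, base priority becomes
 *                          9 + task_priority == 40, importance == 9
 *   requested priority 70: 70 >= max_priority, so importance becomes
 *                          max_priority - task_priority == 32 and the base
 *                          priority clamps to max_priority (63)
 */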
604
605 /*
606 * KPI for pthread kext
607 *
608 * Set scheduling policy & absolute priority for thread
609 * May be called from waitqueue callout context with spinlocks held
610 * Thread mutex lock is not held
611 */
612 kern_return_t
613 thread_set_workq_pri(thread_t thread,
614 integer_t priority,
615 integer_t policy)
616 {
617 struct task_pend_token pend_token = {};
618 sched_mode_t mode = convert_policy_to_sched_mode(policy);
619
620 assert(thread->static_param);
621 if (!thread->static_param)
622 return KERN_FAILURE;
623
624 /* Concern: this doesn't hold the mutex... */
625 if (!thread->active)
626 return KERN_TERMINATED;
627
628 kern_return_t kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
629
630 if (pend_token.tpt_update_thread_sfi)
631 sfi_reevaluate(thread);
632
633 return kr;
634 }
635
636 /*
637 * thread_set_mode_and_absolute_pri:
638 *
639 * Set scheduling policy & absolute priority for thread, for deprecated
640 * thread_set_policy and thread_policy interfaces.
641 *
642 * Called with nothing locked.
643 */
644 kern_return_t
645 thread_set_mode_and_absolute_pri(thread_t thread,
646 integer_t policy,
647 integer_t priority)
648 {
649 kern_return_t kr = KERN_SUCCESS;
650 struct task_pend_token pend_token = {};
651
652 sched_mode_t mode = convert_policy_to_sched_mode(policy);
653
654 thread_mtx_lock(thread);
655
656 if (!thread->active) {
657 kr = KERN_TERMINATED;
658 goto unlock;
659 }
660
661 if (thread_is_static_param(thread)) {
662 kr = KERN_POLICY_STATIC;
663 goto unlock;
664 }
665
666 /* Setting legacy policies on threads kills the current QoS */
667 if (thread->requested_policy.thrp_qos != THREAD_QOS_UNSPECIFIED)
668 thread_remove_qos_policy_locked(thread, &pend_token);
669
670 kr = thread_set_mode_and_absolute_pri_internal(thread, mode, priority, &pend_token);
671
672 unlock:
673 thread_mtx_unlock(thread);
674
675 thread_policy_update_complete_unlocked(thread, &pend_token);
676
677 return (kr);
678 }
679
680 /*
681 * Set the thread's requested mode and recompute priority
682 * Called with thread mutex and thread locked
683 *
684 * TODO: Mitigate potential problems caused by moving thread to end of runq
685 * whenever its priority is recomputed
686 * Only remove when it actually changes? Attempt to re-insert at appropriate location?
687 */
688 static void
689 thread_set_user_sched_mode_and_recompute_pri(thread_t thread, sched_mode_t mode)
690 {
691 if (thread->policy_reset)
692 return;
693
694 boolean_t removed = thread_run_queue_remove(thread);
695
696 /*
697 * TODO: Instead of having saved mode, have 'user mode' and 'true mode'.
698 * That way there's zero confusion over which the user wants
699 * and which the kernel wants.
700 */
701 if (thread->sched_flags & TH_SFLAG_DEMOTED_MASK)
702 thread->saved_mode = mode;
703 else
704 sched_set_thread_mode(thread, mode);
705
706 thread_recompute_priority(thread);
707
708 if (removed)
709 thread_run_queue_reinsert(thread, SCHED_TAILQ);
710 }
711
712 /* called at splsched with thread lock locked */
713 static void
714 thread_update_qos_cpu_time_locked(thread_t thread)
715 {
716 task_t task = thread->task;
717 uint64_t timer_sum, timer_delta;
718
719 /*
720 * This is only as accurate as the distance between
721 * last context switch (embedded) or last user/kernel boundary transition (desktop)
722 * because user_timer and system_timer are only updated then.
723 *
724 * TODO: Consider running a thread_timer_event operation here to update it first.
725 * Maybe doable with interrupts disabled from current thread.
726 * If the thread is on a different core, may not be easy to get right.
727 *
728 * TODO: There should be a function for this in timer.c
729 */
730
731 timer_sum = timer_grab(&thread->user_timer);
732 timer_sum += timer_grab(&thread->system_timer);
733 timer_delta = timer_sum - thread->vtimer_qos_save;
734
735 thread->vtimer_qos_save = timer_sum;
736
737 uint64_t* task_counter = NULL;
738
739 /* Update the task-level qos stats atomically, because we don't have the task lock. */
740 switch (thread->effective_policy.thep_qos) {
741 case THREAD_QOS_DEFAULT: task_counter = &task->cpu_time_qos_stats.cpu_time_qos_default; break;
742 case THREAD_QOS_MAINTENANCE: task_counter = &task->cpu_time_qos_stats.cpu_time_qos_maintenance; break;
743 case THREAD_QOS_BACKGROUND: task_counter = &task->cpu_time_qos_stats.cpu_time_qos_background; break;
744 case THREAD_QOS_UTILITY: task_counter = &task->cpu_time_qos_stats.cpu_time_qos_utility; break;
745 case THREAD_QOS_LEGACY: task_counter = &task->cpu_time_qos_stats.cpu_time_qos_legacy; break;
746 case THREAD_QOS_USER_INITIATED: task_counter = &task->cpu_time_qos_stats.cpu_time_qos_user_initiated; break;
747 case THREAD_QOS_USER_INTERACTIVE: task_counter = &task->cpu_time_qos_stats.cpu_time_qos_user_interactive; break;
748 default:
749 panic("unknown effective QoS: %d", thread->effective_policy.thep_qos);
750 }
751
752 OSAddAtomic64(timer_delta, task_counter);
753 }
754
755 /*
756 * called with no thread locks held
757 * may hold task lock
758 */
759 void
760 thread_update_qos_cpu_time(thread_t thread)
761 {
762 thread_mtx_lock(thread);
763
764 spl_t s = splsched();
765 thread_lock(thread);
766
767 thread_update_qos_cpu_time_locked(thread);
768
769 thread_unlock(thread);
770 splx(s);
771
772 thread_mtx_unlock(thread);
773 }
774
775 /*
776 * Calculate base priority from thread attributes, and set it on the thread
777 *
778 * Called with thread_lock and thread mutex held.
779 */
780 void
781 thread_recompute_priority(
782 thread_t thread)
783 {
784 integer_t priority;
785
786 if (thread->policy_reset)
787 return;
788
789 if (thread->sched_mode == TH_MODE_REALTIME) {
790 sched_set_thread_base_priority(thread, BASEPRI_RTQUEUES);
791 return;
792 } else if (thread->effective_policy.thep_qos != THREAD_QOS_UNSPECIFIED) {
793 int qos = thread->effective_policy.thep_qos;
794 int qos_ui_is_urgent = thread->effective_policy.thep_qos_ui_is_urgent;
795 int qos_relprio = -(thread->effective_policy.thep_qos_relprio); /* stored in task policy inverted */
796 int qos_scaled_relprio;
797
798 assert(qos >= 0 && qos < THREAD_QOS_LAST);
799 assert(qos_relprio <= 0 && qos_relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
800
801 priority = thread_qos_policy_params.qos_pri[qos];
802 qos_scaled_relprio = thread_qos_scaled_relative_priority(qos, qos_relprio);
803
804 if (qos == THREAD_QOS_USER_INTERACTIVE && qos_ui_is_urgent == 1) {
805 /* Bump priority 46 to 47 when in a frontmost app */
806 qos_scaled_relprio += 1;
807 }
808
809 /* TODO: factor in renice priority here? */
810
811 priority += qos_scaled_relprio;
812 } else {
813 if (thread->importance > MAXPRI)
814 priority = MAXPRI;
815 else if (thread->importance < -MAXPRI)
816 priority = -MAXPRI;
817 else
818 priority = thread->importance;
819
820 priority += thread->task_priority;
821 }
822
823 priority = MAX(priority, thread->user_promotion_basepri);
824
825 /*
826 * Clamp priority back into the allowed range for this task.
827 * The initial priority value could be out of this range due to:
828 * Task clamped to BG or Utility (max-pri is 4, or 20)
829 * Task is user task (max-pri is 63)
830 * Task is kernel task (max-pri is 95)
831 * Note that thread->importance is user-settable to any integer
832 * via THREAD_PRECEDENCE_POLICY.
833 */
834 if (priority > thread->max_priority)
835 priority = thread->max_priority;
836 else if (priority < MINPRI)
837 priority = MINPRI;
838
839 if (thread->saved_mode == TH_MODE_REALTIME &&
840 thread->sched_flags & TH_SFLAG_FAILSAFE)
841 priority = DEPRESSPRI;
842
843 if (thread->effective_policy.thep_terminated == TRUE) {
844 /*
845 * We temporarily want to override the expected priority to
846 * ensure that the thread exits in a timely manner.
847 * Note that this is allowed to exceed thread->max_priority
848 * so that the thread is no longer clamped to background
849 * during the final exit phase.
850 */
851 if (priority < thread->task_priority)
852 priority = thread->task_priority;
853 if (priority < BASEPRI_DEFAULT)
854 priority = BASEPRI_DEFAULT;
855 }
856
857
858 sched_set_thread_base_priority(thread, priority);
859 }
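/*
 * Example of the QoS branch above (a sketch, usual constants assumed): a
 * THREAD_QOS_USER_INTERACTIVE thread with relprio 0 in a frontmost app gets
 * qos_pri[THREAD_QOS_USER_INTERACTIVE] (46) plus the thep_qos_ui_is_urgent
 * bump, i.e. base priority 47, before the user_promotion_basepri floor and
 * the max_priority/MINPRI clamp are applied.
 */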
860
861 /* Called with the task lock held, but not the thread mutex or spinlock */
862 void
863 thread_policy_update_tasklocked(
864 thread_t thread,
865 integer_t priority,
866 integer_t max_priority,
867 task_pend_token_t pend_token)
868 {
869 thread_mtx_lock(thread);
870
871 if (!thread->active || thread->policy_reset) {
872 thread_mtx_unlock(thread);
873 return;
874 }
875
876 spl_t s = splsched();
877 thread_lock(thread);
878
879 __unused
880 integer_t old_max_priority = thread->max_priority;
881
882 thread->task_priority = priority;
883 thread->max_priority = max_priority;
884
885
886 thread_policy_update_spinlocked(thread, TRUE, pend_token);
887
888 thread_unlock(thread);
889 splx(s);
890
891 thread_mtx_unlock(thread);
892 }
893
894 /*
895 * Reset thread to default state in preparation for termination
896 * Called with thread mutex locked
897 *
898 * Always called on current thread, so we don't need a run queue remove
899 */
900 void
901 thread_policy_reset(
902 thread_t thread)
903 {
904 spl_t s;
905
906 assert(thread == current_thread());
907
908 s = splsched();
909 thread_lock(thread);
910
911 if (thread->sched_flags & TH_SFLAG_FAILSAFE)
912 sched_thread_mode_undemote(thread, TH_SFLAG_FAILSAFE);
913
914 if (thread->sched_flags & TH_SFLAG_THROTTLED)
915 sched_thread_mode_undemote(thread, TH_SFLAG_THROTTLED);
916
917 /* At this point, the various demotions should be inactive */
918 assert(!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK));
919 assert(!(thread->sched_flags & TH_SFLAG_THROTTLED));
920 assert(!(thread->sched_flags & TH_SFLAG_DEPRESSED_MASK));
921
922 /* Reset thread back to task-default basepri and mode */
923 sched_mode_t newmode = SCHED(initial_thread_sched_mode)(thread->task);
924
925 sched_set_thread_mode(thread, newmode);
926
927 thread->importance = 0;
928
929 /* Prevent further changes to thread base priority or mode */
930 thread->policy_reset = 1;
931
932 sched_set_thread_base_priority(thread, thread->task_priority);
933
934 thread_unlock(thread);
935 splx(s);
936 }
937
938 kern_return_t
939 thread_policy_get(
940 thread_t thread,
941 thread_policy_flavor_t flavor,
942 thread_policy_t policy_info,
943 mach_msg_type_number_t *count,
944 boolean_t *get_default)
945 {
946 kern_return_t result = KERN_SUCCESS;
947
948 if (thread == THREAD_NULL)
949 return (KERN_INVALID_ARGUMENT);
950
951 thread_mtx_lock(thread);
952 if (!thread->active) {
953 thread_mtx_unlock(thread);
954
955 return (KERN_TERMINATED);
956 }
957
958 switch (flavor) {
959
960 case THREAD_EXTENDED_POLICY:
961 {
962 boolean_t timeshare = TRUE;
963
964 if (!(*get_default)) {
965 spl_t s = splsched();
966 thread_lock(thread);
967
968 if ( (thread->sched_mode != TH_MODE_REALTIME) &&
969 (thread->saved_mode != TH_MODE_REALTIME) ) {
970 if (!(thread->sched_flags & TH_SFLAG_DEMOTED_MASK))
971 timeshare = (thread->sched_mode == TH_MODE_TIMESHARE) != 0;
972 else
973 timeshare = (thread->saved_mode == TH_MODE_TIMESHARE) != 0;
974 }
975 else
976 *get_default = TRUE;
977
978 thread_unlock(thread);
979 splx(s);
980 }
981
982 if (*count >= THREAD_EXTENDED_POLICY_COUNT) {
983 thread_extended_policy_t info;
984
985 info = (thread_extended_policy_t)policy_info;
986 info->timeshare = timeshare;
987 }
988
989 break;
990 }
991
992 case THREAD_TIME_CONSTRAINT_POLICY:
993 {
994 thread_time_constraint_policy_t info;
995
996 if (*count < THREAD_TIME_CONSTRAINT_POLICY_COUNT) {
997 result = KERN_INVALID_ARGUMENT;
998 break;
999 }
1000
1001 info = (thread_time_constraint_policy_t)policy_info;
1002
1003 if (!(*get_default)) {
1004 spl_t s = splsched();
1005 thread_lock(thread);
1006
1007 if ( (thread->sched_mode == TH_MODE_REALTIME) ||
1008 (thread->saved_mode == TH_MODE_REALTIME) ) {
1009 info->period = thread->realtime.period;
1010 info->computation = thread->realtime.computation;
1011 info->constraint = thread->realtime.constraint;
1012 info->preemptible = thread->realtime.preemptible;
1013 }
1014 else
1015 *get_default = TRUE;
1016
1017 thread_unlock(thread);
1018 splx(s);
1019 }
1020
1021 if (*get_default) {
1022 info->period = 0;
1023 info->computation = default_timeshare_computation;
1024 info->constraint = default_timeshare_constraint;
1025 info->preemptible = TRUE;
1026 }
1027
1028 break;
1029 }
1030
1031 case THREAD_PRECEDENCE_POLICY:
1032 {
1033 thread_precedence_policy_t info;
1034
1035 if (*count < THREAD_PRECEDENCE_POLICY_COUNT) {
1036 result = KERN_INVALID_ARGUMENT;
1037 break;
1038 }
1039
1040 info = (thread_precedence_policy_t)policy_info;
1041
1042 if (!(*get_default)) {
1043 spl_t s = splsched();
1044 thread_lock(thread);
1045
1046 info->importance = thread->importance;
1047
1048 thread_unlock(thread);
1049 splx(s);
1050 }
1051 else
1052 info->importance = 0;
1053
1054 break;
1055 }
1056
1057 case THREAD_AFFINITY_POLICY:
1058 {
1059 thread_affinity_policy_t info;
1060
1061 if (!thread_affinity_is_supported()) {
1062 result = KERN_NOT_SUPPORTED;
1063 break;
1064 }
1065 if (*count < THREAD_AFFINITY_POLICY_COUNT) {
1066 result = KERN_INVALID_ARGUMENT;
1067 break;
1068 }
1069
1070 info = (thread_affinity_policy_t)policy_info;
1071
1072 if (!(*get_default))
1073 info->affinity_tag = thread_affinity_get(thread);
1074 else
1075 info->affinity_tag = THREAD_AFFINITY_TAG_NULL;
1076
1077 break;
1078 }
1079
1080 case THREAD_POLICY_STATE:
1081 {
1082 thread_policy_state_t info;
1083
1084 if (*count < THREAD_POLICY_STATE_COUNT) {
1085 result = KERN_INVALID_ARGUMENT;
1086 break;
1087 }
1088
1089 /* Only root can get this info */
1090 if (current_task()->sec_token.val[0] != 0) {
1091 result = KERN_PROTECTION_FAILURE;
1092 break;
1093 }
1094
1095 info = (thread_policy_state_t)(void*)policy_info;
1096
1097 if (!(*get_default)) {
1098 info->flags = 0;
1099
1100 spl_t s = splsched();
1101 thread_lock(thread);
1102
1103 info->flags |= (thread->static_param ? THREAD_POLICY_STATE_FLAG_STATIC_PARAM : 0);
1104
1105 info->thps_requested_policy = *(uint64_t*)(void*)(&thread->requested_policy);
1106 info->thps_effective_policy = *(uint64_t*)(void*)(&thread->effective_policy);
1107
1108 info->thps_user_promotions = thread->user_promotions;
1109 info->thps_user_promotion_basepri = thread->user_promotion_basepri;
1110 info->thps_ipc_overrides = thread->ipc_overrides;
1111
1112 proc_get_thread_policy_bitfield(thread, info);
1113
1114 thread_unlock(thread);
1115 splx(s);
1116 } else {
1117 info->requested = 0;
1118 info->effective = 0;
1119 info->pending = 0;
1120 }
1121
1122 break;
1123 }
1124
1125 case THREAD_LATENCY_QOS_POLICY:
1126 {
1127 thread_latency_qos_policy_t info = (thread_latency_qos_policy_t) policy_info;
1128 thread_latency_qos_t plqos;
1129
1130 if (*count < THREAD_LATENCY_QOS_POLICY_COUNT) {
1131 result = KERN_INVALID_ARGUMENT;
1132 break;
1133 }
1134
1135 if (*get_default) {
1136 plqos = 0;
1137 } else {
1138 plqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_LATENCY_QOS, NULL);
1139 }
1140
1141 info->thread_latency_qos_tier = qos_latency_policy_package(plqos);
1142 }
1143 break;
1144
1145 case THREAD_THROUGHPUT_QOS_POLICY:
1146 {
1147 thread_throughput_qos_policy_t info = (thread_throughput_qos_policy_t) policy_info;
1148 thread_throughput_qos_t ptqos;
1149
1150 if (*count < THREAD_THROUGHPUT_QOS_POLICY_COUNT) {
1151 result = KERN_INVALID_ARGUMENT;
1152 break;
1153 }
1154
1155 if (*get_default) {
1156 ptqos = 0;
1157 } else {
1158 ptqos = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_THROUGH_QOS, NULL);
1159 }
1160
1161 info->thread_throughput_qos_tier = qos_throughput_policy_package(ptqos);
1162 }
1163 break;
1164
1165 case THREAD_QOS_POLICY:
1166 {
1167 thread_qos_policy_t info = (thread_qos_policy_t)policy_info;
1168
1169 if (*count < THREAD_QOS_POLICY_COUNT) {
1170 result = KERN_INVALID_ARGUMENT;
1171 break;
1172 }
1173
1174 if (!(*get_default)) {
1175 int relprio_value = 0;
1176 info->qos_tier = proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
1177 TASK_POLICY_QOS_AND_RELPRIO, &relprio_value);
1178
1179 info->tier_importance = -relprio_value;
1180 } else {
1181 info->qos_tier = THREAD_QOS_UNSPECIFIED;
1182 info->tier_importance = 0;
1183 }
1184
1185 break;
1186 }
1187
1188 default:
1189 result = KERN_INVALID_ARGUMENT;
1190 break;
1191 }
1192
1193 thread_mtx_unlock(thread);
1194
1195 return (result);
1196 }
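/*
 * Illustrative user-space counterpart (the public thread_policy_get()
 * wrapper; names as declared in <mach/thread_policy.h>):
 *
 *   thread_extended_policy_data_t info;
 *   mach_msg_type_number_t count = THREAD_EXTENDED_POLICY_COUNT;
 *   boolean_t get_default = FALSE;
 *   kern_return_t kr = thread_policy_get(mach_thread_self(),
 *                                        THREAD_EXTENDED_POLICY,
 *                                        (thread_policy_t)&info,
 *                                        &count, &get_default);
 *
 * On return, get_default reports whether the thread was still in its default
 * state; for example, the THREAD_EXTENDED_POLICY case above forces it to
 * TRUE for realtime threads.
 */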
1197
1198 static volatile uint64_t unique_work_interval_id = 1; /* Start at 1, 0 is not a valid work interval ID */
1199
1200 kern_return_t
1201 thread_policy_create_work_interval(
1202 thread_t thread,
1203 uint64_t *work_interval_id)
1204 {
1205 thread_mtx_lock(thread);
1206 if (thread->work_interval_id) {
1207 /* already assigned a work interval ID */
1208 thread_mtx_unlock(thread);
1209 return (KERN_INVALID_VALUE);
1210 }
1211
1212 thread->work_interval_id = OSIncrementAtomic64((volatile int64_t *)&unique_work_interval_id);
1213 *work_interval_id = thread->work_interval_id;
1214
1215 thread_mtx_unlock(thread);
1216 return KERN_SUCCESS;
1217 }
1218
1219 kern_return_t
1220 thread_policy_destroy_work_interval(
1221 thread_t thread,
1222 uint64_t work_interval_id)
1223 {
1224 thread_mtx_lock(thread);
1225 if (work_interval_id == 0 || thread->work_interval_id == 0 || thread->work_interval_id != work_interval_id) {
1226 /* work ID isn't valid or doesn't match previously assigned work interval ID */
1227 thread_mtx_unlock(thread);
1228 return (KERN_INVALID_ARGUMENT);
1229 }
1230
1231 thread->work_interval_id = 0;
1232
1233 thread_mtx_unlock(thread);
1234 return KERN_SUCCESS;
1235 }
1236
1237 void
1238 thread_policy_create(thread_t thread)
1239 {
1240 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1241 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1242 thread_tid(thread), theffective_0(thread),
1243 theffective_1(thread), thread->base_pri, 0);
1244
1245 /* We pass a pend token but ignore it */
1246 struct task_pend_token pend_token = {};
1247
1248 thread_policy_update_internal_spinlocked(thread, TRUE, &pend_token);
1249
1250 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1251 (IMPORTANCE_CODE(IMP_UPDATE, (IMP_UPDATE_TASK_CREATE | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1252 thread_tid(thread), theffective_0(thread),
1253 theffective_1(thread), thread->base_pri, 0);
1254 }
1255
1256 static void
1257 thread_policy_update_spinlocked(thread_t thread, boolean_t recompute_priority, task_pend_token_t pend_token)
1258 {
1259 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1260 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD) | DBG_FUNC_START),
1261 thread_tid(thread), theffective_0(thread),
1262 theffective_1(thread), thread->base_pri, 0);
1263
1264 thread_policy_update_internal_spinlocked(thread, recompute_priority, pend_token);
1265
1266 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1267 (IMPORTANCE_CODE(IMP_UPDATE, TASK_POLICY_THREAD)) | DBG_FUNC_END,
1268 thread_tid(thread), theffective_0(thread),
1269 theffective_1(thread), thread->base_pri, 0);
1270 }
1271
1272
1273
1274 /*
1275 * One thread state update function TO RULE THEM ALL
1276 *
1277 * This function updates the thread effective policy fields
1278 * and pushes the results to the relevant subsystems.
1279 *
1280 * Actions that cannot be run under the thread lock are flagged in the pend_token for the caller.
1281 *
1282 * Called with thread spinlock locked, task may be locked, thread mutex may be locked
1283 */
1284 static void
1285 thread_policy_update_internal_spinlocked(thread_t thread, boolean_t recompute_priority,
1286 task_pend_token_t pend_token)
1287 {
1288 /*
1289 * Step 1:
1290 * Gather requested policy and effective task state
1291 */
1292
1293 struct thread_requested_policy requested = thread->requested_policy;
1294 struct task_effective_policy task_effective = thread->task->effective_policy;
1295
1296 /*
1297 * Step 2:
1298 * Calculate new effective policies from requested policy, task and thread state
1299 * Rules:
1300 * Don't change requested, it won't take effect
1301 */
1302
1303 struct thread_effective_policy next = {};
1304
1305 next.thep_qos_ui_is_urgent = task_effective.tep_qos_ui_is_urgent;
1306
1307 uint32_t next_qos = requested.thrp_qos;
1308
1309 if (requested.thrp_qos != THREAD_QOS_UNSPECIFIED) {
1310 if (requested.thrp_qos_override != THREAD_QOS_UNSPECIFIED)
1311 next_qos = MAX(requested.thrp_qos_override, next_qos);
1312
1313 if (requested.thrp_qos_promote != THREAD_QOS_UNSPECIFIED)
1314 next_qos = MAX(requested.thrp_qos_promote, next_qos);
1315
1316 if (requested.thrp_qos_ipc_override != THREAD_QOS_UNSPECIFIED)
1317 next_qos = MAX(requested.thrp_qos_ipc_override, next_qos);
1318 }
1319
1320 next.thep_qos = next_qos;
1321
1322 /* A task clamp will result in an effective QoS even when requested is UNSPECIFIED */
1323 if (task_effective.tep_qos_clamp != THREAD_QOS_UNSPECIFIED) {
1324 if (next.thep_qos != THREAD_QOS_UNSPECIFIED)
1325 next.thep_qos = MIN(task_effective.tep_qos_clamp, next.thep_qos);
1326 else
1327 next.thep_qos = task_effective.tep_qos_clamp;
1328 }
1329
1330 /*
1331 * Extract outbound-promotion QoS before applying task ceiling or BG clamp
1332 * This allows QoS promotions to work properly even after the process is unclamped.
1333 */
1334 next.thep_qos_promote = next.thep_qos;
1335
1336 /* The ceiling only applies to threads that are in the QoS world */
1337 if (task_effective.tep_qos_ceiling != THREAD_QOS_UNSPECIFIED &&
1338 next.thep_qos != THREAD_QOS_UNSPECIFIED) {
1339 next.thep_qos = MIN(task_effective.tep_qos_ceiling, next.thep_qos);
1340 }
1341
1342 /*
1343 * The QoS relative priority is only applicable when the original programmer's
1344 * intended (requested) QoS is in effect. When the QoS is clamped (e.g.
1345 * USER_INITIATED-13REL clamped to UTILITY), the relative priority is not honored,
1346 * since otherwise it would be lower than unclamped threads. Similarly, in the
1347 * presence of boosting, the programmer doesn't know what other actors
1348 * are boosting the thread.
1349 */
1350 if ((requested.thrp_qos != THREAD_QOS_UNSPECIFIED) &&
1351 (requested.thrp_qos == next.thep_qos) &&
1352 (requested.thrp_qos_override == THREAD_QOS_UNSPECIFIED)) {
1353 next.thep_qos_relprio = requested.thrp_qos_relprio;
1354 } else {
1355 next.thep_qos_relprio = 0;
1356 }
1357
1358 /* Calculate DARWIN_BG */
1359 boolean_t wants_darwinbg = FALSE;
1360 boolean_t wants_all_sockets_bg = FALSE; /* Do I want my existing sockets to be bg */
1361
1362 /*
1363 * If DARWIN_BG has been requested at either level, it's engaged.
1364 * darwinbg threads always create bg sockets,
1365 * but only some types of darwinbg change the sockets
1366 * after they're created
1367 */
1368 if (requested.thrp_int_darwinbg || requested.thrp_ext_darwinbg)
1369 wants_all_sockets_bg = wants_darwinbg = TRUE;
1370
1371 if (requested.thrp_pidbind_bg)
1372 wants_all_sockets_bg = wants_darwinbg = TRUE;
1373
1374 if (task_effective.tep_darwinbg)
1375 wants_darwinbg = TRUE;
1376
1377 if (next.thep_qos == THREAD_QOS_BACKGROUND ||
1378 next.thep_qos == THREAD_QOS_MAINTENANCE)
1379 wants_darwinbg = TRUE;
1380
1381 /* Calculate side effects of DARWIN_BG */
1382
1383 if (wants_darwinbg)
1384 next.thep_darwinbg = 1;
1385
1386 if (next.thep_darwinbg || task_effective.tep_new_sockets_bg)
1387 next.thep_new_sockets_bg = 1;
1388
1389 /* Don't use task_effective.tep_all_sockets_bg here */
1390 if (wants_all_sockets_bg)
1391 next.thep_all_sockets_bg = 1;
1392
1393 /* darwinbg implies background QOS (or lower) */
1394 if (next.thep_darwinbg &&
1395 (next.thep_qos > THREAD_QOS_BACKGROUND || next.thep_qos == THREAD_QOS_UNSPECIFIED)) {
1396 next.thep_qos = THREAD_QOS_BACKGROUND;
1397 next.thep_qos_relprio = 0;
1398 }
1399
1400 /* Calculate IO policy */
1401
1402 int iopol = THROTTLE_LEVEL_TIER0;
1403
1404 /* Factor in the task's IO policy */
1405 if (next.thep_darwinbg)
1406 iopol = MAX(iopol, task_effective.tep_bg_iotier);
1407
1408 iopol = MAX(iopol, task_effective.tep_io_tier);
1409
1410 /* Look up the associated IO tier value for the QoS class */
1411 iopol = MAX(iopol, thread_qos_policy_params.qos_iotier[next.thep_qos]);
1412
1413 iopol = MAX(iopol, requested.thrp_int_iotier);
1414 iopol = MAX(iopol, requested.thrp_ext_iotier);
1415
1416 next.thep_io_tier = iopol;
1417
1418 /*
1419 * If a QoS override is causing IO to go into a lower tier, we also set
1420 * the passive bit so that a thread doesn't end up stuck in its own throttle
1421 * window when the override goes away.
1422 */
1423 boolean_t qos_io_override_active = FALSE;
1424 if (thread_qos_policy_params.qos_iotier[next.thep_qos] <
1425 thread_qos_policy_params.qos_iotier[requested.thrp_qos])
1426 qos_io_override_active = TRUE;
1427
1428 /* Calculate Passive IO policy */
1429 if (requested.thrp_ext_iopassive ||
1430 requested.thrp_int_iopassive ||
1431 qos_io_override_active ||
1432 task_effective.tep_io_passive )
1433 next.thep_io_passive = 1;
1434
1435 /* Calculate timer QOS */
1436 uint32_t latency_qos = requested.thrp_latency_qos;
1437
1438 latency_qos = MAX(latency_qos, task_effective.tep_latency_qos);
1439 latency_qos = MAX(latency_qos, thread_qos_policy_params.qos_latency_qos[next.thep_qos]);
1440
1441 next.thep_latency_qos = latency_qos;
1442
1443 /* Calculate throughput QOS */
1444 uint32_t through_qos = requested.thrp_through_qos;
1445
1446 through_qos = MAX(through_qos, task_effective.tep_through_qos);
1447 through_qos = MAX(through_qos, thread_qos_policy_params.qos_through_qos[next.thep_qos]);
1448
1449 next.thep_through_qos = through_qos;
1450
1451 if (task_effective.tep_terminated || requested.thrp_terminated) {
1452 /* Shoot down the throttles that slow down exit or response to SIGTERM */
1453 next.thep_terminated = 1;
1454 next.thep_darwinbg = 0;
1455 next.thep_io_tier = THROTTLE_LEVEL_TIER0;
1456 next.thep_qos = THREAD_QOS_UNSPECIFIED;
1457 next.thep_latency_qos = LATENCY_QOS_TIER_UNSPECIFIED;
1458 next.thep_through_qos = THROUGHPUT_QOS_TIER_UNSPECIFIED;
1459 }
1460
1461 /*
1462 * Step 3:
1463 * Swap out old policy for new policy
1464 */
1465
1466 struct thread_effective_policy prev = thread->effective_policy;
1467
1468 thread_update_qos_cpu_time_locked(thread);
1469
1470 /* This is the point where the new values become visible to other threads */
1471 thread->effective_policy = next;
1472
1473 /*
1474 * Step 4:
1475 * Pend updates that can't be done while holding the thread lock
1476 */
1477
1478 if (prev.thep_all_sockets_bg != next.thep_all_sockets_bg)
1479 pend_token->tpt_update_sockets = 1;
1480
1481 /* TODO: Doesn't this only need to be done if the throttle went up? */
1482 if (prev.thep_io_tier != next.thep_io_tier)
1483 pend_token->tpt_update_throttle = 1;
1484
1485 /*
1486 * Check for the attributes that sfi_thread_classify() consults,
1487 * and trigger SFI re-evaluation.
1488 */
1489 if (prev.thep_qos != next.thep_qos ||
1490 prev.thep_darwinbg != next.thep_darwinbg )
1491 pend_token->tpt_update_thread_sfi = 1;
1492
1493 /*
1494 * Step 5:
1495 * Update other subsystems as necessary if something has changed
1496 */
1497
1498 /* Check for the attributes that thread_recompute_priority() consults */
1499 if (prev.thep_qos != next.thep_qos ||
1500 prev.thep_qos_relprio != next.thep_qos_relprio ||
1501 prev.thep_qos_ui_is_urgent != next.thep_qos_ui_is_urgent ||
1502 prev.thep_terminated != next.thep_terminated ||
1503 pend_token->tpt_force_recompute_pri == 1 ||
1504 recompute_priority) {
1505 thread_recompute_priority(thread);
1506 }
1507 }
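/*
 * Example of the resolution above (QoS tiers compare numerically, higher
 * meaning more important): requested LEGACY with a thrp_qos_override of
 * USER_INITIATED in a task clamped to UTILITY resolves as
 *
 *   next_qos = MAX(LEGACY, USER_INITIATED)  -> USER_INITIATED
 *   thep_qos = MIN(UTILITY, USER_INITIATED) -> UTILITY
 *
 * and thep_qos_relprio drops to 0 because the requested QoS is no longer the
 * one in effect. If the thread is also darwinbg, thep_qos is pulled down
 * further to THREAD_QOS_BACKGROUND.
 */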
1508
1509
1510 /*
1511 * Initiate a thread policy state transition on a thread with its TID
1512 * Useful if you cannot guarantee the thread won't get terminated
1513 * Precondition: No locks are held
1514 * Will take task lock - using the non-tid variant is faster
1515 * if you already have a thread ref.
1516 */
1517 void
1518 proc_set_thread_policy_with_tid(task_t task,
1519 uint64_t tid,
1520 int category,
1521 int flavor,
1522 int value)
1523 {
1524 /* takes task lock, returns ref'ed thread or NULL */
1525 thread_t thread = task_findtid(task, tid);
1526
1527 if (thread == THREAD_NULL)
1528 return;
1529
1530 proc_set_thread_policy(thread, category, flavor, value);
1531
1532 thread_deallocate(thread);
1533 }
1534
1535 /*
1536 * Initiate a thread policy transition on a thread
1537 * This path supports networking transitions (i.e. darwinbg transitions)
1538 * Precondition: No locks are held
1539 */
1540 void
1541 proc_set_thread_policy(thread_t thread,
1542 int category,
1543 int flavor,
1544 int value)
1545 {
1546 struct task_pend_token pend_token = {};
1547
1548 thread_mtx_lock(thread);
1549
1550 proc_set_thread_policy_locked(thread, category, flavor, value, 0, &pend_token);
1551
1552 thread_mtx_unlock(thread);
1553
1554 thread_policy_update_complete_unlocked(thread, &pend_token);
1555 }
1556
1557 /*
1558 * KPI for pthread kext to call to set thread base QoS values during a workq wakeup
1559 * May be called with interrupts disabled and workqueue/waitqueue/kqueue locks held
1560 *
1561 * Does NOT do update completion, so the thread MUST be in a safe place WRT
1562 * IO throttling and SFI.
1563 *
1564 * TODO: Can I assert 'it must be in a safe place'?
1565 */
1566 kern_return_t
1567 thread_set_workq_qos(thread_t thread,
1568 int qos_tier,
1569 int relprio) /* relprio is -16 to 0 */
1570 {
1571 assert(qos_tier >= 0 && qos_tier <= THREAD_QOS_LAST);
1572 assert(relprio <= 0 && relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE);
1573
1574 if (!(qos_tier >= 0 && qos_tier <= THREAD_QOS_LAST))
1575 return KERN_FAILURE;
1576 if (!(relprio <= 0 && relprio >= THREAD_QOS_MIN_TIER_IMPORTANCE))
1577 return KERN_FAILURE;
1578
1579 if (qos_tier == THREAD_QOS_UNSPECIFIED) {
1580 assert(relprio == 0);
1581 if (relprio != 0)
1582 return KERN_FAILURE;
1583 }
1584
1585 assert(thread->static_param);
1586 if (!thread->static_param) {
1587 return KERN_FAILURE;
1588 }
1589
1590 /* Concern: this doesn't hold the mutex... */
1591 //if (!thread->active)
1592 // return KERN_TERMINATED;
1593
1594 struct task_pend_token pend_token = {};
1595
1596 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_AND_RELPRIO, qos_tier, -relprio, &pend_token);
1597
1598 assert(pend_token.tpt_update_sockets == 0);
1599 /* we don't need to update throttle or sfi because pthread kext promises the thread is in a safe place */
1600 /* TODO: Do we need to update SFI to ensure it gets tagged with the AST? */
1601
1602 return KERN_SUCCESS;
1603 }
1604
1605
1606 /*
1607 * Do the things that can't be done while holding a thread mutex.
1608 * These are set up to call back into thread policy to get the latest value,
1609 * so they don't have to be synchronized with the update.
1610 * The only required semantic is 'call this sometime after updating effective policy'
1611 *
1612 * Precondition: Thread mutex is not held
1613 *
1614 * This may be called with the task lock held, but in that case it won't be
1615 * called with tpt_update_sockets set.
1616 */
1617 void
1618 thread_policy_update_complete_unlocked(thread_t thread, task_pend_token_t pend_token)
1619 {
1620 #ifdef MACH_BSD
1621 if (pend_token->tpt_update_sockets)
1622 proc_apply_task_networkbg(thread->task->bsd_info, thread);
1623 #endif /* MACH_BSD */
1624
1625 if (pend_token->tpt_update_throttle)
1626 rethrottle_thread(thread->uthread);
1627
1628 if (pend_token->tpt_update_thread_sfi)
1629 sfi_reevaluate(thread);
1630 }
1631
1632 /*
1633 * Set and update thread policy
1634 * Thread mutex might be held
1635 */
1636 static void
1637 proc_set_thread_policy_locked(thread_t thread,
1638 int category,
1639 int flavor,
1640 int value,
1641 int value2,
1642 task_pend_token_t pend_token)
1643 {
1644 spl_t s = splsched();
1645 thread_lock(thread);
1646
1647 proc_set_thread_policy_spinlocked(thread, category, flavor, value, value2, pend_token);
1648
1649 thread_unlock(thread);
1650 splx(s);
1651 }
1652
1653 /*
1654 * Set and update thread policy
1655 * Thread spinlock is held
1656 */
1657 static void
1658 proc_set_thread_policy_spinlocked(thread_t thread,
1659 int category,
1660 int flavor,
1661 int value,
1662 int value2,
1663 task_pend_token_t pend_token)
1664 {
1665 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1666 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_START,
1667 thread_tid(thread), threquested_0(thread),
1668 threquested_1(thread), value, 0);
1669
1670 thread_set_requested_policy_spinlocked(thread, category, flavor, value, value2);
1671
1672 thread_policy_update_spinlocked(thread, FALSE, pend_token);
1673
1674 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
1675 (IMPORTANCE_CODE(flavor, (category | TASK_POLICY_THREAD))) | DBG_FUNC_END,
1676 thread_tid(thread), threquested_0(thread),
1677 threquested_1(thread), tpending(pend_token), 0);
1678 }
1679
1680 /*
1681 * Set the requested state for a specific flavor to a specific value.
1682 */
1683 static void
1684 thread_set_requested_policy_spinlocked(thread_t thread,
1685 int category,
1686 int flavor,
1687 int value,
1688 int value2)
1689 {
1690 int tier, passive;
1691
1692 struct thread_requested_policy requested = thread->requested_policy;
1693
1694 switch (flavor) {
1695
1696 /* Category: EXTERNAL and INTERNAL, thread and task */
1697
1698 case TASK_POLICY_DARWIN_BG:
1699 if (category == TASK_POLICY_EXTERNAL)
1700 requested.thrp_ext_darwinbg = value;
1701 else
1702 requested.thrp_int_darwinbg = value;
1703 break;
1704
1705 case TASK_POLICY_IOPOL:
1706 proc_iopol_to_tier(value, &tier, &passive);
1707 if (category == TASK_POLICY_EXTERNAL) {
1708 requested.thrp_ext_iotier = tier;
1709 requested.thrp_ext_iopassive = passive;
1710 } else {
1711 requested.thrp_int_iotier = tier;
1712 requested.thrp_int_iopassive = passive;
1713 }
1714 break;
1715
1716 case TASK_POLICY_IO:
1717 if (category == TASK_POLICY_EXTERNAL)
1718 requested.thrp_ext_iotier = value;
1719 else
1720 requested.thrp_int_iotier = value;
1721 break;
1722
1723 case TASK_POLICY_PASSIVE_IO:
1724 if (category == TASK_POLICY_EXTERNAL)
1725 requested.thrp_ext_iopassive = value;
1726 else
1727 requested.thrp_int_iopassive = value;
1728 break;
1729
1730 /* Category: ATTRIBUTE, thread only */
1731
1732 case TASK_POLICY_PIDBIND_BG:
1733 assert(category == TASK_POLICY_ATTRIBUTE);
1734 requested.thrp_pidbind_bg = value;
1735 break;
1736
1737 case TASK_POLICY_LATENCY_QOS:
1738 assert(category == TASK_POLICY_ATTRIBUTE);
1739 requested.thrp_latency_qos = value;
1740 break;
1741
1742 case TASK_POLICY_THROUGH_QOS:
1743 assert(category == TASK_POLICY_ATTRIBUTE);
1744 requested.thrp_through_qos = value;
1745 break;
1746
1747 case TASK_POLICY_QOS:
1748 assert(category == TASK_POLICY_ATTRIBUTE);
1749 requested.thrp_qos = value;
1750 break;
1751
1752 case TASK_POLICY_QOS_OVERRIDE:
1753 assert(category == TASK_POLICY_ATTRIBUTE);
1754 requested.thrp_qos_override = value;
1755 break;
1756
1757 case TASK_POLICY_QOS_AND_RELPRIO:
1758 assert(category == TASK_POLICY_ATTRIBUTE);
1759 requested.thrp_qos = value;
1760 requested.thrp_qos_relprio = value2;
1761 DTRACE_BOOST3(qos_set, uint64_t, thread->thread_id, int, requested.thrp_qos, int, requested.thrp_qos_relprio);
1762 break;
1763
1764 case TASK_POLICY_QOS_PROMOTE:
1765 assert(category == TASK_POLICY_ATTRIBUTE);
1766 requested.thrp_qos_promote = value;
1767 break;
1768
1769 case TASK_POLICY_QOS_IPC_OVERRIDE:
1770 assert(category == TASK_POLICY_ATTRIBUTE);
1771 requested.thrp_qos_ipc_override = value;
1772 break;
1773
1774 case TASK_POLICY_TERMINATED:
1775 assert(category == TASK_POLICY_ATTRIBUTE);
1776 requested.thrp_terminated = value;
1777 break;
1778
1779 default:
1780 panic("unknown task policy: %d %d %d", category, flavor, value);
1781 break;
1782 }
1783
1784 thread->requested_policy = requested;
1785 }
1786
1787 /*
1788 * Gets what you set. Effective values may be different.
1789 * Precondition: No locks are held
1790 */
1791 int
1792 proc_get_thread_policy(thread_t thread,
1793 int category,
1794 int flavor)
1795 {
1796 int value = 0;
1797 thread_mtx_lock(thread);
1798 value = proc_get_thread_policy_locked(thread, category, flavor, NULL);
1799 thread_mtx_unlock(thread);
1800 return value;
1801 }
1802
1803 static int
1804 proc_get_thread_policy_locked(thread_t thread,
1805 int category,
1806 int flavor,
1807 int* value2)
1808 {
1809 int value = 0;
1810
1811 spl_t s = splsched();
1812 thread_lock(thread);
1813
1814 value = thread_get_requested_policy_spinlocked(thread, category, flavor, value2);
1815
1816 thread_unlock(thread);
1817 splx(s);
1818
1819 return value;
1820 }
1821
1822 /*
1823 * Gets what you set. Effective values may be different.
1824 */
1825 static int
1826 thread_get_requested_policy_spinlocked(thread_t thread,
1827 int category,
1828 int flavor,
1829 int* value2)
1830 {
1831 int value = 0;
1832
1833 struct thread_requested_policy requested = thread->requested_policy;
1834
1835 switch (flavor) {
1836 case TASK_POLICY_DARWIN_BG:
1837 if (category == TASK_POLICY_EXTERNAL)
1838 value = requested.thrp_ext_darwinbg;
1839 else
1840 value = requested.thrp_int_darwinbg;
1841 break;
1842 case TASK_POLICY_IOPOL:
1843 if (category == TASK_POLICY_EXTERNAL)
1844 value = proc_tier_to_iopol(requested.thrp_ext_iotier,
1845 requested.thrp_ext_iopassive);
1846 else
1847 value = proc_tier_to_iopol(requested.thrp_int_iotier,
1848 requested.thrp_int_iopassive);
1849 break;
1850 case TASK_POLICY_IO:
1851 if (category == TASK_POLICY_EXTERNAL)
1852 value = requested.thrp_ext_iotier;
1853 else
1854 value = requested.thrp_int_iotier;
1855 break;
1856 case TASK_POLICY_PASSIVE_IO:
1857 if (category == TASK_POLICY_EXTERNAL)
1858 value = requested.thrp_ext_iopassive;
1859 else
1860 value = requested.thrp_int_iopassive;
1861 break;
1862 case TASK_POLICY_QOS:
1863 assert(category == TASK_POLICY_ATTRIBUTE);
1864 value = requested.thrp_qos;
1865 break;
1866 case TASK_POLICY_QOS_OVERRIDE:
1867 assert(category == TASK_POLICY_ATTRIBUTE);
1868 value = requested.thrp_qos_override;
1869 break;
1870 case TASK_POLICY_LATENCY_QOS:
1871 assert(category == TASK_POLICY_ATTRIBUTE);
1872 value = requested.thrp_latency_qos;
1873 break;
1874 case TASK_POLICY_THROUGH_QOS:
1875 assert(category == TASK_POLICY_ATTRIBUTE);
1876 value = requested.thrp_through_qos;
1877 break;
1878 case TASK_POLICY_QOS_AND_RELPRIO:
1879 assert(category == TASK_POLICY_ATTRIBUTE);
1880 assert(value2 != NULL);
1881 value = requested.thrp_qos;
1882 *value2 = requested.thrp_qos_relprio;
1883 break;
1884 case TASK_POLICY_QOS_PROMOTE:
1885 assert(category == TASK_POLICY_ATTRIBUTE);
1886 value = requested.thrp_qos_promote;
1887 break;
1888 case TASK_POLICY_QOS_IPC_OVERRIDE:
1889 assert(category == TASK_POLICY_ATTRIBUTE);
1890 value = requested.thrp_qos_ipc_override;
1891 break;
1892 case TASK_POLICY_TERMINATED:
1893 assert(category == TASK_POLICY_ATTRIBUTE);
1894 value = requested.thrp_terminated;
1895 break;
1896
1897 default:
1898 panic("unknown policy_flavor %d", flavor);
1899 break;
1900 }
1901
1902 return value;
1903 }
1904
1905 /*
1906 * Gets what is actually in effect, for subsystems which pull policy instead of receiving updates.
1907 *
1908 * NOTE: This accessor does not take the task or thread lock.
1909 * Notifications of state updates need to be externally synchronized with state queries.
1910 * This routine *MUST* remain interrupt safe, as it is potentially invoked
1911 * within the context of a timer interrupt.
1912 *
1913 * TODO: I think we can get away with architecting this such that we don't need to look at the task ever.
1914 * Is that a good idea? Maybe it's best to avoid evaluate-all-the-threads updates.
1915 * I don't think that cost is worth not having the right answer.
1916 */
1917 int
1918 proc_get_effective_thread_policy(thread_t thread,
1919 int flavor)
1920 {
1921 int value = 0;
1922
1923 switch (flavor) {
1924 case TASK_POLICY_DARWIN_BG:
1925 /*
1926 * This call is used within the timer layer, as well as
1927 * for prioritizing requests to the graphics system.
1928 * It also informs SFI and originator-bg-state.
1929 * Returns 1 for background mode, 0 for normal mode
1930 */
1931
1932 value = thread->effective_policy.thep_darwinbg ? 1 : 0;
1933 break;
1934 case TASK_POLICY_IO:
1935 /*
1936 * The I/O system calls here to find out what throttling tier to apply to an operation.
1937 * Returns THROTTLE_LEVEL_* values
1938 */
1939 value = thread->effective_policy.thep_io_tier;
1940 if (thread->iotier_override != THROTTLE_LEVEL_NONE)
1941 value = MIN(value, thread->iotier_override);
1942 break;
1943 case TASK_POLICY_PASSIVE_IO:
1944 /*
1945 * The I/O system calls here to find out whether an operation should be passive.
1946 * (i.e. not cause operations with lower throttle tiers to be throttled)
1947 * Returns 1 for passive mode, 0 for normal mode
1948 *
1949 * If an override is causing IO to go into a lower tier, we also set
1950 * the passive bit so that a thread doesn't end up stuck in its own throttle
1951 * window when the override goes away.
1952 */
1953 value = thread->effective_policy.thep_io_passive ? 1 : 0;
1954 if (thread->iotier_override != THROTTLE_LEVEL_NONE &&
1955 thread->iotier_override < thread->effective_policy.thep_io_tier)
1956 value = 1;
1957 break;
1958 case TASK_POLICY_ALL_SOCKETS_BG:
1959 /*
1960 * do_background_socket() calls this to determine whether
1961 * it should change the thread's sockets
1962 * Returns 1 for background mode, 0 for normal mode
1963 * This consults both thread and task so un-DBGing a thread while the task is BG
1964 * doesn't get you out of the network throttle.
1965 */
1966 value = (thread->effective_policy.thep_all_sockets_bg ||
1967 thread->task->effective_policy.tep_all_sockets_bg) ? 1 : 0;
1968 break;
1969 case TASK_POLICY_NEW_SOCKETS_BG:
1970 /*
1971 * socreate() calls this to determine if it should mark a new socket as background
1972 * Returns 1 for background mode, 0 for normal mode
1973 */
1974 value = thread->effective_policy.thep_new_sockets_bg ? 1 : 0;
1975 break;
1976 case TASK_POLICY_LATENCY_QOS:
1977 /*
1978 * Timer arming calls into here to find out the timer coalescing level
1979 * Returns a latency QoS tier (0-6)
1980 */
1981 value = thread->effective_policy.thep_latency_qos;
1982 break;
1983 case TASK_POLICY_THROUGH_QOS:
1984 /*
1985 * This value is passed into the urgency callout from the scheduler
1986 * to the performance management subsystem.
1987 *
1988 * Returns a throughput QoS tier (0-6)
1989 */
1990 value = thread->effective_policy.thep_through_qos;
1991 break;
1992 case TASK_POLICY_QOS:
1993 /*
1994 * This is communicated to the performance management layer and SFI.
1995 *
1996 * Returns a QoS policy tier
1997 */
1998 value = thread->effective_policy.thep_qos;
1999 break;
2000 default:
2001 panic("unknown thread policy flavor %d", flavor);
2002 break;
2003 }
2004
2005 return value;
2006 }
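
/*
 * Illustrative sketch (not part of the build): how a pull-based subsystem such
 * as the I/O layer might consult the effective policy. This is a minimal
 * example assuming the caller can guarantee the thread stays valid; no locks
 * are taken, per the note above.
 *
 *     int io_tier    = proc_get_effective_thread_policy(thread, TASK_POLICY_IO);
 *     int io_passive = proc_get_effective_thread_policy(thread, TASK_POLICY_PASSIVE_IO);
 *
 *     if (!io_passive) {
 *         // throttle the operation according to io_tier (a THROTTLE_LEVEL_* value)
 *     }
 */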
2007
2008
2009 /*
2010 * The (integer_t) casts limit the number of bits we can fit here;
2011 * this interface is deprecated and replaced by the _EXT struct(?)
2012 */
2013 static void
2014 proc_get_thread_policy_bitfield(thread_t thread, thread_policy_state_t info)
2015 {
2016 uint64_t bits = 0;
2017 struct thread_requested_policy requested = thread->requested_policy;
2018
2019 bits |= (requested.thrp_int_darwinbg ? POLICY_REQ_INT_DARWIN_BG : 0);
2020 bits |= (requested.thrp_ext_darwinbg ? POLICY_REQ_EXT_DARWIN_BG : 0);
2021 bits |= (requested.thrp_int_iotier ? (((uint64_t)requested.thrp_int_iotier) << POLICY_REQ_INT_IO_TIER_SHIFT) : 0);
2022 bits |= (requested.thrp_ext_iotier ? (((uint64_t)requested.thrp_ext_iotier) << POLICY_REQ_EXT_IO_TIER_SHIFT) : 0);
2023 bits |= (requested.thrp_int_iopassive ? POLICY_REQ_INT_PASSIVE_IO : 0);
2024 bits |= (requested.thrp_ext_iopassive ? POLICY_REQ_EXT_PASSIVE_IO : 0);
2025
2026 bits |= (requested.thrp_qos ? (((uint64_t)requested.thrp_qos) << POLICY_REQ_TH_QOS_SHIFT) : 0);
2027 bits |= (requested.thrp_qos_override ? (((uint64_t)requested.thrp_qos_override) << POLICY_REQ_TH_QOS_OVER_SHIFT) : 0);
2028
2029 bits |= (requested.thrp_pidbind_bg ? POLICY_REQ_PIDBIND_BG : 0);
2030
2031 bits |= (requested.thrp_latency_qos ? (((uint64_t)requested.thrp_latency_qos) << POLICY_REQ_BASE_LATENCY_QOS_SHIFT) : 0);
2032 bits |= (requested.thrp_through_qos ? (((uint64_t)requested.thrp_through_qos) << POLICY_REQ_BASE_THROUGH_QOS_SHIFT) : 0);
2033
2034 info->requested = (integer_t) bits;
2035 bits = 0;
2036
2037 struct thread_effective_policy effective = thread->effective_policy;
2038
2039 bits |= (effective.thep_darwinbg ? POLICY_EFF_DARWIN_BG : 0);
2040
2041 bits |= (effective.thep_io_tier ? (((uint64_t)effective.thep_io_tier) << POLICY_EFF_IO_TIER_SHIFT) : 0);
2042 bits |= (effective.thep_io_passive ? POLICY_EFF_IO_PASSIVE : 0);
2043 bits |= (effective.thep_all_sockets_bg ? POLICY_EFF_ALL_SOCKETS_BG : 0);
2044 bits |= (effective.thep_new_sockets_bg ? POLICY_EFF_NEW_SOCKETS_BG : 0);
2045
2046 bits |= (effective.thep_qos ? (((uint64_t)effective.thep_qos) << POLICY_EFF_TH_QOS_SHIFT) : 0);
2047
2048 bits |= (effective.thep_latency_qos ? (((uint64_t)effective.thep_latency_qos) << POLICY_EFF_LATENCY_QOS_SHIFT) : 0);
2049 bits |= (effective.thep_through_qos ? (((uint64_t)effective.thep_through_qos) << POLICY_EFF_THROUGH_QOS_SHIFT) : 0);
2050
2051 info->effective = (integer_t)bits;
2052 bits = 0;
2053
2054 info->pending = 0;
2055 }
2056
2057 /*
2058 * Sneakily trace either the task and thread requested
2059 * or just the thread requested, depending on whether we have enough room.
2060 * We do have room on LP64. On LP32, we have to split it between two uintptr_t's.
2061 *
2062 * LP32 LP64
2063 * threquested_0(thread) thread[0] thread[0]
2064 * threquested_1(thread) thread[1] task[0]
2065 *
2066 */
2067
2068 uintptr_t
2069 threquested_0(thread_t thread)
2070 {
2071 static_assert(sizeof(struct thread_requested_policy) == sizeof(uint64_t), "size invariant violated");
2072
2073 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2074
2075 return raw[0];
2076 }
2077
2078 uintptr_t
2079 threquested_1(thread_t thread)
2080 {
2081 #if defined __LP64__
2082 return *(uintptr_t*)&thread->task->requested_policy;
2083 #else
2084 uintptr_t* raw = (uintptr_t*)(void*)&thread->requested_policy;
2085 return raw[1];
2086 #endif
2087 }
2088
2089 uintptr_t
2090 theffective_0(thread_t thread)
2091 {
2092 static_assert(sizeof(struct thread_effective_policy) == sizeof(uint64_t), "size invariant violated");
2093
2094 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2095 return raw[0];
2096 }
2097
2098 uintptr_t
2099 theffective_1(thread_t thread)
2100 {
2101 #if defined __LP64__
2102 return *(uintptr_t*)&thread->task->effective_policy;
2103 #else
2104 uintptr_t* raw = (uintptr_t*)(void*)&thread->effective_policy;
2105 return raw[1];
2106 #endif
2107 }
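
/*
 * Illustrative sketch (not part of the build): these accessors are sized so
 * the policy words can be dropped straight into tracepoint arguments, e.g.
 * (hypothetical trace site, mirroring the IMP_MAIN_THREAD_QOS tracepoints
 * later in this file; 'tracepoint_code' is a placeholder):
 *
 *     KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, tracepoint_code,
 *         thread_tid(thread),
 *         threquested_0(thread), threquested_1(thread),
 *         theffective_0(thread), 0);
 *
 * On LP64 the first requested word is the thread's policy and the second the
 * task's; on LP32 the thread's 64-bit policy is split across the two words.
 */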
2108
2109
2110 /*
2111 * Set an override on the thread which is consulted with a
2112 * higher priority than the task/thread policy. This should
2113 * only be set for temporary grants until the thread
2114 * returns to the userspace boundary.
2115 *
2116 * We use atomic operations to swap in the override, with
2117 * the assumption that the thread itself can
2118 * read the override and clear it on return to userspace.
2119 *
2120 * No locking is performed, since it is acceptable to see
2121 * a stale override for one loop through throttle_lowpri_io().
2122 * However a thread reference must be held on the thread.
2123 */
2124
2125 void set_thread_iotier_override(thread_t thread, int policy)
2126 {
2127 int current_override;
2128
2129 /* Let most aggressive I/O policy win until user boundary */
2130 do {
2131 current_override = thread->iotier_override;
2132
2133 if (current_override != THROTTLE_LEVEL_NONE)
2134 policy = MIN(current_override, policy);
2135
2136 if (current_override == policy) {
2137 /* no effective change */
2138 return;
2139 }
2140 } while (!OSCompareAndSwap(current_override, policy, &thread->iotier_override));
2141
2142 /*
2143 * Since the thread may be currently throttled,
2144 * re-evaluate tiers and potentially break out
2145 * of an msleep
2146 */
2147 rethrottle_thread(thread->uthread);
2148 }
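
/*
 * Illustrative sketch (not part of the build): a subsystem granting a
 * temporary I/O boost to a thread it holds a reference on. THROTTLE_LEVEL_TIER0
 * is assumed here to be the most favorable tier of the THROTTLE_LEVEL_*
 * enumeration.
 *
 *     // boost until the thread next crosses the user boundary
 *     set_thread_iotier_override(thread, THROTTLE_LEVEL_TIER0);
 *
 * Repeated calls are safe: the CAS loop above keeps the most aggressive
 * (numerically lowest) tier and returns early when nothing would change.
 */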
2149
2150 /*
2151 * Userspace synchronization routines (like pthread mutexes, pthread reader-writer locks,
2152 * semaphores, dispatch_sync) may result in priority inversions where a higher priority thread
2153 * (i.e. by scheduler priority, I/O tier, or QoS tier) is waiting on a resource owned by a lower
2154 * priority thread. In these cases, we attempt to propagate the priority token, as long
2155 * as the subsystem informs us of the relationships between the threads. The userspace
2156 * synchronization subsystem should maintain the information of owner->resource and
2157 * resource->waiters itself.
2158 */
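
/*
 * Illustrative sketch (not part of the build): the shape of the calls such a
 * subsystem might make when a high-QoS waiter blocks on a mutex owned by a
 * lower-QoS thread. 'owner_task', 'owner_thread', 'waiter_qos' and 'mutex_addr'
 * are hypothetical names for state the subsystem already tracks; the tid
 * argument is unused when an explicit thread is passed.
 *
 *     // waiter blocks: push its QoS onto the owner for this resource
 *     proc_thread_qos_add_override(owner_task, owner_thread, 0, waiter_qos,
 *         TRUE, mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 *
 *     // owner unlocks: drop the override granted for that resource
 *     proc_thread_qos_remove_override(owner_task, owner_thread, 0,
 *         mutex_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
 */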
2159
2160 /*
2161 * This helper canonicalizes the resource/resource_type given the current qos_override_mode
2162 * in effect. Note that wildcards (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD) may need
2163 * to be handled specially in the future, but for now it's fine to slam
2164 * *resource to USER_ADDR_NULL even if it was previously a wildcard.
2165 */
2166 static void canonicalize_resource_and_type(user_addr_t *resource, int *resource_type) {
2167 if (qos_override_mode == QOS_OVERRIDE_MODE_OVERHANG_PEAK || qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2168 /* Map all input resource/type to a single one */
2169 *resource = USER_ADDR_NULL;
2170 *resource_type = THREAD_QOS_OVERRIDE_TYPE_UNKNOWN;
2171 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE) {
2172 /* no transform */
2173 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH) {
2174 /* Map all dispatch overrides to a single one, to avoid memory overhead */
2175 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
2176 *resource = USER_ADDR_NULL;
2177 }
2178 } else if (qos_override_mode == QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_SINGLE_MUTEX_OVERRIDE) {
2179 /* Map all mutex overrides to a single one, to avoid memory overhead */
2180 if (*resource_type == THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX) {
2181 *resource = USER_ADDR_NULL;
2182 }
2183 }
2184 }
2185
2186 /* This helper routine finds an existing override for the resource, if any. Locking should be done by the caller. */
2187 static struct thread_qos_override *
2188 find_qos_override(thread_t thread,
2189 user_addr_t resource,
2190 int resource_type)
2191 {
2192 struct thread_qos_override *override;
2193
2194 override = thread->overrides;
2195 while (override) {
2196 if (override->override_resource == resource &&
2197 override->override_resource_type == resource_type) {
2198 return override;
2199 }
2200
2201 override = override->override_next;
2202 }
2203
2204 return NULL;
2205 }
2206
2207 static void
2208 find_and_decrement_qos_override(thread_t thread,
2209 user_addr_t resource,
2210 int resource_type,
2211 boolean_t reset,
2212 struct thread_qos_override **free_override_list)
2213 {
2214 struct thread_qos_override *override, *override_prev;
2215
2216 override_prev = NULL;
2217 override = thread->overrides;
2218 while (override) {
2219 struct thread_qos_override *override_next = override->override_next;
2220
2221 if ((THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD == resource || override->override_resource == resource) &&
2222 (THREAD_QOS_OVERRIDE_TYPE_WILDCARD == resource_type || override->override_resource_type == resource_type)) {
2223
2224 if (reset) {
2225 override->override_contended_resource_count = 0;
2226 } else {
2227 override->override_contended_resource_count--;
2228 }
2229
2230 if (override->override_contended_resource_count == 0) {
2231 if (override_prev == NULL) {
2232 thread->overrides = override_next;
2233 } else {
2234 override_prev->override_next = override_next;
2235 }
2236
2237 /* Add to out-param for later zfree */
2238 override->override_next = *free_override_list;
2239 *free_override_list = override;
2240 } else {
2241 override_prev = override;
2242 }
2243
2244 if (THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD != resource) {
2245 return;
2246 }
2247 } else {
2248 override_prev = override;
2249 }
2250
2251 override = override_next;
2252 }
2253 }
2254
2255 /* This helper recalculates the current requested override using the policy selected at boot */
2256 static int
2257 calculate_requested_qos_override(thread_t thread)
2258 {
2259 if (qos_override_mode == QOS_OVERRIDE_MODE_IGNORE_OVERRIDE) {
2260 return THREAD_QOS_UNSPECIFIED;
2261 }
2262
2263 /* iterate over all overrides and calculate MAX */
2264 struct thread_qos_override *override;
2265 int qos_override = THREAD_QOS_UNSPECIFIED;
2266
2267 override = thread->overrides;
2268 while (override) {
2269 if (qos_override_mode != QOS_OVERRIDE_MODE_FINE_GRAINED_OVERRIDE_BUT_IGNORE_DISPATCH ||
2270 override->override_resource_type != THREAD_QOS_OVERRIDE_TYPE_DISPATCH_ASYNCHRONOUS_OVERRIDE) {
2271 qos_override = MAX(qos_override, override->override_qos);
2272 }
2273
2274 override = override->override_next;
2275 }
2276
2277 return qos_override;
2278 }
2279
2280 /*
2281 * Returns:
2282 * - 0 on success
2283 * - EINVAL if some invalid input was passed
2284 * - EFAULT if user_lock_addr != NULL and needs to be faulted (userland has to
2285 * fault and retry)
2286 * - ESTALE if user_lock_addr != NULL &&
2287 * ulock_owner_value_to_port_name(*user_lock_addr) != user_lock_owner
2288 */
2289 static int
2290 proc_thread_qos_add_override_internal(thread_t thread,
2291 int override_qos,
2292 boolean_t first_override_for_resource,
2293 user_addr_t resource,
2294 int resource_type,
2295 user_addr_t user_lock_addr,
2296 mach_port_name_t user_lock_owner)
2297 {
2298 struct task_pend_token pend_token = {};
2299 int rc = 0;
2300
2301 thread_mtx_lock(thread);
2302
2303 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_START,
2304 thread_tid(thread), override_qos, first_override_for_resource ? 1 : 0, 0, 0);
2305
2306 DTRACE_BOOST5(qos_add_override_pre, uint64_t, thread_tid(thread),
2307 uint64_t, thread->requested_policy.thrp_qos,
2308 uint64_t, thread->effective_policy.thep_qos,
2309 int, override_qos, boolean_t, first_override_for_resource);
2310
2311 struct thread_qos_override *override;
2312 struct thread_qos_override *override_new = NULL;
2313 int new_qos_override, prev_qos_override;
2314 int new_effective_qos;
2315
2316 canonicalize_resource_and_type(&resource, &resource_type);
2317
2318 override = find_qos_override(thread, resource, resource_type);
2319 if (first_override_for_resource && !override) {
2320 /* We need to allocate a new object. Drop the thread lock and
2321 * recheck afterwards in case someone else added the override
2322 */
2323 thread_mtx_unlock(thread);
2324 override_new = zalloc(thread_qos_override_zone);
2325 thread_mtx_lock(thread);
2326 override = find_qos_override(thread, resource, resource_type);
2327 }
2328 if (user_lock_addr) {
2329 uint64_t val;
2330 /* Work around the lack of explicit support for 'no-fault copyin'
2331 * <rdar://problem/24999882>, as disabling preemption prevents paging in
2332 */
2333 disable_preemption();
2334 rc = copyin_word(user_lock_addr, &val, sizeof(user_lock_owner));
2335 enable_preemption();
2336 if (rc == 0 && ulock_owner_value_to_port_name((uint32_t)val) != user_lock_owner) {
2337 rc = ESTALE;
2338 }
2339 if (rc) {
2340 prev_qos_override = proc_get_thread_policy_locked(thread,
2341 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2342 new_qos_override = prev_qos_override;
2343 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2344 thread_mtx_unlock(thread);
2345 goto out;
2346 }
2347 }
2348 if (first_override_for_resource && override) {
2349 /* Someone else already allocated while the thread lock was dropped */
2350 override->override_contended_resource_count++;
2351 } else if (!override && override_new) {
2352 override = override_new;
2353 override_new = NULL;
2354 override->override_next = thread->overrides;
2355 /* since first_override_for_resource was TRUE */
2356 override->override_contended_resource_count = 1;
2357 override->override_resource = resource;
2358 override->override_resource_type = resource_type;
2359 override->override_qos = THREAD_QOS_UNSPECIFIED;
2360 thread->overrides = override;
2361 }
2362
2363 if (override) {
2364 if (override->override_qos == THREAD_QOS_UNSPECIFIED)
2365 override->override_qos = override_qos;
2366 else
2367 override->override_qos = MAX(override->override_qos, override_qos);
2368 }
2369
2370 /* Determine how to combine the various overrides into a single current
2371 * requested override
2372 */
2373 new_qos_override = calculate_requested_qos_override(thread);
2374
2375 prev_qos_override = proc_get_thread_policy_locked(thread,
2376 TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2377
2378 if (new_qos_override != prev_qos_override) {
2379 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE,
2380 TASK_POLICY_QOS_OVERRIDE,
2381 new_qos_override, 0, &pend_token);
2382 }
2383
2384 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2385
2386 thread_mtx_unlock(thread);
2387
2388 thread_policy_update_complete_unlocked(thread, &pend_token);
2389
2390 out:
2391 if (override_new) {
2392 zfree(thread_qos_override_zone, override_new);
2393 }
2394
2395 DTRACE_BOOST4(qos_add_override_post, int, prev_qos_override,
2396 int, new_qos_override, int, new_effective_qos, int, rc);
2397
2398 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_END,
2399 new_qos_override, resource, resource_type, 0, 0);
2400
2401 return rc;
2402 }
2403
2404 int
2405 proc_thread_qos_add_override_check_owner(thread_t thread,
2406 int override_qos,
2407 boolean_t first_override_for_resource,
2408 user_addr_t resource,
2409 int resource_type,
2410 user_addr_t user_lock_addr,
2411 mach_port_name_t user_lock_owner)
2412 {
2413 return proc_thread_qos_add_override_internal(thread, override_qos,
2414 first_override_for_resource, resource, resource_type,
2415 user_lock_addr, user_lock_owner);
2416 }
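
/*
 * Illustrative sketch (not part of the build): how a ulock-style caller might
 * consume the return codes documented above. 'owner_thread', 'waiter_qos',
 * 'lock_addr' and 'owner_name' are hypothetical values supplied by the caller.
 *
 *     int err = proc_thread_qos_add_override_check_owner(owner_thread,
 *         waiter_qos, TRUE, lock_addr,
 *         THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX, lock_addr, owner_name);
 *
 *     if (err == EFAULT) {
 *         // lock word not resident: userland must fault it in and retry
 *     } else if (err == ESTALE) {
 *         // the lock changed owners underneath us: re-read the lock word,
 *         // re-resolve the owner, and retry against the new owner
 *     }
 */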
2417
2418 boolean_t
2419 proc_thread_qos_add_override(task_t task,
2420 thread_t thread,
2421 uint64_t tid,
2422 int override_qos,
2423 boolean_t first_override_for_resource,
2424 user_addr_t resource,
2425 int resource_type)
2426 {
2427 boolean_t has_thread_reference = FALSE;
2428 int rc = 0;
2429
2430 if (thread == THREAD_NULL) {
2431 thread = task_findtid(task, tid);
2432 /* returns referenced thread */
2433
2434 if (thread == THREAD_NULL) {
2435 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_ADD_OVERRIDE)) | DBG_FUNC_NONE,
2436 tid, 0, 0xdead, 0, 0);
2437 return FALSE;
2438 }
2439 has_thread_reference = TRUE;
2440 } else {
2441 assert(thread->task == task);
2442 }
2443 rc = proc_thread_qos_add_override_internal(thread, override_qos,
2444 first_override_for_resource, resource, resource_type, 0, 0);
2445 if (has_thread_reference) {
2446 thread_deallocate(thread);
2447 }
2448
2449 return rc == 0;
2450 }
2451
2452 static int
2453 proc_thread_qos_remove_override_internal(thread_t thread,
2454 user_addr_t resource,
2455 int resource_type,
2456 boolean_t reset,
2457 boolean_t squash)
2458 {
2459 struct task_pend_token pend_token = {};
2460
2461 struct thread_qos_override *deferred_free_override_list = NULL;
2462 int new_qos_override, prev_qos_override, new_effective_qos, prev_qos;
2463 int new_qos = THREAD_QOS_UNSPECIFIED;
2464
2465 thread_mtx_lock(thread);
2466
2467 canonicalize_resource_and_type(&resource, &resource_type);
2468
2469 find_and_decrement_qos_override(thread, resource, resource_type, reset, &deferred_free_override_list);
2470
2471 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_START,
2472 thread_tid(thread), resource, reset, 0, 0);
2473
2474 DTRACE_BOOST3(qos_remove_override_pre, uint64_t, thread_tid(thread),
2475 uint64_t, thread->requested_policy.thrp_qos,
2476 uint64_t, thread->effective_policy.thep_qos);
2477
2478 /* Determine how to combine the various overrides into a single current requested override */
2479 new_qos_override = calculate_requested_qos_override(thread);
2480
2481 spl_t s = splsched();
2482 thread_lock(thread);
2483
2484 /*
2485 * The override chain, and therefore the value of the current override, is protected by the thread mutex,
2486 * so we can do a get/set without races. However, the rest of thread policy is locked under the spinlock.
2487 * This means you can't change the current override from a spinlock-only setter.
2488 */
2489 prev_qos_override = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, NULL);
2490
2491 if (squash) {
2492 /*
2493 * Remove the specified overrides, and set the current override as the new base QoS.
2494 * Return the new QoS value.
2495 */
2496 prev_qos = thread_get_requested_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS, NULL);
2497
2498 new_qos = MAX(prev_qos, prev_qos_override);
2499 if (new_qos != prev_qos)
2500 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS, new_qos, 0, &pend_token);
2501 }
2502
2503 if (new_qos_override != prev_qos_override)
2504 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_OVERRIDE, new_qos_override, 0, &pend_token);
2505
2506 new_effective_qos = proc_get_effective_thread_policy(thread, TASK_POLICY_QOS);
2507
2508 thread_unlock(thread);
2509 splx(s);
2510
2511 thread_mtx_unlock(thread);
2512
2513 thread_policy_update_complete_unlocked(thread, &pend_token);
2514
2515 while (deferred_free_override_list) {
2516 struct thread_qos_override *override_next = deferred_free_override_list->override_next;
2517
2518 zfree(thread_qos_override_zone, deferred_free_override_list);
2519 deferred_free_override_list = override_next;
2520 }
2521
2522 DTRACE_BOOST3(qos_remove_override_post, int, prev_qos_override,
2523 int, new_qos_override, int, new_effective_qos);
2524
2525 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_END,
2526 thread_tid(thread), squash, 0, 0, 0);
2527
2528 return new_qos;
2529 }
2530
2531 boolean_t
2532 proc_thread_qos_remove_override(task_t task,
2533 thread_t thread,
2534 uint64_t tid,
2535 user_addr_t resource,
2536 int resource_type)
2537 {
2538 boolean_t has_thread_reference = FALSE;
2539
2540 if (thread == THREAD_NULL) {
2541 thread = task_findtid(task, tid);
2542 /* returns referenced thread */
2543
2544 if (thread == THREAD_NULL) {
2545 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2546 tid, 0, 0xdead, 0, 0);
2547 return FALSE;
2548 }
2549 has_thread_reference = TRUE;
2550 } else {
2551 assert(task == thread->task);
2552 }
2553
2554 proc_thread_qos_remove_override_internal(thread, resource, resource_type, FALSE, FALSE);
2555
2556 if (has_thread_reference)
2557 thread_deallocate(thread);
2558
2559 return TRUE;
2560 }
2561
2562 boolean_t
2563 proc_thread_qos_reset_override(task_t task,
2564 thread_t thread,
2565 uint64_t tid,
2566 user_addr_t resource,
2567 int resource_type)
2568
2569 {
2570 boolean_t has_thread_reference = FALSE;
2571
2572 if (thread == THREAD_NULL) {
2573 thread = task_findtid(task, tid);
2574 /* returns referenced thread */
2575
2576 if (thread == THREAD_NULL) {
2577 KERNEL_DEBUG_CONSTANT((IMPORTANCE_CODE(IMP_USYNCH_QOS_OVERRIDE, IMP_USYNCH_REMOVE_OVERRIDE)) | DBG_FUNC_NONE,
2578 tid, 0, 0xdead, 0, 0);
2579 return FALSE;
2580 }
2581 has_thread_reference = TRUE;
2582 } else {
2583 assert(task == thread->task);
2584 }
2585
2586 proc_thread_qos_remove_override_internal(thread, resource, resource_type, TRUE, FALSE);
2587
2588 if (has_thread_reference)
2589 thread_deallocate(thread);
2590
2591 return TRUE;
2592 }
2593
2594 /*
2595 * Clears the requested overrides, and replaces the current QoS with the max
2596 * of the current QoS and the current override, then returns the new QoS.
2597 *
2598 * This is useful for resetting overrides before parking a workqueue thread,
2599 * while avoiding a priority drop (and preemption) right before the park.
2600 *
2601 * Called without any locks held.
2602 */
2603 int
2604 proc_thread_qos_squash_override(thread_t thread, user_addr_t resource, int resource_type)
2605 {
2606 return proc_thread_qos_remove_override_internal(thread, resource, resource_type, TRUE, TRUE);
2607 }
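
/*
 * Illustrative sketch (not part of the build): the intended use right before a
 * workqueue thread parks. Using the wildcard resource/type here is an
 * assumption about how a caller would clear everything at once.
 *
 *     int new_qos = proc_thread_qos_squash_override(current_thread(),
 *         THREAD_QOS_OVERRIDE_RESOURCE_WILDCARD,
 *         THREAD_QOS_OVERRIDE_TYPE_WILDCARD);
 *
 *     // report new_qos back to userspace so it knows the thread's base QoS
 *     // after the overrides were folded in
 */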
2608
2609 /* Deallocate before thread termination */
2610 void proc_thread_qos_deallocate(thread_t thread)
2611 {
2612 /*
2613 * There are no more references to this thread,
2614 * therefore this thread must not own any more locks,
2615 * therefore there must not be any more user promotions.
2616 */
2617 assert(thread->user_promotions == 0);
2618 assert(thread->requested_policy.thrp_qos_promote == THREAD_QOS_UNSPECIFIED);
2619 assert(thread->user_promotion_basepri == 0);
2620
2621 /* This thread must have no more IPC overrides. */
2622 assert(thread->ipc_overrides == 0);
2623 assert(thread->requested_policy.thrp_qos_ipc_override == THREAD_QOS_UNSPECIFIED);
2624
2625 /*
2626 * Clear out any lingering override objects.
2627 */
2628 struct thread_qos_override *override;
2629
2630 thread_mtx_lock(thread);
2631 override = thread->overrides;
2632 thread->overrides = NULL;
2633 thread->requested_policy.thrp_qos_override = THREAD_QOS_UNSPECIFIED;
2634 /* We don't need to re-evaluate thread policy here because the thread has already exited */
2635 thread_mtx_unlock(thread);
2636
2637 while (override) {
2638 struct thread_qos_override *override_next = override->override_next;
2639
2640 zfree(thread_qos_override_zone, override);
2641 override = override_next;
2642 }
2643 }
2644
2645 /*
2646 * Set up the primordial thread's QoS
2647 */
2648 void
2649 task_set_main_thread_qos(task_t task, thread_t thread) {
2650 struct task_pend_token pend_token = {};
2651
2652 assert(thread->task == task);
2653
2654 thread_mtx_lock(thread);
2655
2656 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2657 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_START,
2658 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2659 thread->requested_policy.thrp_qos, 0);
2660
2661 int primordial_qos = task_compute_main_thread_qos(task);
2662
2663 proc_set_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS,
2664 primordial_qos, 0, &pend_token);
2665
2666 thread_mtx_unlock(thread);
2667
2668 thread_policy_update_complete_unlocked(thread, &pend_token);
2669
2670 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
2671 (IMPORTANCE_CODE(IMP_MAIN_THREAD_QOS, 0)) | DBG_FUNC_END,
2672 thread_tid(thread), threquested_0(thread), threquested_1(thread),
2673 primordial_qos, 0);
2674 }
2675
2676 /*
2677 * KPI for pthread kext
2678 *
2679 * Return a good guess at what the initial manager QoS will be
2680 * Dispatch can override this in userspace if it so chooses
2681 */
2682 int
2683 task_get_default_manager_qos(task_t task)
2684 {
2685 int primordial_qos = task_compute_main_thread_qos(task);
2686
2687 if (primordial_qos == THREAD_QOS_LEGACY)
2688 primordial_qos = THREAD_QOS_USER_INITIATED;
2689
2690 return primordial_qos;
2691 }
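
/*
 * Illustrative sketch (not part of the build): the pthread kext asking for a
 * starting QoS for a task's workqueue manager thread. How the result maps to
 * a pthread priority is up to the caller.
 *
 *     int manager_qos = task_get_default_manager_qos(task);
 *
 *     // manager_qos is a THREAD_QOS_* value; LEGACY has already been bumped
 *     // to USER_INITIATED, so the manager never starts below that
 */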
2692
2693
2694 /*
2695 * Promote thread with the user level properties of 'promoter'
2696 * Mutexes may be held, but it's OK to take the throttle lock
2697 *
2698 * If 'new_promotion' is TRUE, this is a new promotion.
2699 * If FALSE, we are updating an existing promotion.
2700 */
2701 static void
2702 thread_user_promotion_promote(thread_t thread,
2703 thread_t promoter,
2704 struct promote_token* promote_token,
2705 boolean_t new_promotion)
2706 {
2707 struct task_pend_token pend_token = {};
2708
2709 uint32_t promoter_base_pri = 0, promoter_qos = THREAD_QOS_UNSPECIFIED;
2710
2711 spl_t s = splsched();
2712 thread_lock(promoter);
2713
2714 /*
2715 * We capture the 'promotion qos' here, which is the QoS as it stands
2716 * before task-level clamping.
2717 *
2718 * This means that if the process gets unclamped while a promotion
2719 * is in effect, the owning thread ends up with the correct QoS.
2720 *
2721 * This does NOT work correctly across processes, as the correct QoS
2722 * in one is not necessarily the correct QoS in another.
2723 * When we add support for multi-process ulock boosting, we need to
2724 * do something more complex.
2725 */
2726 promoter_qos = promoter->effective_policy.thep_qos_promote;
2727
2728 /* TODO: extract 'effective unclamped base pri' instead */
2729 promoter_base_pri = promoter->base_pri;
2730
2731 thread_unlock(promoter);
2732 splx(s);
2733
2734 /* clamp out realtime to max user pri */
2735 promoter_base_pri = MIN(promoter_base_pri, MAXPRI_USER);
2736
2737 /* add in the saved promotion token */
2738 assert(promote_token->pt_basepri <= MAXPRI_USER);
2739
2740 promoter_base_pri = MAX(promoter_base_pri, promote_token->pt_basepri);
2741 promoter_qos = MAX(promoter_qos, promote_token->pt_qos);
2742
2743 /* save the max for later */
2744 promote_token->pt_basepri = promoter_base_pri;
2745 promote_token->pt_qos = promoter_qos;
2746
2747 s = splsched();
2748 thread_lock(thread);
2749
2750 if (new_promotion) {
2751 if (thread->user_promotions == 0) {
2752 assert(thread->requested_policy.thrp_qos_promote == THREAD_QOS_UNSPECIFIED);
2753 assert(thread->user_promotion_basepri == 0);
2754 }
2755
2756 thread->user_promotions++;
2757 } else {
2758 assert(thread->user_promotions > 0);
2759 }
2760
2761 uint32_t thread_qos = thread->requested_policy.thrp_qos_promote;
2762 uint32_t thread_basepri = thread->user_promotion_basepri;
2763
2764 uint32_t new_qos = MAX(thread_qos, promoter_qos);
2765 uint32_t new_basepri = MAX(thread_basepri, promoter_base_pri);
2766
2767 /* TODO: Fast path the 'new is lower than effective' case to avoid full reevaluation */
2768 if (thread_qos != new_qos || thread_basepri != new_basepri) {
2769
2770 thread->user_promotion_basepri = new_basepri;
2771
2772 pend_token.tpt_force_recompute_pri = 1;
2773
2774 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2775 TASK_POLICY_QOS_PROMOTE, new_qos,
2776 0, &pend_token);
2777 }
2778
2779 thread_unlock(thread);
2780 splx(s);
2781
2782 thread_policy_update_complete_unlocked(thread, &pend_token);
2783 }
2784
2785 /* Add a user promotion to thread */
2786 void
2787 thread_user_promotion_add(thread_t thread,
2788 thread_t promoter,
2789 struct promote_token* promote_token)
2790 {
2791 thread_user_promotion_promote(thread, promoter, promote_token, TRUE);
2792 }
2793
2794 /* Update an existing user promotion on thread */
2795 void
2796 thread_user_promotion_update(thread_t thread,
2797 thread_t promoter,
2798 struct promote_token* promote_token)
2799 {
2800 thread_user_promotion_promote(thread, promoter, promote_token, FALSE);
2801 }
2802
2803 /*
2804 * Drop a user promotion on thread
2805 * Mutexes may be held, but it's OK to take the throttle lock
2806 */
2807 void
2808 thread_user_promotion_drop(thread_t thread)
2809 {
2810 struct task_pend_token pend_token = {};
2811
2812 spl_t s = splsched();
2813 thread_lock(thread);
2814
2815 assert(thread->user_promotions > 0);
2816
2817 if (--thread->user_promotions == 0) {
2818 thread->requested_policy.thrp_qos_promote = THREAD_QOS_UNSPECIFIED;
2819 thread->user_promotion_basepri = 0;
2820
2821 pend_token.tpt_force_recompute_pri = 1;
2822
2823 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2824 TASK_POLICY_QOS_PROMOTE, THREAD_QOS_UNSPECIFIED,
2825 0, &pend_token);
2826 }
2827
2828 thread_unlock(thread);
2829 splx(s);
2830
2831 thread_policy_update_complete_unlocked(thread, &pend_token);
2832 }
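
/*
 * Illustrative sketch (not part of the build): the add/update/drop lifecycle a
 * lock implementation might drive while a thread owns a contended resource.
 * 'owner_thread', 'waiter_thread' and 'higher_waiter' are hypothetical; the
 * promote_token lives in the lock object, and the field names follow the
 * pt_basepri/pt_qos usage above.
 *
 *     struct promote_token token = { .pt_basepri = 0,
 *                                    .pt_qos = THREAD_QOS_UNSPECIFIED };
 *
 *     // first waiter arrives: start promoting the owner
 *     thread_user_promotion_add(owner_thread, waiter_thread, &token);
 *
 *     // a more important waiter arrives later: refresh the same promotion
 *     thread_user_promotion_update(owner_thread, higher_waiter, &token);
 *
 *     // owner releases the resource: drop its promotion
 *     thread_user_promotion_drop(owner_thread);
 */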
2833
2834
2835 /*
2836 * Set the thread's QoS IPC override
2837 * Owned by the IPC subsystem
2838 *
2839 * May be called with spinlocks held, but not spinlocks
2840 * that may deadlock against the thread lock, the throttle lock, or the SFI lock.
2841 *
2842 * One 'add' must be balanced by one 'drop'.
2843 * Between 'add' and 'drop', the override QoS value may be updated with an 'update'.
2844 * Before the thread is deallocated, there must be 0 remaining overrides.
2845 */
2846 static void
2847 thread_ipc_override(thread_t thread,
2848 uint32_t qos_override,
2849 boolean_t is_new_override)
2850 {
2851 struct task_pend_token pend_token = {};
2852
2853 spl_t s = splsched();
2854 thread_lock(thread);
2855
2856 uint32_t old_override = thread->requested_policy.thrp_qos_ipc_override;
2857
2858 if (is_new_override) {
2859 if (thread->ipc_overrides++ == 0) {
2860 /* This add is the first override for this thread */
2861 assert(old_override == THREAD_QOS_UNSPECIFIED);
2862 } else {
2863 /* There are already other overrides in effect for this thread */
2864 assert(old_override > THREAD_QOS_UNSPECIFIED);
2865 }
2866 } else {
2867 /* There must be at least one override (the previous add call) in effect */
2868 assert(thread->ipc_overrides > 0);
2869 assert(old_override > THREAD_QOS_UNSPECIFIED);
2870 }
2871
2872 uint32_t new_override = MAX(old_override, qos_override);
2873
2874 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2875 TASK_POLICY_QOS_IPC_OVERRIDE,
2876 new_override, 0, &pend_token);
2877
2878 assert(pend_token.tpt_update_sockets == 0);
2879
2880 thread_unlock(thread);
2881 splx(s);
2882
2883 /*
2884 * this is only safe after rethrottle_thread supports
2885 * being called from spinlock context
2886 */
2887 thread_policy_update_complete_unlocked(thread, &pend_token);
2888 }
2889
2890 void
2891 thread_add_ipc_override(thread_t thread,
2892 uint32_t qos_override)
2893 {
2894 thread_ipc_override(thread, qos_override, TRUE);
2895 }
2896
2897 void
2898 thread_update_ipc_override(thread_t thread,
2899 uint32_t qos_override)
2900 {
2901 thread_ipc_override(thread, qos_override, FALSE);
2902 }
2903
2904 void
2905 thread_drop_ipc_override(thread_t thread)
2906 {
2907 struct task_pend_token pend_token = {};
2908
2909 spl_t s = splsched();
2910 thread_lock(thread);
2911
2912 assert(thread->ipc_overrides > 0);
2913
2914 if (--thread->ipc_overrides == 0) {
2915 /*
2916 * There are no more overrides for this thread, so we should
2917 * clear out the saturated override value
2918 */
2919
2920 proc_set_thread_policy_spinlocked(thread, TASK_POLICY_ATTRIBUTE,
2921 TASK_POLICY_QOS_IPC_OVERRIDE, THREAD_QOS_UNSPECIFIED,
2922 0, &pend_token);
2923 }
2924
2925 thread_unlock(thread);
2926 splx(s);
2927
2928 /*
2929 * this is only safe after rethrottle_thread supports
2930 * being called from spinlock context
2931 */
2932 thread_policy_update_complete_unlocked(thread, &pend_token);
2933 }
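
/*
 * Illustrative sketch (not part of the build): the balanced add/update/drop
 * sequence expected of the IPC subsystem, per the rules above. 'dest_thread'
 * and the particular QoS values are hypothetical.
 *
 *     // first boosting message arrives for the destination thread
 *     thread_add_ipc_override(dest_thread, THREAD_QOS_USER_INITIATED);
 *
 *     // the required override rises while the boost is still outstanding
 *     thread_update_ipc_override(dest_thread, THREAD_QOS_USER_INTERACTIVE);
 *
 *     // the boost is no longer needed
 *     thread_drop_ipc_override(dest_thread);
 *
 * The override saturates at the maximum QoS seen between 'add' and 'drop',
 * and every 'add' must be matched by a 'drop' before the thread is
 * deallocated.
 */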
2934
2935 /* Get current IPC override, may be called from spinlock context */
2936 uint32_t
2937 thread_get_ipc_override(thread_t thread)
2938 {
2939 return proc_get_thread_policy_locked(thread, TASK_POLICY_ATTRIBUTE, TASK_POLICY_QOS_IPC_OVERRIDE, NULL);
2940 }
2941