Commit | Line | Data |
---|---|---|
d9a64523 | 1 | /* |
f427ee49 | 2 | * Copyright (c) 2000-2020 Apple Inc. All rights reserved. |
d9a64523 A |
3 | * |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* Copyright (c) 1995-2018 Apple, Inc. All Rights Reserved */ | |
29 | ||
30 | #include <sys/cdefs.h> | |
31 | ||
d9a64523 A |
32 | #include <kern/assert.h> |
33 | #include <kern/ast.h> | |
34 | #include <kern/clock.h> | |
35 | #include <kern/cpu_data.h> | |
36 | #include <kern/kern_types.h> | |
37 | #include <kern/policy_internal.h> | |
38 | #include <kern/processor.h> | |
0a7de745 | 39 | #include <kern/sched_prim.h> /* for thread_exception_return */ |
d9a64523 A |
40 | #include <kern/task.h> |
41 | #include <kern/thread.h> | |
42 | #include <kern/zalloc.h> | |
43 | #include <mach/kern_return.h> | |
44 | #include <mach/mach_param.h> | |
45 | #include <mach/mach_port.h> | |
46 | #include <mach/mach_types.h> | |
47 | #include <mach/mach_vm.h> | |
48 | #include <mach/sync_policy.h> | |
49 | #include <mach/task.h> | |
50 | #include <mach/thread_act.h> /* for thread_resume */ | |
51 | #include <mach/thread_policy.h> | |
52 | #include <mach/thread_status.h> | |
53 | #include <mach/vm_prot.h> | |
54 | #include <mach/vm_statistics.h> | |
55 | #include <machine/atomic.h> | |
56 | #include <machine/machine_routines.h> | |
57 | #include <vm/vm_map.h> | |
58 | #include <vm/vm_protos.h> | |
59 | ||
60 | #include <sys/eventvar.h> | |
61 | #include <sys/kdebug.h> | |
62 | #include <sys/kernel.h> | |
63 | #include <sys/lock.h> | |
64 | #include <sys/param.h> | |
0a7de745 | 65 | #include <sys/proc_info.h> /* for fill_procworkqueue */ |
d9a64523 A |
66 | #include <sys/proc_internal.h> |
67 | #include <sys/pthread_shims.h> | |
68 | #include <sys/resourcevar.h> | |
69 | #include <sys/signalvar.h> | |
70 | #include <sys/sysctl.h> | |
71 | #include <sys/sysproto.h> | |
72 | #include <sys/systm.h> | |
73 | #include <sys/ulock.h> /* for ulock_owner_value_to_port_name */ | |
74 | ||
75 | #include <pthread/bsdthread_private.h> | |
76 | #include <pthread/workqueue_syscalls.h> | |
77 | #include <pthread/workqueue_internal.h> | |
78 | #include <pthread/workqueue_trace.h> | |
79 | ||
80 | #include <os/log.h> | |
81 | ||
d9a64523 | 82 | static void workq_unpark_continue(void *uth, wait_result_t wr) __dead2; |
cb323159 A |
83 | static void workq_schedule_creator(proc_t p, struct workqueue *wq, |
84 | workq_kern_threadreq_flags_t flags); | |
d9a64523 A |
85 | |
86 | static bool workq_threadreq_admissible(struct workqueue *wq, struct uthread *uth, | |
0a7de745 | 87 | workq_threadreq_t req); |
d9a64523 A |
88 | |
89 | static uint32_t workq_constrained_allowance(struct workqueue *wq, | |
0a7de745 | 90 | thread_qos_t at_qos, struct uthread *uth, bool may_start_timer); |
d9a64523 A |
91 | |
92 | static bool workq_thread_is_busy(uint64_t cur_ts, | |
0a7de745 | 93 | _Atomic uint64_t *lastblocked_tsp); |
d9a64523 A |
94 | |
95 | static int workq_sysctl_handle_usecs SYSCTL_HANDLER_ARGS; | |
96 | ||
97 | #pragma mark globals | |
98 | ||
99 | struct workq_usec_var { | |
100 | uint32_t usecs; | |
101 | uint64_t abstime; | |
102 | }; | |
103 | ||
104 | #define WORKQ_SYSCTL_USECS(var, init) \ | |
0a7de745 A |
105 | static struct workq_usec_var var = { .usecs = init }; \ |
106 | SYSCTL_OID(_kern, OID_AUTO, var##_usecs, \ | |
107 | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &var, 0, \ | |
108 | workq_sysctl_handle_usecs, "I", "") | |
d9a64523 | 109 | |
f427ee49 | 110 | static LCK_GRP_DECLARE(workq_lck_grp, "workq"); |
d9a64523 A |
111 | os_refgrp_decl(static, workq_refgrp, "workq", NULL); |
112 | ||
f427ee49 A |
113 | static ZONE_DECLARE(workq_zone_workqueue, "workq.wq", |
114 | sizeof(struct workqueue), ZC_NONE); | |
115 | static ZONE_DECLARE(workq_zone_threadreq, "workq.threadreq", | |
116 | sizeof(struct workq_threadreq_s), ZC_CACHING); | |
117 | ||
cb323159 | 118 | static struct mpsc_daemon_queue workq_deallocate_queue; |
d9a64523 | 119 | |
0a7de745 | 120 | WORKQ_SYSCTL_USECS(wq_stalled_window, WQ_STALLED_WINDOW_USECS); |
d9a64523 A |
121 | WORKQ_SYSCTL_USECS(wq_reduce_pool_window, WQ_REDUCE_POOL_WINDOW_USECS); |
122 | WORKQ_SYSCTL_USECS(wq_max_timer_interval, WQ_MAX_TIMER_INTERVAL_USECS); | |
123 | static uint32_t wq_max_threads = WORKQUEUE_MAXTHREADS; | |
124 | static uint32_t wq_max_constrained_threads = WORKQUEUE_MAXTHREADS / 8; | |
125 | static uint32_t wq_init_constrained_limit = 1; | |
126 | static uint16_t wq_death_max_load; | |
127 | static uint32_t wq_max_parallelism[WORKQ_NUM_QOS_BUCKETS]; | |
128 | ||
129 | #pragma mark sysctls | |
130 | ||
131 | static int | |
132 | workq_sysctl_handle_usecs SYSCTL_HANDLER_ARGS | |
133 | { | |
134 | #pragma unused(arg2) | |
135 | struct workq_usec_var *v = arg1; | |
136 | int error = sysctl_handle_int(oidp, &v->usecs, 0, req); | |
0a7de745 | 137 | if (error || !req->newptr) { |
d9a64523 | 138 | return error; |
0a7de745 | 139 | } |
d9a64523 | 140 | clock_interval_to_absolutetime_interval(v->usecs, NSEC_PER_USEC, |
0a7de745 | 141 | &v->abstime); |
d9a64523 A |
142 | return 0; |
143 | } | |
144 | ||
145 | SYSCTL_INT(_kern, OID_AUTO, wq_max_threads, CTLFLAG_RW | CTLFLAG_LOCKED, | |
0a7de745 | 146 | &wq_max_threads, 0, ""); |
d9a64523 A |
147 | |
148 | SYSCTL_INT(_kern, OID_AUTO, wq_max_constrained_threads, CTLFLAG_RW | CTLFLAG_LOCKED, | |
0a7de745 | 149 | &wq_max_constrained_threads, 0, ""); |
d9a64523 A |
150 | |
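The SYSCTL_INT declarations above (and the WORKQ_SYSCTL_USECS macro before them) publish these tunables under the kern namespace. A minimal user-space sketch of reading one of them, assuming the standard sysctlbyname(3) interface; the OID name is taken from the SYSCTL_INT declaration above:

```c
/* Illustrative user-space sketch, not part of this kernel file. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t max_threads = 0;
	size_t len = sizeof(max_threads);

	/* "kern.wq_max_threads" follows from SYSCTL_INT(_kern, ..., wq_max_threads, ...) above */
	if (sysctlbyname("kern.wq_max_threads", &max_threads, &len, NULL, 0) != 0) {
		perror("sysctlbyname");
		return 1;
	}
	printf("kern.wq_max_threads = %u\n", max_threads);
	return 0;
}
```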
151 | #pragma mark p_wqptr | |
152 | ||
153 | #define WQPTR_IS_INITING_VALUE ((struct workqueue *)~(uintptr_t)0) | |
154 | ||
155 | static struct workqueue * | |
156 | proc_get_wqptr_fast(struct proc *p) | |
157 | { | |
158 | return os_atomic_load(&p->p_wqptr, relaxed); | |
159 | } | |
160 | ||
161 | static struct workqueue * | |
162 | proc_get_wqptr(struct proc *p) | |
163 | { | |
164 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
165 | return wq == WQPTR_IS_INITING_VALUE ? NULL : wq; | |
166 | } | |
167 | ||
168 | static void | |
169 | proc_set_wqptr(struct proc *p, struct workqueue *wq) | |
170 | { | |
171 | wq = os_atomic_xchg(&p->p_wqptr, wq, release); | |
172 | if (wq == WQPTR_IS_INITING_VALUE) { | |
173 | proc_lock(p); | |
174 | thread_wakeup(&p->p_wqptr); | |
175 | proc_unlock(p); | |
176 | } | |
177 | } | |
178 | ||
179 | static bool | |
180 | proc_init_wqptr_or_wait(struct proc *p) | |
181 | { | |
182 | struct workqueue *wq; | |
183 | ||
184 | proc_lock(p); | |
cb323159 | 185 | wq = os_atomic_load(&p->p_wqptr, relaxed); |
d9a64523 A |
186 | |
187 | if (wq == NULL) { | |
cb323159 | 188 | os_atomic_store(&p->p_wqptr, WQPTR_IS_INITING_VALUE, relaxed); |
d9a64523 A |
189 | proc_unlock(p); |
190 | return true; | |
191 | } | |
192 | ||
193 | if (wq == WQPTR_IS_INITING_VALUE) { | |
194 | assert_wait(&p->p_wqptr, THREAD_UNINT); | |
195 | proc_unlock(p); | |
196 | thread_block(THREAD_CONTINUE_NULL); | |
197 | } else { | |
198 | proc_unlock(p); | |
199 | } | |
200 | return false; | |
201 | } | |
202 | ||
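proc_init_wqptr_or_wait() implements an "initialize once or wait for the initializer" protocol on p_wqptr: the winner sees NULL, publishes the WQPTR_IS_INITING_VALUE sentinel and returns true; losers sleep on &p->p_wqptr until proc_set_wqptr() wakes them. A rough user-space analog of the same pattern, using a mutex and condition variable in place of the proc lock and assert_wait/thread_wakeup (illustrative only, not the kernel mechanism):

```c
/* User-space analog of the init-or-wait protocol above (illustrative). */
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static void *wqptr;                       /* NULL, INITING, or the real object */
#define INITING ((void *)~(uintptr_t)0)   /* mirrors WQPTR_IS_INITING_VALUE */

static void *
get_or_init(void *(*make)(void))
{
	pthread_mutex_lock(&lock);
	while (wqptr == NULL || wqptr == INITING) {
		if (wqptr == NULL) {
			wqptr = INITING;                 /* we won: publish the sentinel */
			pthread_mutex_unlock(&lock);
			void *obj = make();              /* initialize outside the lock */
			pthread_mutex_lock(&lock);
			wqptr = obj;                     /* proc_set_wqptr() */
			pthread_cond_broadcast(&cond);   /* thread_wakeup() */
		} else {
			pthread_cond_wait(&cond, &lock); /* initializer in flight: wait */
		}
	}
	void *result = wqptr;
	pthread_mutex_unlock(&lock);
	return result;
}

static void *
make_object(void)
{
	static int object;   /* stands in for the real workqueue allocation */
	return &object;
}

int
main(void)
{
	return get_or_init(make_object) != NULL ? 0 : 1;
}
```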
203 | static inline event_t | |
204 | workq_parked_wait_event(struct uthread *uth) | |
205 | { | |
206 | return (event_t)&uth->uu_workq_stackaddr; | |
207 | } | |
208 | ||
209 | static inline void | |
210 | workq_thread_wakeup(struct uthread *uth) | |
211 | { | |
cb323159 | 212 | thread_wakeup_thread(workq_parked_wait_event(uth), uth->uu_thread); |
d9a64523 A |
213 | } |
214 | ||
215 | #pragma mark wq_thactive | |
216 | ||
217 | #if defined(__LP64__) | |
218 | // Layout is: | |
219 | // 127 - 115 : 13 bits of zeroes | |
220 | // 114 - 112 : best QoS among all pending constrained requests | |
221 | // 111 - 0 : MGR, AUI, UI, IN, DF, UT, BG+MT buckets every 16 bits | |
222 | #define WQ_THACTIVE_BUCKET_WIDTH 16 | |
223 | #define WQ_THACTIVE_QOS_SHIFT (7 * WQ_THACTIVE_BUCKET_WIDTH) | |
224 | #else | |
225 | // Layout is: | |
226 | // 63 - 61 : best QoS among all pending constrained requests | |
227 | // 60 : Manager bucket (0 or 1) | |
228 | // 59 - 0 : AUI, UI, IN, DF, UT, BG+MT buckets every 10 bits | |
229 | #define WQ_THACTIVE_BUCKET_WIDTH 10 | |
230 | #define WQ_THACTIVE_QOS_SHIFT (6 * WQ_THACTIVE_BUCKET_WIDTH + 1) | |
231 | #endif | |
232 | #define WQ_THACTIVE_BUCKET_MASK ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1) | |
233 | #define WQ_THACTIVE_BUCKET_HALF (1U << (WQ_THACTIVE_BUCKET_WIDTH - 1)) | |
234 | ||
235 | static_assert(sizeof(wq_thactive_t) * CHAR_BIT - WQ_THACTIVE_QOS_SHIFT >= 3, | |
0a7de745 | 236 | "Make sure we have space to encode a QoS"); |
d9a64523 A |
237 | |
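The packed wq_thactive_t value described above decodes with plain shifts and masks. A stand-alone sketch of the LP64 layout (16-bit buckets, best constrained QoS in the bits above the seven buckets); the constants mirror the listing and the 128-bit type relies on the Clang/GCC __int128 extension:

```c
/* Illustrative decoding of the LP64 wq_thactive layout (not kernel code). */
#include <assert.h>
#include <stdint.h>

typedef unsigned __int128 wq_thactive_t;     /* Clang/GCC extension */

#define WQ_THACTIVE_BUCKET_WIDTH 16
#define WQ_THACTIVE_BUCKET_MASK  ((1U << WQ_THACTIVE_BUCKET_WIDTH) - 1)
#define WQ_THACTIVE_QOS_SHIFT    (7 * WQ_THACTIVE_BUCKET_WIDTH)

static uint32_t
bucket_count(wq_thactive_t v, int bucket)
{
	return (uint32_t)(v >> (bucket * WQ_THACTIVE_BUCKET_WIDTH)) &
	    WQ_THACTIVE_BUCKET_MASK;
}

int
main(void)
{
	wq_thactive_t v = 0;

	/* two active threads in bucket 3, best constrained QoS of 5 on top */
	v += (wq_thactive_t)2 << (3 * WQ_THACTIVE_BUCKET_WIDTH);
	v += (wq_thactive_t)5 << WQ_THACTIVE_QOS_SHIFT;

	assert(bucket_count(v, 3) == 2);
	assert(bucket_count(v, 0) == 0);
	assert((uint32_t)(v >> WQ_THACTIVE_QOS_SHIFT) == 5);
	return 0;
}
```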
238 | static inline wq_thactive_t | |
239 | _wq_thactive(struct workqueue *wq) | |
240 | { | |
cb323159 | 241 | return os_atomic_load_wide(&wq->wq_thactive, relaxed); |
d9a64523 A |
242 | } |
243 | ||
244 | static inline int | |
245 | _wq_bucket(thread_qos_t qos) | |
246 | { | |
247 | // Map both BG and MT to the same bucket by over-shifting down and | |
248 | // clamping MT and BG together. | |
249 | switch (qos) { | |
250 | case THREAD_QOS_MAINTENANCE: | |
251 | return 0; | |
252 | default: | |
253 | return qos - 2; | |
254 | } | |
255 | } | |
256 | ||
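The net effect of _wq_bucket() is that MAINTENANCE and BACKGROUND share bucket 0 while every higher QoS level n lands in bucket n - 2. A tiny stand-alone check of that mapping; the local enum values are assumptions for illustration, following the MT, BG, UT, DF, IN, UI, AUI order in the layout comment above:

```c
/* Illustrative check of the bucket mapping (local enum, not kernel types). */
#include <assert.h>

enum { QOS_MT = 1, QOS_BG, QOS_UT, QOS_DF, QOS_IN, QOS_UI, QOS_AUI };

static int
wq_bucket(int qos)
{
	/* MT and BG collapse into bucket 0; everything else shifts down by 2 */
	return qos == QOS_MT ? 0 : qos - 2;
}

int
main(void)
{
	assert(wq_bucket(QOS_MT) == 0 && wq_bucket(QOS_BG) == 0);
	assert(wq_bucket(QOS_UT) == 1);
	assert(wq_bucket(QOS_AUI) == 5);   /* QoS buckets 0..5; MGR is handled separately */
	return 0;
}
```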
257 | #define WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(tha) \ | |
f427ee49 | 258 | ((thread_qos_t)((tha) >> WQ_THACTIVE_QOS_SHIFT)) |
d9a64523 A |
259 | |
260 | static inline thread_qos_t | |
261 | _wq_thactive_best_constrained_req_qos(struct workqueue *wq) | |
262 | { | |
263 | // Avoid expensive atomic operations: the three bits we're loading are in | |
264 | // a single byte, and always updated under the workqueue lock | |
265 | wq_thactive_t v = *(wq_thactive_t *)&wq->wq_thactive; | |
266 | return WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(v); | |
267 | } | |
268 | ||
269 | static void | |
270 | _wq_thactive_refresh_best_constrained_req_qos(struct workqueue *wq) | |
271 | { | |
272 | thread_qos_t old_qos, new_qos; | |
273 | workq_threadreq_t req; | |
274 | ||
275 | req = priority_queue_max(&wq->wq_constrained_queue, | |
0a7de745 | 276 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
277 | new_qos = req ? req->tr_qos : THREAD_QOS_UNSPECIFIED; |
278 | old_qos = _wq_thactive_best_constrained_req_qos(wq); | |
279 | if (old_qos != new_qos) { | |
280 | long delta = (long)new_qos - (long)old_qos; | |
281 | wq_thactive_t v = (wq_thactive_t)delta << WQ_THACTIVE_QOS_SHIFT; | |
282 | /* | |
283 | * We can do an atomic add relative to the initial load because updates | |
284 | * to this qos are always serialized under the workqueue lock. | |
285 | */ | |
286 | v = os_atomic_add(&wq->wq_thactive, v, relaxed); | |
287 | #ifdef __LP64__ | |
288 | WQ_TRACE_WQ(TRACE_wq_thactive_update, wq, (uint64_t)v, | |
0a7de745 | 289 | (uint64_t)(v >> 64), 0, 0); |
d9a64523 A |
290 | #else |
291 | WQ_TRACE_WQ(TRACE_wq_thactive_update, wq, v, 0, 0, 0); | |
292 | #endif | |
293 | } | |
294 | } | |
295 | ||
296 | static inline wq_thactive_t | |
297 | _wq_thactive_offset_for_qos(thread_qos_t qos) | |
298 | { | |
299 | return (wq_thactive_t)1 << (_wq_bucket(qos) * WQ_THACTIVE_BUCKET_WIDTH); | |
300 | } | |
301 | ||
302 | static inline wq_thactive_t | |
303 | _wq_thactive_inc(struct workqueue *wq, thread_qos_t qos) | |
304 | { | |
305 | wq_thactive_t v = _wq_thactive_offset_for_qos(qos); | |
306 | return os_atomic_add_orig(&wq->wq_thactive, v, relaxed); | |
307 | } | |
308 | ||
309 | static inline wq_thactive_t | |
310 | _wq_thactive_dec(struct workqueue *wq, thread_qos_t qos) | |
311 | { | |
312 | wq_thactive_t v = _wq_thactive_offset_for_qos(qos); | |
313 | return os_atomic_sub_orig(&wq->wq_thactive, v, relaxed); | |
314 | } | |
315 | ||
316 | static inline void | |
317 | _wq_thactive_move(struct workqueue *wq, | |
0a7de745 | 318 | thread_qos_t old_qos, thread_qos_t new_qos) |
d9a64523 A |
319 | { |
320 | wq_thactive_t v = _wq_thactive_offset_for_qos(new_qos) - | |
0a7de745 | 321 | _wq_thactive_offset_for_qos(old_qos); |
cb323159 | 322 | os_atomic_add(&wq->wq_thactive, v, relaxed); |
d9a64523 A |
323 | wq->wq_thscheduled_count[_wq_bucket(old_qos)]--; |
324 | wq->wq_thscheduled_count[_wq_bucket(new_qos)]++; | |
325 | } | |
326 | ||
327 | static inline uint32_t | |
328 | _wq_thactive_aggregate_downto_qos(struct workqueue *wq, wq_thactive_t v, | |
0a7de745 | 329 | thread_qos_t qos, uint32_t *busycount, uint32_t *max_busycount) |
d9a64523 A |
330 | { |
331 | uint32_t count = 0, active; | |
332 | uint64_t curtime; | |
333 | ||
334 | assert(WORKQ_THREAD_QOS_MIN <= qos && qos <= WORKQ_THREAD_QOS_MAX); | |
335 | ||
336 | if (busycount) { | |
337 | curtime = mach_absolute_time(); | |
338 | *busycount = 0; | |
339 | } | |
340 | if (max_busycount) { | |
341 | *max_busycount = THREAD_QOS_LAST - qos; | |
342 | } | |
343 | ||
344 | int i = _wq_bucket(qos); | |
345 | v >>= i * WQ_THACTIVE_BUCKET_WIDTH; | |
346 | for (; i < WORKQ_NUM_QOS_BUCKETS; i++, v >>= WQ_THACTIVE_BUCKET_WIDTH) { | |
347 | active = v & WQ_THACTIVE_BUCKET_MASK; | |
348 | count += active; | |
349 | ||
350 | if (busycount && wq->wq_thscheduled_count[i] > active) { | |
351 | if (workq_thread_is_busy(curtime, &wq->wq_lastblocked_ts[i])) { | |
352 | /* | |
353 | * We only consider the last blocked thread for a given bucket | |
354 | * as busy because we don't want to take the list lock in each | |
355 | * sched callback. However this is an approximation that could | |
356 | * contribute to thread creation storms. | |
357 | */ | |
358 | (*busycount)++; | |
359 | } | |
360 | } | |
361 | } | |
362 | ||
363 | return count; | |
364 | } | |
365 | ||
366 | #pragma mark wq_flags | |
367 | ||
368 | static inline uint32_t | |
369 | _wq_flags(struct workqueue *wq) | |
370 | { | |
371 | return os_atomic_load(&wq->wq_flags, relaxed); | |
372 | } | |
373 | ||
374 | static inline bool | |
375 | _wq_exiting(struct workqueue *wq) | |
376 | { | |
377 | return _wq_flags(wq) & WQ_EXITING; | |
378 | } | |
379 | ||
380 | bool | |
381 | workq_is_exiting(struct proc *p) | |
382 | { | |
383 | struct workqueue *wq = proc_get_wqptr(p); | |
384 | return !wq || _wq_exiting(wq); | |
385 | } | |
386 | ||
d9a64523 A |
387 | #pragma mark workqueue lock |
388 | ||
389 | static bool | |
390 | workq_lock_spin_is_acquired_kdp(struct workqueue *wq) | |
391 | { | |
392 | return kdp_lck_spin_is_acquired(&wq->wq_lock); | |
393 | } | |
394 | ||
395 | static inline void | |
396 | workq_lock_spin(struct workqueue *wq) | |
397 | { | |
f427ee49 | 398 | lck_spin_lock_grp(&wq->wq_lock, &workq_lck_grp); |
d9a64523 A |
399 | } |
400 | ||
401 | static inline void | |
402 | workq_lock_held(__assert_only struct workqueue *wq) | |
403 | { | |
404 | LCK_SPIN_ASSERT(&wq->wq_lock, LCK_ASSERT_OWNED); | |
405 | } | |
406 | ||
407 | static inline bool | |
408 | workq_lock_try(struct workqueue *wq) | |
409 | { | |
f427ee49 | 410 | return lck_spin_try_lock_grp(&wq->wq_lock, &workq_lck_grp); |
d9a64523 A |
411 | } |
412 | ||
413 | static inline void | |
414 | workq_unlock(struct workqueue *wq) | |
415 | { | |
416 | lck_spin_unlock(&wq->wq_lock); | |
417 | } | |
418 | ||
419 | #pragma mark idle thread lists | |
420 | ||
421 | #define WORKQ_POLICY_INIT(qos) \ | |
0a7de745 | 422 | (struct uu_workq_policy){ .qos_req = qos, .qos_bucket = qos } |
d9a64523 A |
423 | |
424 | static inline thread_qos_t | |
425 | workq_pri_bucket(struct uu_workq_policy req) | |
426 | { | |
427 | return MAX(MAX(req.qos_req, req.qos_max), req.qos_override); | |
428 | } | |
429 | ||
430 | static inline thread_qos_t | |
431 | workq_pri_override(struct uu_workq_policy req) | |
432 | { | |
433 | return MAX(workq_pri_bucket(req), req.qos_bucket); | |
434 | } | |
435 | ||
436 | static inline bool | |
437 | workq_thread_needs_params_change(workq_threadreq_t req, struct uthread *uth) | |
438 | { | |
439 | workq_threadreq_param_t cur_trp, req_trp = { }; | |
440 | ||
441 | cur_trp.trp_value = uth->uu_save.uus_workq_park_data.workloop_params; | |
cb323159 | 442 | if (req->tr_flags & WORKQ_TR_FLAG_WL_PARAMS) { |
d9a64523 A |
443 | req_trp = kqueue_threadreq_workloop_param(req); |
444 | } | |
445 | ||
446 | /* | |
447 | * CPU percent flags are handled separately to policy changes, so ignore | |
448 | * them for all of these checks. | |
449 | */ | |
450 | uint16_t cur_flags = (cur_trp.trp_flags & ~TRP_CPUPERCENT); | |
451 | uint16_t req_flags = (req_trp.trp_flags & ~TRP_CPUPERCENT); | |
452 | ||
453 | if (!req_flags && !cur_flags) { | |
454 | return false; | |
455 | } | |
456 | ||
457 | if (req_flags != cur_flags) { | |
458 | return true; | |
459 | } | |
460 | ||
461 | if ((req_flags & TRP_PRIORITY) && req_trp.trp_pri != cur_trp.trp_pri) { | |
462 | return true; | |
463 | } | |
464 | ||
4ba76501 | 465 | if ((req_flags & TRP_POLICY) && req_trp.trp_pol != cur_trp.trp_pol) { |
d9a64523 A |
466 | return true; |
467 | } | |
468 | ||
469 | return false; | |
470 | } | |
471 | ||
472 | static inline bool | |
473 | workq_thread_needs_priority_change(workq_threadreq_t req, struct uthread *uth) | |
474 | { | |
475 | if (workq_thread_needs_params_change(req, uth)) { | |
476 | return true; | |
477 | } | |
478 | ||
479 | return req->tr_qos != workq_pri_override(uth->uu_workq_pri); | |
480 | } | |
481 | ||
482 | static void | |
483 | workq_thread_update_bucket(proc_t p, struct workqueue *wq, struct uthread *uth, | |
0a7de745 A |
484 | struct uu_workq_policy old_pri, struct uu_workq_policy new_pri, |
485 | bool force_run) | |
d9a64523 A |
486 | { |
487 | thread_qos_t old_bucket = old_pri.qos_bucket; | |
488 | thread_qos_t new_bucket = workq_pri_bucket(new_pri); | |
489 | ||
490 | if (old_bucket != new_bucket) { | |
491 | _wq_thactive_move(wq, old_bucket, new_bucket); | |
492 | } | |
493 | ||
494 | new_pri.qos_bucket = new_bucket; | |
495 | uth->uu_workq_pri = new_pri; | |
496 | ||
497 | if (workq_pri_override(old_pri) != new_bucket) { | |
498 | thread_set_workq_override(uth->uu_thread, new_bucket); | |
499 | } | |
500 | ||
501 | if (wq->wq_reqcount && (old_bucket > new_bucket || force_run)) { | |
502 | int flags = WORKQ_THREADREQ_CAN_CREATE_THREADS; | |
503 | if (old_bucket > new_bucket) { | |
504 | /* | |
505 | * When lowering our bucket, we may unblock a thread request, | |
506 | * but we can't drop our priority before we have evaluated | |
507 | * whether this is the case, and if we ever drop the workqueue lock | |
508 | * that would cause a priority inversion. | |
509 | * | |
510 | * We hence have to disallow thread creation in that case. | |
511 | */ | |
512 | flags = 0; | |
513 | } | |
514 | workq_schedule_creator(p, wq, flags); | |
515 | } | |
516 | } | |
517 | ||
518 | /* | |
519 | * Sets/resets the cpu percent limits on the current thread. We can't set | |
520 | * these limits from outside of the current thread, so this function needs | |
521 | * to be called when we're executing on the intended thread. | |
522 | */ | |
523 | static void | |
524 | workq_thread_reset_cpupercent(workq_threadreq_t req, struct uthread *uth) | |
525 | { | |
526 | assert(uth == current_uthread()); | |
527 | workq_threadreq_param_t trp = { }; | |
528 | ||
cb323159 | 529 | if (req && (req->tr_flags & WORKQ_TR_FLAG_WL_PARAMS)) { |
d9a64523 A |
530 | trp = kqueue_threadreq_workloop_param(req); |
531 | } | |
532 | ||
533 | if (uth->uu_workq_flags & UT_WORKQ_CPUPERCENT) { | |
534 | /* | |
535 | * Going through disable when we have an existing CPU percent limit | |
536 | * set will force the ledger to refill the token bucket of the current | |
537 | * thread. Removing any penalty applied by previous thread use. | |
538 | */ | |
539 | thread_set_cpulimit(THREAD_CPULIMIT_DISABLE, 0, 0); | |
540 | uth->uu_workq_flags &= ~UT_WORKQ_CPUPERCENT; | |
541 | } | |
542 | ||
543 | if (trp.trp_flags & TRP_CPUPERCENT) { | |
544 | thread_set_cpulimit(THREAD_CPULIMIT_BLOCK, trp.trp_cpupercent, | |
0a7de745 | 545 | (uint64_t)trp.trp_refillms * NSEC_PER_SEC); |
d9a64523 A |
546 | uth->uu_workq_flags |= UT_WORKQ_CPUPERCENT; |
547 | } | |
548 | } | |
549 | ||
550 | static void | |
551 | workq_thread_reset_pri(struct workqueue *wq, struct uthread *uth, | |
cb323159 | 552 | workq_threadreq_t req, bool unpark) |
d9a64523 A |
553 | { |
554 | thread_t th = uth->uu_thread; | |
555 | thread_qos_t qos = req ? req->tr_qos : WORKQ_THREAD_QOS_CLEANUP; | |
556 | workq_threadreq_param_t trp = { }; | |
557 | int priority = 31; | |
558 | int policy = POLICY_TIMESHARE; | |
559 | ||
cb323159 | 560 | if (req && (req->tr_flags & WORKQ_TR_FLAG_WL_PARAMS)) { |
d9a64523 A |
561 | trp = kqueue_threadreq_workloop_param(req); |
562 | } | |
563 | ||
564 | uth->uu_workq_pri = WORKQ_POLICY_INIT(qos); | |
565 | uth->uu_workq_flags &= ~UT_WORKQ_OUTSIDE_QOS; | |
d9a64523 | 566 | |
cb323159 A |
567 | if (unpark) { |
568 | uth->uu_save.uus_workq_park_data.workloop_params = trp.trp_value; | |
569 | // qos sent out to userspace (may differ from uu_workq_pri on param threads) | |
570 | uth->uu_save.uus_workq_park_data.qos = qos; | |
571 | } | |
d9a64523 A |
572 | |
573 | if (qos == WORKQ_THREAD_QOS_MANAGER) { | |
574 | uint32_t mgr_pri = wq->wq_event_manager_priority; | |
575 | assert(trp.trp_value == 0); // manager qos and thread policy don't mix | |
576 | ||
577 | if (mgr_pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) { | |
578 | mgr_pri &= _PTHREAD_PRIORITY_SCHED_PRI_MASK; | |
579 | thread_set_workq_pri(th, THREAD_QOS_UNSPECIFIED, mgr_pri, | |
0a7de745 | 580 | POLICY_TIMESHARE); |
d9a64523 A |
581 | return; |
582 | } | |
583 | ||
584 | qos = _pthread_priority_thread_qos(mgr_pri); | |
585 | } else { | |
586 | if (trp.trp_flags & TRP_PRIORITY) { | |
587 | qos = THREAD_QOS_UNSPECIFIED; | |
588 | priority = trp.trp_pri; | |
589 | uth->uu_workq_flags |= UT_WORKQ_OUTSIDE_QOS; | |
590 | } | |
591 | ||
592 | if (trp.trp_flags & TRP_POLICY) { | |
593 | policy = trp.trp_pol; | |
594 | } | |
595 | } | |
596 | ||
597 | thread_set_workq_pri(th, qos, priority, policy); | |
598 | } | |
599 | ||
600 | /* | |
601 | * Called by kevent with the NOTE_WL_THREAD_REQUEST knote lock held, | |
602 | * every time a servicer is being told about a new max QoS. | |
603 | */ | |
604 | void | |
cb323159 | 605 | workq_thread_set_max_qos(struct proc *p, workq_threadreq_t kqr) |
d9a64523 A |
606 | { |
607 | struct uu_workq_policy old_pri, new_pri; | |
cb323159 | 608 | struct uthread *uth = current_uthread(); |
d9a64523 | 609 | struct workqueue *wq = proc_get_wqptr_fast(p); |
cb323159 | 610 | thread_qos_t qos = kqr->tr_kq_qos_index; |
d9a64523 | 611 | |
0a7de745 | 612 | if (uth->uu_workq_pri.qos_max == qos) { |
d9a64523 | 613 | return; |
0a7de745 | 614 | } |
d9a64523 A |
615 | |
616 | workq_lock_spin(wq); | |
617 | old_pri = new_pri = uth->uu_workq_pri; | |
618 | new_pri.qos_max = qos; | |
619 | workq_thread_update_bucket(p, wq, uth, old_pri, new_pri, false); | |
620 | workq_unlock(wq); | |
621 | } | |
622 | ||
623 | #pragma mark idle threads accounting and handling | |
624 | ||
625 | static inline struct uthread * | |
626 | workq_oldest_killable_idle_thread(struct workqueue *wq) | |
627 | { | |
628 | struct uthread *uth = TAILQ_LAST(&wq->wq_thidlelist, workq_uthread_head); | |
629 | ||
630 | if (uth && !uth->uu_save.uus_workq_park_data.has_stack) { | |
631 | uth = TAILQ_PREV(uth, workq_uthread_head, uu_workq_entry); | |
632 | if (uth) { | |
633 | assert(uth->uu_save.uus_workq_park_data.has_stack); | |
634 | } | |
635 | } | |
636 | return uth; | |
637 | } | |
638 | ||
639 | static inline uint64_t | |
640 | workq_kill_delay_for_idle_thread(struct workqueue *wq) | |
641 | { | |
642 | uint64_t delay = wq_reduce_pool_window.abstime; | |
643 | uint16_t idle = wq->wq_thidlecount; | |
644 | ||
645 | /* | |
646 | * If we have fewer than wq_death_max_load threads, use a 5s timer. | |
647 | * | |
648 | * For the next wq_max_constrained_threads ones, decay linearly | |
649 | * from 5s to 50ms. | |
650 | */ | |
651 | if (idle <= wq_death_max_load) { | |
652 | return delay; | |
653 | } | |
654 | ||
655 | if (wq_max_constrained_threads > idle - wq_death_max_load) { | |
656 | delay *= (wq_max_constrained_threads - (idle - wq_death_max_load)); | |
657 | } | |
658 | return delay / wq_max_constrained_threads; | |
659 | } | |
660 | ||
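A worked example of the decay performed by workq_kill_delay_for_idle_thread(): the first wq_death_max_load idle threads keep the full window, and each extra idle thread shrinks the delay linearly until it bottoms out at window / wq_max_constrained_threads. The window and thread counts below are illustrative values, not the kernel defaults:

```c
/* Stand-alone sketch of the idle-thread kill-delay decay above. */
#include <stdint.h>
#include <stdio.h>

static uint64_t
kill_delay(uint64_t window, uint16_t idle, uint16_t death_max_load,
    uint32_t max_constrained)
{
	uint64_t delay = window;

	if (idle <= death_max_load) {
		return delay;
	}
	if (max_constrained > idle - death_max_load) {
		delay *= (max_constrained - (idle - death_max_load));
	}
	return delay / max_constrained;
}

int
main(void)
{
	uint64_t window = 5000000;   /* 5s in usecs, per the comment above */

	/* death_max_load = 5, max_constrained = 64: both hypothetical here */
	for (uint16_t idle = 5; idle <= 69; idle += 16) {
		printf("idle=%2u -> delay=%llu usecs\n", (unsigned)idle,
		    (unsigned long long)kill_delay(window, idle, 5, 64));
	}
	/* prints 5000000, 3750000, 2500000, 1250000, 78125 */
	return 0;
}
```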
661 | static inline bool | |
662 | workq_should_kill_idle_thread(struct workqueue *wq, struct uthread *uth, | |
0a7de745 | 663 | uint64_t now) |
d9a64523 A |
664 | { |
665 | uint64_t delay = workq_kill_delay_for_idle_thread(wq); | |
666 | return now - uth->uu_save.uus_workq_park_data.idle_stamp > delay; | |
667 | } | |
668 | ||
669 | static void | |
670 | workq_death_call_schedule(struct workqueue *wq, uint64_t deadline) | |
671 | { | |
672 | uint32_t wq_flags = os_atomic_load(&wq->wq_flags, relaxed); | |
673 | ||
674 | if (wq_flags & (WQ_EXITING | WQ_DEATH_CALL_SCHEDULED)) { | |
675 | return; | |
676 | } | |
677 | os_atomic_or(&wq->wq_flags, WQ_DEATH_CALL_SCHEDULED, relaxed); | |
678 | ||
679 | WQ_TRACE_WQ(TRACE_wq_death_call | DBG_FUNC_NONE, wq, 1, 0, 0, 0); | |
680 | ||
681 | /* | |
682 | * <rdar://problem/13139182> Due to how long term timers work, the leeway | |
683 | * can't be too short, so use 500ms which is long enough that we will not | |
684 | * wake up the CPU for killing threads, but short enough that it doesn't | |
685 | * fall into long-term timer list shenanigans. | |
686 | */ | |
687 | thread_call_enter_delayed_with_leeway(wq->wq_death_call, NULL, deadline, | |
0a7de745 A |
688 | wq_reduce_pool_window.abstime / 10, |
689 | THREAD_CALL_DELAY_LEEWAY | THREAD_CALL_DELAY_USER_BACKGROUND); | |
d9a64523 A |
690 | } |
691 | ||
692 | /* | |
693 | * `decrement` is set to the number of threads that are no longer dying: | |
694 | * - because they have been resuscitated just in time (workq_pop_idle_thread) | |
695 | * - or have been killed (workq_thread_terminate). | |
696 | */ | |
697 | static void | |
698 | workq_death_policy_evaluate(struct workqueue *wq, uint16_t decrement) | |
699 | { | |
700 | struct uthread *uth; | |
701 | ||
702 | assert(wq->wq_thdying_count >= decrement); | |
0a7de745 | 703 | if ((wq->wq_thdying_count -= decrement) > 0) { |
d9a64523 | 704 | return; |
0a7de745 | 705 | } |
d9a64523 | 706 | |
0a7de745 | 707 | if (wq->wq_thidlecount <= 1) { |
d9a64523 | 708 | return; |
0a7de745 | 709 | } |
d9a64523 | 710 | |
0a7de745 | 711 | if ((uth = workq_oldest_killable_idle_thread(wq)) == NULL) { |
d9a64523 | 712 | return; |
0a7de745 | 713 | } |
d9a64523 A |
714 | |
715 | uint64_t now = mach_absolute_time(); | |
716 | uint64_t delay = workq_kill_delay_for_idle_thread(wq); | |
717 | ||
718 | if (now - uth->uu_save.uus_workq_park_data.idle_stamp > delay) { | |
719 | WQ_TRACE_WQ(TRACE_wq_thread_terminate | DBG_FUNC_START, | |
0a7de745 | 720 | wq, wq->wq_thidlecount, 0, 0, 0); |
d9a64523 A |
721 | wq->wq_thdying_count++; |
722 | uth->uu_workq_flags |= UT_WORKQ_DYING; | |
cb323159 A |
723 | if ((uth->uu_workq_flags & UT_WORKQ_IDLE_CLEANUP) == 0) { |
724 | workq_thread_wakeup(uth); | |
725 | } | |
d9a64523 A |
726 | return; |
727 | } | |
728 | ||
729 | workq_death_call_schedule(wq, | |
0a7de745 | 730 | uth->uu_save.uus_workq_park_data.idle_stamp + delay); |
d9a64523 A |
731 | } |
732 | ||
733 | void | |
734 | workq_thread_terminate(struct proc *p, struct uthread *uth) | |
735 | { | |
736 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
737 | ||
738 | workq_lock_spin(wq); | |
739 | TAILQ_REMOVE(&wq->wq_thrunlist, uth, uu_workq_entry); | |
740 | if (uth->uu_workq_flags & UT_WORKQ_DYING) { | |
741 | WQ_TRACE_WQ(TRACE_wq_thread_terminate | DBG_FUNC_END, | |
0a7de745 | 742 | wq, wq->wq_thidlecount, 0, 0, 0); |
d9a64523 A |
743 | workq_death_policy_evaluate(wq, 1); |
744 | } | |
745 | if (wq->wq_nthreads-- == wq_max_threads) { | |
746 | /* | |
747 | * We got under the thread limit again, which may have prevented | |
748 | * thread creation from happening, redrive if there are pending requests | |
749 | */ | |
750 | if (wq->wq_reqcount) { | |
751 | workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS); | |
752 | } | |
753 | } | |
754 | workq_unlock(wq); | |
755 | ||
756 | thread_deallocate(uth->uu_thread); | |
757 | } | |
758 | ||
759 | static void | |
760 | workq_kill_old_threads_call(void *param0, void *param1 __unused) | |
761 | { | |
762 | struct workqueue *wq = param0; | |
763 | ||
764 | workq_lock_spin(wq); | |
765 | WQ_TRACE_WQ(TRACE_wq_death_call | DBG_FUNC_START, wq, 0, 0, 0, 0); | |
cb323159 | 766 | os_atomic_andnot(&wq->wq_flags, WQ_DEATH_CALL_SCHEDULED, relaxed); |
d9a64523 A |
767 | workq_death_policy_evaluate(wq, 0); |
768 | WQ_TRACE_WQ(TRACE_wq_death_call | DBG_FUNC_END, wq, 0, 0, 0, 0); | |
769 | workq_unlock(wq); | |
770 | } | |
771 | ||
772 | static struct uthread * | |
cb323159 A |
773 | workq_pop_idle_thread(struct workqueue *wq, uint8_t uu_flags, |
774 | bool *needs_wakeup) | |
d9a64523 A |
775 | { |
776 | struct uthread *uth; | |
777 | ||
778 | if ((uth = TAILQ_FIRST(&wq->wq_thidlelist))) { | |
779 | TAILQ_REMOVE(&wq->wq_thidlelist, uth, uu_workq_entry); | |
780 | } else { | |
781 | uth = TAILQ_FIRST(&wq->wq_thnewlist); | |
782 | TAILQ_REMOVE(&wq->wq_thnewlist, uth, uu_workq_entry); | |
783 | } | |
784 | TAILQ_INSERT_TAIL(&wq->wq_thrunlist, uth, uu_workq_entry); | |
785 | ||
786 | assert((uth->uu_workq_flags & UT_WORKQ_RUNNING) == 0); | |
cb323159 A |
787 | uth->uu_workq_flags |= UT_WORKQ_RUNNING | uu_flags; |
788 | if ((uu_flags & UT_WORKQ_OVERCOMMIT) == 0) { | |
789 | wq->wq_constrained_threads_scheduled++; | |
790 | } | |
d9a64523 A |
791 | wq->wq_threads_scheduled++; |
792 | wq->wq_thidlecount--; | |
793 | ||
794 | if (__improbable(uth->uu_workq_flags & UT_WORKQ_DYING)) { | |
795 | uth->uu_workq_flags ^= UT_WORKQ_DYING; | |
796 | workq_death_policy_evaluate(wq, 1); | |
cb323159 A |
797 | *needs_wakeup = false; |
798 | } else if (uth->uu_workq_flags & UT_WORKQ_IDLE_CLEANUP) { | |
799 | *needs_wakeup = false; | |
800 | } else { | |
801 | *needs_wakeup = true; | |
d9a64523 A |
802 | } |
803 | return uth; | |
804 | } | |
805 | ||
806 | /* | |
807 | * Called by thread_create_workq_waiting() during thread initialization, before | |
808 | * assert_wait, before the thread has been started. | |
809 | */ | |
810 | event_t | |
811 | workq_thread_init_and_wq_lock(task_t task, thread_t th) | |
812 | { | |
813 | struct uthread *uth = get_bsdthread_info(th); | |
814 | ||
815 | uth->uu_workq_flags = UT_WORKQ_NEW; | |
816 | uth->uu_workq_pri = WORKQ_POLICY_INIT(THREAD_QOS_LEGACY); | |
817 | uth->uu_workq_thport = MACH_PORT_NULL; | |
818 | uth->uu_workq_stackaddr = 0; | |
cb323159 | 819 | uth->uu_workq_pthread_kill_allowed = 0; |
d9a64523 A |
820 | |
821 | thread_set_tag(th, THREAD_TAG_PTHREAD | THREAD_TAG_WORKQUEUE); | |
822 | thread_reset_workq_qos(th, THREAD_QOS_LEGACY); | |
823 | ||
824 | workq_lock_spin(proc_get_wqptr_fast(get_bsdtask_info(task))); | |
825 | return workq_parked_wait_event(uth); | |
826 | } | |
827 | ||
828 | /** | |
829 | * Try to add a new workqueue thread. | |
830 | * | |
831 | * - called with workq lock held | |
832 | * - dropped and retaken around thread creation | |
833 | * - return with workq lock held | |
834 | */ | |
835 | static bool | |
836 | workq_add_new_idle_thread(proc_t p, struct workqueue *wq) | |
837 | { | |
838 | mach_vm_offset_t th_stackaddr; | |
839 | kern_return_t kret; | |
840 | thread_t th; | |
841 | ||
842 | wq->wq_nthreads++; | |
843 | ||
844 | workq_unlock(wq); | |
845 | ||
846 | vm_map_t vmap = get_task_map(p->task); | |
847 | ||
848 | kret = pthread_functions->workq_create_threadstack(p, vmap, &th_stackaddr); | |
849 | if (kret != KERN_SUCCESS) { | |
850 | WQ_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, | |
0a7de745 | 851 | kret, 1, 0, 0); |
d9a64523 A |
852 | goto out; |
853 | } | |
854 | ||
855 | kret = thread_create_workq_waiting(p->task, workq_unpark_continue, &th); | |
856 | if (kret != KERN_SUCCESS) { | |
857 | WQ_TRACE_WQ(TRACE_wq_thread_create_failed | DBG_FUNC_NONE, wq, | |
0a7de745 | 858 | kret, 0, 0, 0); |
d9a64523 A |
859 | pthread_functions->workq_destroy_threadstack(p, vmap, th_stackaddr); |
860 | goto out; | |
861 | } | |
862 | ||
863 | // thread_create_workq_waiting() will return with the wq lock held | |
864 | // on success, because it calls workq_thread_init_and_wq_lock() above | |
865 | ||
866 | struct uthread *uth = get_bsdthread_info(th); | |
867 | ||
868 | wq->wq_creations++; | |
869 | wq->wq_thidlecount++; | |
f427ee49 | 870 | uth->uu_workq_stackaddr = (user_addr_t)th_stackaddr; |
d9a64523 A |
871 | TAILQ_INSERT_TAIL(&wq->wq_thnewlist, uth, uu_workq_entry); |
872 | ||
873 | WQ_TRACE_WQ(TRACE_wq_thread_create | DBG_FUNC_NONE, wq, 0, 0, 0, 0); | |
874 | return true; | |
875 | ||
876 | out: | |
877 | workq_lock_spin(wq); | |
878 | /* | |
879 | * Do not redrive here if we went under wq_max_threads again; | |
880 | * it is the responsibility of the callers of this function | |
881 | * to do so when it fails. | |
882 | */ | |
883 | wq->wq_nthreads--; | |
884 | return false; | |
885 | } | |
886 | ||
887 | #define WORKQ_UNPARK_FOR_DEATH_WAS_IDLE 0x1 | |
888 | ||
889 | __attribute__((noreturn, noinline)) | |
890 | static void | |
891 | workq_unpark_for_death_and_unlock(proc_t p, struct workqueue *wq, | |
cb323159 | 892 | struct uthread *uth, uint32_t death_flags, uint32_t setup_flags) |
d9a64523 A |
893 | { |
894 | thread_qos_t qos = workq_pri_override(uth->uu_workq_pri); | |
895 | bool first_use = uth->uu_workq_flags & UT_WORKQ_NEW; | |
896 | ||
897 | if (qos > WORKQ_THREAD_QOS_CLEANUP) { | |
cb323159 | 898 | workq_thread_reset_pri(wq, uth, NULL, /*unpark*/ true); |
d9a64523 A |
899 | qos = WORKQ_THREAD_QOS_CLEANUP; |
900 | } | |
901 | ||
902 | workq_thread_reset_cpupercent(NULL, uth); | |
903 | ||
904 | if (death_flags & WORKQ_UNPARK_FOR_DEATH_WAS_IDLE) { | |
905 | wq->wq_thidlecount--; | |
906 | if (first_use) { | |
907 | TAILQ_REMOVE(&wq->wq_thnewlist, uth, uu_workq_entry); | |
908 | } else { | |
909 | TAILQ_REMOVE(&wq->wq_thidlelist, uth, uu_workq_entry); | |
910 | } | |
911 | } | |
912 | TAILQ_INSERT_TAIL(&wq->wq_thrunlist, uth, uu_workq_entry); | |
913 | ||
914 | workq_unlock(wq); | |
915 | ||
cb323159 A |
916 | if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) { |
917 | __assert_only kern_return_t kr; | |
918 | kr = thread_set_voucher_name(MACH_PORT_NULL); | |
919 | assert(kr == KERN_SUCCESS); | |
920 | } | |
921 | ||
d9a64523 | 922 | uint32_t flags = WQ_FLAG_THREAD_NEWSPI | qos | WQ_FLAG_THREAD_PRIO_QOS; |
d9a64523 A |
923 | thread_t th = uth->uu_thread; |
924 | vm_map_t vmap = get_task_map(p->task); | |
925 | ||
0a7de745 A |
926 | if (!first_use) { |
927 | flags |= WQ_FLAG_THREAD_REUSE; | |
928 | } | |
d9a64523 A |
929 | |
930 | pthread_functions->workq_setup_thread(p, th, vmap, uth->uu_workq_stackaddr, | |
cb323159 | 931 | uth->uu_workq_thport, 0, WQ_SETUP_EXIT_THREAD, flags); |
d9a64523 A |
932 | __builtin_unreachable(); |
933 | } | |
934 | ||
935 | bool | |
936 | workq_is_current_thread_updating_turnstile(struct workqueue *wq) | |
937 | { | |
938 | return wq->wq_turnstile_updater == current_thread(); | |
939 | } | |
940 | ||
941 | __attribute__((always_inline)) | |
942 | static inline void | |
943 | workq_perform_turnstile_operation_locked(struct workqueue *wq, | |
0a7de745 | 944 | void (^operation)(void)) |
d9a64523 A |
945 | { |
946 | workq_lock_held(wq); | |
947 | wq->wq_turnstile_updater = current_thread(); | |
948 | operation(); | |
949 | wq->wq_turnstile_updater = THREAD_NULL; | |
950 | } | |
951 | ||
952 | static void | |
953 | workq_turnstile_update_inheritor(struct workqueue *wq, | |
0a7de745 A |
954 | turnstile_inheritor_t inheritor, |
955 | turnstile_update_flags_t flags) | |
d9a64523 | 956 | { |
cb323159 A |
957 | if (wq->wq_inheritor == inheritor) { |
958 | return; | |
959 | } | |
960 | wq->wq_inheritor = inheritor; | |
d9a64523 A |
961 | workq_perform_turnstile_operation_locked(wq, ^{ |
962 | turnstile_update_inheritor(wq->wq_turnstile, inheritor, | |
0a7de745 | 963 | flags | TURNSTILE_IMMEDIATE_UPDATE); |
d9a64523 | 964 | turnstile_update_inheritor_complete(wq->wq_turnstile, |
0a7de745 | 965 | TURNSTILE_INTERLOCK_HELD); |
d9a64523 A |
966 | }); |
967 | } | |
968 | ||
969 | static void | |
cb323159 A |
970 | workq_push_idle_thread(proc_t p, struct workqueue *wq, struct uthread *uth, |
971 | uint32_t setup_flags) | |
d9a64523 A |
972 | { |
973 | uint64_t now = mach_absolute_time(); | |
cb323159 | 974 | bool is_creator = (uth == wq->wq_creator); |
d9a64523 | 975 | |
d9a64523 A |
976 | if ((uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) == 0) { |
977 | wq->wq_constrained_threads_scheduled--; | |
978 | } | |
cb323159 | 979 | uth->uu_workq_flags &= ~(UT_WORKQ_RUNNING | UT_WORKQ_OVERCOMMIT); |
d9a64523 A |
980 | TAILQ_REMOVE(&wq->wq_thrunlist, uth, uu_workq_entry); |
981 | wq->wq_threads_scheduled--; | |
982 | ||
cb323159 A |
983 | if (is_creator) { |
984 | wq->wq_creator = NULL; | |
d9a64523 | 985 | WQ_TRACE_WQ(TRACE_wq_creator_select, wq, 3, 0, |
0a7de745 | 986 | uth->uu_save.uus_workq_park_data.yields, 0); |
cb323159 A |
987 | } |
988 | ||
989 | if (wq->wq_inheritor == uth->uu_thread) { | |
990 | assert(wq->wq_creator == NULL); | |
d9a64523 A |
991 | if (wq->wq_reqcount) { |
992 | workq_turnstile_update_inheritor(wq, wq, TURNSTILE_INHERITOR_WORKQ); | |
993 | } else { | |
994 | workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0); | |
995 | } | |
cb323159 A |
996 | } |
997 | ||
998 | if (uth->uu_workq_flags & UT_WORKQ_NEW) { | |
999 | assert(is_creator || (_wq_flags(wq) & WQ_EXITING)); | |
1000 | TAILQ_INSERT_TAIL(&wq->wq_thnewlist, uth, uu_workq_entry); | |
1001 | wq->wq_thidlecount++; | |
1002 | return; | |
1003 | } | |
1004 | ||
1005 | if (!is_creator) { | |
d9a64523 A |
1006 | _wq_thactive_dec(wq, uth->uu_workq_pri.qos_bucket); |
1007 | wq->wq_thscheduled_count[_wq_bucket(uth->uu_workq_pri.qos_bucket)]--; | |
d9a64523 A |
1008 | uth->uu_workq_flags |= UT_WORKQ_IDLE_CLEANUP; |
1009 | } | |
1010 | ||
1011 | uth->uu_save.uus_workq_park_data.idle_stamp = now; | |
1012 | ||
1013 | struct uthread *oldest = workq_oldest_killable_idle_thread(wq); | |
1014 | uint16_t cur_idle = wq->wq_thidlecount; | |
1015 | ||
1016 | if (cur_idle >= wq_max_constrained_threads || | |
0a7de745 A |
1017 | (wq->wq_thdying_count == 0 && oldest && |
1018 | workq_should_kill_idle_thread(wq, oldest, now))) { | |
d9a64523 A |
1019 | /* |
1020 | * Immediately kill threads if we have too many of them. | |
1021 | * | |
1022 | * And swap "place" with the oldest one we'd have woken up. | |
1023 | * This is a relatively desperate situation where we really | |
1024 | * need to kill threads quickly, and it's better to kill | |
1025 | * the one that's currently on core than to context switch. | |
1026 | */ | |
1027 | if (oldest) { | |
1028 | oldest->uu_save.uus_workq_park_data.idle_stamp = now; | |
1029 | TAILQ_REMOVE(&wq->wq_thidlelist, oldest, uu_workq_entry); | |
1030 | TAILQ_INSERT_HEAD(&wq->wq_thidlelist, oldest, uu_workq_entry); | |
1031 | } | |
1032 | ||
1033 | WQ_TRACE_WQ(TRACE_wq_thread_terminate | DBG_FUNC_START, | |
0a7de745 | 1034 | wq, cur_idle, 0, 0, 0); |
d9a64523 A |
1035 | wq->wq_thdying_count++; |
1036 | uth->uu_workq_flags |= UT_WORKQ_DYING; | |
1037 | uth->uu_workq_flags &= ~UT_WORKQ_IDLE_CLEANUP; | |
cb323159 | 1038 | workq_unpark_for_death_and_unlock(p, wq, uth, 0, setup_flags); |
d9a64523 A |
1039 | __builtin_unreachable(); |
1040 | } | |
1041 | ||
1042 | struct uthread *tail = TAILQ_LAST(&wq->wq_thidlelist, workq_uthread_head); | |
1043 | ||
1044 | cur_idle += 1; | |
1045 | wq->wq_thidlecount = cur_idle; | |
1046 | ||
1047 | if (cur_idle >= wq_death_max_load && tail && | |
0a7de745 | 1048 | tail->uu_save.uus_workq_park_data.has_stack) { |
d9a64523 A |
1049 | uth->uu_save.uus_workq_park_data.has_stack = false; |
1050 | TAILQ_INSERT_TAIL(&wq->wq_thidlelist, uth, uu_workq_entry); | |
1051 | } else { | |
1052 | uth->uu_save.uus_workq_park_data.has_stack = true; | |
1053 | TAILQ_INSERT_HEAD(&wq->wq_thidlelist, uth, uu_workq_entry); | |
1054 | } | |
1055 | ||
1056 | if (!tail) { | |
1057 | uint64_t delay = workq_kill_delay_for_idle_thread(wq); | |
1058 | workq_death_call_schedule(wq, now + delay); | |
1059 | } | |
1060 | } | |
1061 | ||
1062 | #pragma mark thread requests | |
1063 | ||
1064 | static inline int | |
1065 | workq_priority_for_req(workq_threadreq_t req) | |
1066 | { | |
1067 | thread_qos_t qos = req->tr_qos; | |
1068 | ||
cb323159 | 1069 | if (req->tr_flags & WORKQ_TR_FLAG_WL_OUTSIDE_QOS) { |
d9a64523 A |
1070 | workq_threadreq_param_t trp = kqueue_threadreq_workloop_param(req); |
1071 | assert(trp.trp_flags & TRP_PRIORITY); | |
1072 | return trp.trp_pri; | |
1073 | } | |
1074 | return thread_workq_pri_for_qos(qos); | |
1075 | } | |
1076 | ||
f427ee49 | 1077 | static inline struct priority_queue_sched_max * |
d9a64523 A |
1078 | workq_priority_queue_for_req(struct workqueue *wq, workq_threadreq_t req) |
1079 | { | |
cb323159 | 1080 | if (req->tr_flags & WORKQ_TR_FLAG_WL_OUTSIDE_QOS) { |
d9a64523 | 1081 | return &wq->wq_special_queue; |
cb323159 | 1082 | } else if (req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) { |
d9a64523 A |
1083 | return &wq->wq_overcommit_queue; |
1084 | } else { | |
1085 | return &wq->wq_constrained_queue; | |
1086 | } | |
1087 | } | |
1088 | ||
1089 | /* | |
1090 | * returns true if the enqueued request is the highest priority item | |
1091 | * in its priority queue. | |
1092 | */ | |
1093 | static bool | |
1094 | workq_threadreq_enqueue(struct workqueue *wq, workq_threadreq_t req) | |
1095 | { | |
cb323159 | 1096 | assert(req->tr_state == WORKQ_TR_STATE_NEW); |
d9a64523 | 1097 | |
cb323159 | 1098 | req->tr_state = WORKQ_TR_STATE_QUEUED; |
d9a64523 A |
1099 | wq->wq_reqcount += req->tr_count; |
1100 | ||
1101 | if (req->tr_qos == WORKQ_THREAD_QOS_MANAGER) { | |
1102 | assert(wq->wq_event_manager_threadreq == NULL); | |
cb323159 | 1103 | assert(req->tr_flags & WORKQ_TR_FLAG_KEVENT); |
d9a64523 A |
1104 | assert(req->tr_count == 1); |
1105 | wq->wq_event_manager_threadreq = req; | |
1106 | return true; | |
1107 | } | |
f427ee49 A |
1108 | |
1109 | struct priority_queue_sched_max *q = workq_priority_queue_for_req(wq, req); | |
1110 | priority_queue_entry_set_sched_pri(q, &req->tr_entry, | |
1111 | workq_priority_for_req(req), false); | |
1112 | ||
1113 | if (priority_queue_insert(q, &req->tr_entry)) { | |
cb323159 | 1114 | if ((req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
1115 | _wq_thactive_refresh_best_constrained_req_qos(wq); |
1116 | } | |
1117 | return true; | |
1118 | } | |
1119 | return false; | |
1120 | } | |
1121 | ||
1122 | /* | |
1123 | * returns true if the dequeued request was the highest priority item | |
1124 | * in its priority queue. | |
1125 | */ | |
1126 | static bool | |
1127 | workq_threadreq_dequeue(struct workqueue *wq, workq_threadreq_t req) | |
1128 | { | |
1129 | wq->wq_reqcount--; | |
1130 | ||
1131 | if (--req->tr_count == 0) { | |
1132 | if (req->tr_qos == WORKQ_THREAD_QOS_MANAGER) { | |
1133 | assert(wq->wq_event_manager_threadreq == req); | |
1134 | assert(req->tr_count == 0); | |
1135 | wq->wq_event_manager_threadreq = NULL; | |
1136 | return true; | |
1137 | } | |
1138 | if (priority_queue_remove(workq_priority_queue_for_req(wq, req), | |
f427ee49 | 1139 | &req->tr_entry)) { |
cb323159 | 1140 | if ((req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
1141 | _wq_thactive_refresh_best_constrained_req_qos(wq); |
1142 | } | |
1143 | return true; | |
1144 | } | |
1145 | } | |
1146 | return false; | |
1147 | } | |
1148 | ||
1149 | static void | |
1150 | workq_threadreq_destroy(proc_t p, workq_threadreq_t req) | |
1151 | { | |
cb323159 A |
1152 | req->tr_state = WORKQ_TR_STATE_CANCELED; |
1153 | if (req->tr_flags & (WORKQ_TR_FLAG_WORKLOOP | WORKQ_TR_FLAG_KEVENT)) { | |
d9a64523 A |
1154 | kqueue_threadreq_cancel(p, req); |
1155 | } else { | |
1156 | zfree(workq_zone_threadreq, req); | |
1157 | } | |
1158 | } | |
1159 | ||
d9a64523 A |
1160 | #pragma mark workqueue thread creation thread calls |
1161 | ||
1162 | static inline bool | |
1163 | workq_thread_call_prepost(struct workqueue *wq, uint32_t sched, uint32_t pend, | |
0a7de745 | 1164 | uint32_t fail_mask) |
d9a64523 A |
1165 | { |
1166 | uint32_t old_flags, new_flags; | |
1167 | ||
1168 | os_atomic_rmw_loop(&wq->wq_flags, old_flags, new_flags, acquire, { | |
1169 | if (__improbable(old_flags & (WQ_EXITING | sched | pend | fail_mask))) { | |
0a7de745 | 1170 | os_atomic_rmw_loop_give_up(return false); |
d9a64523 A |
1171 | } |
1172 | if (__improbable(old_flags & WQ_PROC_SUSPENDED)) { | |
0a7de745 | 1173 | new_flags = old_flags | pend; |
d9a64523 | 1174 | } else { |
0a7de745 | 1175 | new_flags = old_flags | sched; |
d9a64523 A |
1176 | } |
1177 | }); | |
1178 | ||
1179 | return (old_flags & WQ_PROC_SUSPENDED) == 0; | |
1180 | } | |
1181 | ||
1182 | #define WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART 0x1 | |
1183 | ||
1184 | static bool | |
1185 | workq_schedule_delayed_thread_creation(struct workqueue *wq, int flags) | |
1186 | { | |
1187 | assert(!preemption_enabled()); | |
1188 | ||
1189 | if (!workq_thread_call_prepost(wq, WQ_DELAYED_CALL_SCHEDULED, | |
0a7de745 A |
1190 | WQ_DELAYED_CALL_PENDED, WQ_IMMEDIATE_CALL_PENDED | |
1191 | WQ_IMMEDIATE_CALL_SCHEDULED)) { | |
d9a64523 A |
1192 | return false; |
1193 | } | |
1194 | ||
1195 | uint64_t now = mach_absolute_time(); | |
1196 | ||
1197 | if (flags & WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART) { | |
1198 | /* do not change the window */ | |
1199 | } else if (now - wq->wq_thread_call_last_run <= wq->wq_timer_interval) { | |
1200 | wq->wq_timer_interval *= 2; | |
1201 | if (wq->wq_timer_interval > wq_max_timer_interval.abstime) { | |
f427ee49 | 1202 | wq->wq_timer_interval = (uint32_t)wq_max_timer_interval.abstime; |
d9a64523 A |
1203 | } |
1204 | } else if (now - wq->wq_thread_call_last_run > 2 * wq->wq_timer_interval) { | |
1205 | wq->wq_timer_interval /= 2; | |
1206 | if (wq->wq_timer_interval < wq_stalled_window.abstime) { | |
f427ee49 | 1207 | wq->wq_timer_interval = (uint32_t)wq_stalled_window.abstime; |
d9a64523 A |
1208 | } |
1209 | } | |
1210 | ||
1211 | WQ_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, | |
0a7de745 | 1212 | _wq_flags(wq), wq->wq_timer_interval, 0); |
d9a64523 A |
1213 | |
1214 | thread_call_t call = wq->wq_delayed_call; | |
1215 | uintptr_t arg = WQ_DELAYED_CALL_SCHEDULED; | |
1216 | uint64_t deadline = now + wq->wq_timer_interval; | |
1217 | if (thread_call_enter1_delayed(call, (void *)arg, deadline)) { | |
1218 | panic("delayed_call was already enqueued"); | |
1219 | } | |
1220 | return true; | |
1221 | } | |
1222 | ||
1223 | static void | |
1224 | workq_schedule_immediate_thread_creation(struct workqueue *wq) | |
1225 | { | |
1226 | assert(!preemption_enabled()); | |
1227 | ||
1228 | if (workq_thread_call_prepost(wq, WQ_IMMEDIATE_CALL_SCHEDULED, | |
0a7de745 | 1229 | WQ_IMMEDIATE_CALL_PENDED, 0)) { |
d9a64523 | 1230 | WQ_TRACE_WQ(TRACE_wq_start_add_timer, wq, wq->wq_reqcount, |
0a7de745 | 1231 | _wq_flags(wq), 0, 0); |
d9a64523 A |
1232 | |
1233 | uintptr_t arg = WQ_IMMEDIATE_CALL_SCHEDULED; | |
1234 | if (thread_call_enter1(wq->wq_immediate_call, (void *)arg)) { | |
1235 | panic("immediate_call was already enqueued"); | |
1236 | } | |
1237 | } | |
1238 | } | |
1239 | ||
1240 | void | |
1241 | workq_proc_suspended(struct proc *p) | |
1242 | { | |
1243 | struct workqueue *wq = proc_get_wqptr(p); | |
1244 | ||
0a7de745 A |
1245 | if (wq) { |
1246 | os_atomic_or(&wq->wq_flags, WQ_PROC_SUSPENDED, relaxed); | |
1247 | } | |
d9a64523 A |
1248 | } |
1249 | ||
1250 | void | |
1251 | workq_proc_resumed(struct proc *p) | |
1252 | { | |
1253 | struct workqueue *wq = proc_get_wqptr(p); | |
1254 | uint32_t wq_flags; | |
1255 | ||
0a7de745 A |
1256 | if (!wq) { |
1257 | return; | |
1258 | } | |
d9a64523 | 1259 | |
cb323159 A |
1260 | wq_flags = os_atomic_andnot_orig(&wq->wq_flags, WQ_PROC_SUSPENDED | |
1261 | WQ_DELAYED_CALL_PENDED | WQ_IMMEDIATE_CALL_PENDED, relaxed); | |
d9a64523 A |
1262 | if ((wq_flags & WQ_EXITING) == 0) { |
1263 | disable_preemption(); | |
1264 | if (wq_flags & WQ_IMMEDIATE_CALL_PENDED) { | |
1265 | workq_schedule_immediate_thread_creation(wq); | |
1266 | } else if (wq_flags & WQ_DELAYED_CALL_PENDED) { | |
1267 | workq_schedule_delayed_thread_creation(wq, | |
0a7de745 | 1268 | WORKQ_SCHEDULE_DELAYED_THREAD_CREATION_RESTART); |
d9a64523 A |
1269 | } |
1270 | enable_preemption(); | |
1271 | } | |
1272 | } | |
1273 | ||
1274 | /** | |
1275 | * returns whether lastblocked_tsp is within wq_stalled_window usecs of now | |
1276 | */ | |
1277 | static bool | |
1278 | workq_thread_is_busy(uint64_t now, _Atomic uint64_t *lastblocked_tsp) | |
1279 | { | |
cb323159 | 1280 | uint64_t lastblocked_ts = os_atomic_load_wide(lastblocked_tsp, relaxed); |
d9a64523 A |
1281 | if (now <= lastblocked_ts) { |
1282 | /* | |
1283 | * Because the update of the timestamp when a thread blocks | |
1284 | * isn't serialized against us looking at it (i.e. we don't hold | |
1285 | * the workq lock), it's possible to have a timestamp that matches | |
1286 | * the current time or that even looks to be in the future relative | |
1287 | * to when we grabbed the current time... | |
1288 | * | |
1289 | * Just treat this as a busy thread since it must have just blocked. | |
1290 | */ | |
1291 | return true; | |
1292 | } | |
1293 | return (now - lastblocked_ts) < wq_stalled_window.abstime; | |
1294 | } | |
1295 | ||
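A user-space analog of the check above: a bucket's last-blocked thread counts as busy if it blocked within the stalled window, and a timestamp that appears to be in the future is treated as "just blocked". It uses the public mach_absolute_time()/mach_timebase_info() APIs; the 10ms window is an illustrative value, not the kernel default:

```c
/* Illustrative user-space analog of the stalled-window busy check. */
#include <mach/mach_time.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
is_busy(uint64_t now, uint64_t lastblocked_ts, uint64_t window_abs)
{
	if (now <= lastblocked_ts) {
		return true;   /* racy "future" timestamp: it just blocked */
	}
	return (now - lastblocked_ts) < window_abs;
}

int
main(void)
{
	mach_timebase_info_data_t tb;
	mach_timebase_info(&tb);

	/* convert 10ms to mach absolute time units: ns * denom / numer */
	uint64_t window_abs = UINT64_C(10000000) * tb.denom / tb.numer;
	uint64_t now = mach_absolute_time();

	printf("just blocked:     %d\n", is_busy(now + 1, now, window_abs));              /* 1 */
	printf("blocked long ago: %d\n", is_busy(now + 4 * window_abs, now, window_abs)); /* 0 */
	return 0;
}
```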
1296 | static void | |
1297 | workq_add_new_threads_call(void *_p, void *flags) | |
1298 | { | |
1299 | proc_t p = _p; | |
1300 | struct workqueue *wq = proc_get_wqptr(p); | |
1301 | uint32_t my_flag = (uint32_t)(uintptr_t)flags; | |
1302 | ||
1303 | /* | |
1304 | * workq_exit() will set the workqueue to NULL before | |
1305 | * it cancels thread calls. | |
1306 | */ | |
0a7de745 A |
1307 | if (!wq) { |
1308 | return; | |
1309 | } | |
d9a64523 A |
1310 | |
1311 | assert((my_flag == WQ_DELAYED_CALL_SCHEDULED) || | |
0a7de745 | 1312 | (my_flag == WQ_IMMEDIATE_CALL_SCHEDULED)); |
d9a64523 A |
1313 | |
1314 | WQ_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_START, wq, _wq_flags(wq), | |
0a7de745 | 1315 | wq->wq_nthreads, wq->wq_thidlecount, 0); |
d9a64523 A |
1316 | |
1317 | workq_lock_spin(wq); | |
1318 | ||
1319 | wq->wq_thread_call_last_run = mach_absolute_time(); | |
cb323159 | 1320 | os_atomic_andnot(&wq->wq_flags, my_flag, release); |
d9a64523 A |
1321 | |
1322 | /* This can drop the workqueue lock, and take it again */ | |
1323 | workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS); | |
1324 | ||
1325 | workq_unlock(wq); | |
1326 | ||
1327 | WQ_TRACE_WQ(TRACE_wq_add_timer | DBG_FUNC_END, wq, 0, | |
0a7de745 | 1328 | wq->wq_nthreads, wq->wq_thidlecount, 0); |
d9a64523 A |
1329 | } |
1330 | ||
1331 | #pragma mark thread state tracking | |
1332 | ||
1333 | static void | |
1334 | workq_sched_callback(int type, thread_t thread) | |
1335 | { | |
1336 | struct uthread *uth = get_bsdthread_info(thread); | |
1337 | proc_t proc = get_bsdtask_info(get_threadtask(thread)); | |
1338 | struct workqueue *wq = proc_get_wqptr(proc); | |
1339 | thread_qos_t req_qos, qos = uth->uu_workq_pri.qos_bucket; | |
1340 | wq_thactive_t old_thactive; | |
1341 | bool start_timer = false; | |
1342 | ||
1343 | if (qos == WORKQ_THREAD_QOS_MANAGER) { | |
1344 | return; | |
1345 | } | |
1346 | ||
1347 | switch (type) { | |
1348 | case SCHED_CALL_BLOCK: | |
1349 | old_thactive = _wq_thactive_dec(wq, qos); | |
1350 | req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive); | |
1351 | ||
1352 | /* | |
1353 | * Remember the timestamp of the last thread that blocked in this | |
1354 | * bucket, it is used by admission checks to ignore one thread | |
1355 | * being inactive if this timestamp is recent enough. | |
1356 | * | |
1357 | * If we collide with another thread trying to update the | |
1358 | * last_blocked (really unlikely since another thread would have to | |
1359 | * get scheduled and then block after we start down this path), it's | |
1360 | * not a problem. Either timestamp is adequate, so no need to retry | |
1361 | */ | |
cb323159 | 1362 | os_atomic_store_wide(&wq->wq_lastblocked_ts[_wq_bucket(qos)], |
0a7de745 | 1363 | thread_last_run_time(thread), relaxed); |
d9a64523 A |
1364 | |
1365 | if (req_qos == THREAD_QOS_UNSPECIFIED) { | |
1366 | /* | |
1367 | * No pending request at the moment we could unblock, move on. | |
1368 | */ | |
1369 | } else if (qos < req_qos) { | |
1370 | /* | |
1371 | * The blocking thread is at a lower QoS than the highest currently | |
1372 | * pending constrained request, nothing has to be redriven | |
1373 | */ | |
1374 | } else { | |
1375 | uint32_t max_busycount, old_req_count; | |
1376 | old_req_count = _wq_thactive_aggregate_downto_qos(wq, old_thactive, | |
0a7de745 | 1377 | req_qos, NULL, &max_busycount); |
d9a64523 A |
1378 | /* |
1379 | * If it is possible that may_start_constrained_thread had refused | |
1380 | * admission due to being over the max concurrency, we may need to | |
1381 | * spin up a new thread. | |
1382 | * | |
1383 | * We take into account the maximum number of busy threads | |
1384 | * that can affect may_start_constrained_thread as looking at the | |
1385 | * actual number may_start_constrained_thread will see is racy. | |
1386 | * | |
1387 | * IOW at NCPU = 4, for IN (req_qos = 1), if the old req count is | |
1388 | * between NCPU (4) and NCPU - 2 (2) we need to redrive. | |
1389 | */ | |
1390 | uint32_t conc = wq_max_parallelism[_wq_bucket(qos)]; | |
1391 | if (old_req_count <= conc && conc <= old_req_count + max_busycount) { | |
1392 | start_timer = workq_schedule_delayed_thread_creation(wq, 0); | |
1393 | } | |
1394 | } | |
1395 | if (__improbable(kdebug_enable)) { | |
1396 | __unused uint32_t old = _wq_thactive_aggregate_downto_qos(wq, | |
0a7de745 | 1397 | old_thactive, qos, NULL, NULL); |
d9a64523 | 1398 | WQ_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_START, wq, |
0a7de745 A |
1399 | old - 1, qos | (req_qos << 8), |
1400 | wq->wq_reqcount << 1 | start_timer, 0); | |
d9a64523 A |
1401 | } |
1402 | break; | |
1403 | ||
1404 | case SCHED_CALL_UNBLOCK: | |
1405 | /* | |
1406 | * we cannot take the workqueue_lock here... | |
1407 | * an UNBLOCK can occur from a timer event which | |
1408 | * is run from an interrupt context... if the workqueue_lock | |
1409 | * is already held by this processor, we'll deadlock... | |
1410 | * the thread lock for the thread being UNBLOCKED | |
1411 | * is also held | |
1412 | */ | |
1413 | old_thactive = _wq_thactive_inc(wq, qos); | |
1414 | if (__improbable(kdebug_enable)) { | |
1415 | __unused uint32_t old = _wq_thactive_aggregate_downto_qos(wq, | |
0a7de745 | 1416 | old_thactive, qos, NULL, NULL); |
d9a64523 A |
1417 | req_qos = WQ_THACTIVE_BEST_CONSTRAINED_REQ_QOS(old_thactive); |
1418 | WQ_TRACE_WQ(TRACE_wq_thread_block | DBG_FUNC_END, wq, | |
0a7de745 A |
1419 | old + 1, qos | (req_qos << 8), |
1420 | wq->wq_threads_scheduled, 0); | |
d9a64523 A |
1421 | } |
1422 | break; | |
1423 | } | |
1424 | } | |
1425 | ||
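The SCHED_CALL_BLOCK comment above reduces to a simple window test: a redrive is needed when the old request count is at most the allowed concurrency and the concurrency is at most the old count plus the maximum possible busy threads. A stand-alone sketch of that window for the NCPU = 4 example in the comment, assuming a request level where max_busycount comes out to 2:

```c
/* Illustrative check of the redrive window from the comment above. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
needs_redrive(uint32_t old_req_count, uint32_t conc, uint32_t max_busycount)
{
	return old_req_count <= conc && conc <= old_req_count + max_busycount;
}

int
main(void)
{
	/* conc = NCPU = 4, max_busycount = 2: redrive for old counts 2..4 */
	assert(!needs_redrive(1, 4, 2));   /* even if all busy, still under limit */
	assert(needs_redrive(2, 4, 2));
	assert(needs_redrive(3, 4, 2));
	assert(needs_redrive(4, 4, 2));
	assert(!needs_redrive(5, 4, 2));   /* already over the concurrency limit */
	return 0;
}
```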
1426 | #pragma mark workq lifecycle | |
1427 | ||
1428 | void | |
1429 | workq_reference(struct workqueue *wq) | |
1430 | { | |
1431 | os_ref_retain(&wq->wq_refcnt); | |
1432 | } | |
1433 | ||
cb323159 A |
1434 | static void |
1435 | workq_deallocate_queue_invoke(mpsc_queue_chain_t e, | |
1436 | __assert_only mpsc_daemon_queue_t dq) | |
d9a64523 | 1437 | { |
cb323159 | 1438 | struct workqueue *wq; |
d9a64523 A |
1439 | struct turnstile *ts; |
1440 | ||
cb323159 A |
1441 | wq = mpsc_queue_element(e, struct workqueue, wq_destroy_link); |
1442 | assert(dq == &workq_deallocate_queue); | |
1443 | ||
1444 | turnstile_complete((uintptr_t)wq, &wq->wq_turnstile, &ts, TURNSTILE_WORKQS); | |
d9a64523 A |
1445 | assert(ts); |
1446 | turnstile_cleanup(); | |
1447 | turnstile_deallocate(ts); | |
1448 | ||
f427ee49 | 1449 | lck_spin_destroy(&wq->wq_lock, &workq_lck_grp); |
d9a64523 A |
1450 | zfree(workq_zone_workqueue, wq); |
1451 | } | |
1452 | ||
1453 | static void | |
1454 | workq_deallocate(struct workqueue *wq) | |
1455 | { | |
1456 | if (os_ref_release_relaxed(&wq->wq_refcnt) == 0) { | |
cb323159 A |
1457 | workq_deallocate_queue_invoke(&wq->wq_destroy_link, |
1458 | &workq_deallocate_queue); | |
d9a64523 A |
1459 | } |
1460 | } | |
1461 | ||
1462 | void | |
1463 | workq_deallocate_safe(struct workqueue *wq) | |
1464 | { | |
1465 | if (__improbable(os_ref_release_relaxed(&wq->wq_refcnt) == 0)) { | |
cb323159 A |
1466 | mpsc_daemon_enqueue(&workq_deallocate_queue, &wq->wq_destroy_link, |
1467 | MPSC_QUEUE_DISABLE_PREEMPTION); | |
d9a64523 A |
1468 | } |
1469 | } | |
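The two release paths above differ only in where the final teardown runs: workq_deallocate() may run workq_deallocate_queue_invoke() inline, while workq_deallocate_safe() defers it through the MPSC daemon queue because the caller may be in a context where destroying the turnstile and lock is not safe. A minimal userspace analogue of that split, using C11 atomics; the type and function names are made up for illustration:

#include <stdatomic.h>

struct object {
        atomic_uint refcnt;
};

/* Drop a reference and destroy inline when it was the last one. */
static void
object_release(struct object *o, void (*destroy)(struct object *))
{
        if (atomic_fetch_sub_explicit(&o->refcnt, 1, memory_order_release) == 1) {
                atomic_thread_fence(memory_order_acquire);
                destroy(o);             /* analogue of workq_deallocate() */
        }
}

/* Drop a reference, but hand the last one to a worker for destruction. */
static void
object_release_safe(struct object *o, void (*defer)(struct object *))
{
        if (atomic_fetch_sub_explicit(&o->refcnt, 1, memory_order_release) == 1) {
                defer(o);               /* analogue of the MPSC daemon queue */
        }
}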
1470 | ||
1471 | /** | |
1472 | * Set up per-process state for the workqueue. | |
1473 | */ | |
1474 | int | |
1475 | workq_open(struct proc *p, __unused struct workq_open_args *uap, | |
0a7de745 | 1476 | __unused int32_t *retval) |
d9a64523 A |
1477 | { |
1478 | struct workqueue *wq; | |
1479 | int error = 0; | |
1480 | ||
1481 | if ((p->p_lflag & P_LREGISTER) == 0) { | |
1482 | return EINVAL; | |
1483 | } | |
1484 | ||
1485 | if (wq_init_constrained_limit) { | |
f427ee49 | 1486 | uint32_t limit, num_cpus = ml_wait_max_cpus(); |
d9a64523 A |
1487 | |
1488 | /* | |
1489 | * set up the limit for the constrained pool; | |
1490 | * this is a virtual pool in that we don't | |
1491 | * maintain it on a separate idle and run list | |
1492 | */ | |
1493 | limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR; | |
1494 | ||
0a7de745 | 1495 | if (limit > wq_max_constrained_threads) { |
d9a64523 | 1496 | wq_max_constrained_threads = limit; |
0a7de745 | 1497 | } |
d9a64523 A |
1498 | |
1499 | if (wq_max_threads > WQ_THACTIVE_BUCKET_HALF) { | |
1500 | wq_max_threads = WQ_THACTIVE_BUCKET_HALF; | |
1501 | } | |
1502 | if (wq_max_threads > CONFIG_THREAD_MAX - 20) { | |
1503 | wq_max_threads = CONFIG_THREAD_MAX - 20; | |
1504 | } | |
1505 | ||
1506 | wq_death_max_load = (uint16_t)fls(num_cpus) + 1; | |
1507 | ||
1508 | for (thread_qos_t qos = WORKQ_THREAD_QOS_MIN; qos <= WORKQ_THREAD_QOS_MAX; qos++) { | |
1509 | wq_max_parallelism[_wq_bucket(qos)] = | |
0a7de745 | 1510 | qos_max_parallelism(qos, QOS_PARALLELISM_COUNT_LOGICAL); |
d9a64523 A |
1511 | } |
1512 | ||
1513 | wq_init_constrained_limit = 0; | |
1514 | } | |
1515 | ||
1516 | if (proc_get_wqptr(p) == NULL) { | |
1517 | if (proc_init_wqptr_or_wait(p) == FALSE) { | |
1518 | assert(proc_get_wqptr(p) != NULL); | |
1519 | goto out; | |
1520 | } | |
1521 | ||
1522 | wq = (struct workqueue *)zalloc(workq_zone_workqueue); | |
1523 | bzero(wq, sizeof(struct workqueue)); | |
1524 | ||
1525 | os_ref_init_count(&wq->wq_refcnt, &workq_refgrp, 1); | |
1526 | ||
1527 | // Start the event manager at the priority hinted at by the policy engine | |
1528 | thread_qos_t mgr_priority_hint = task_get_default_manager_qos(current_task()); | |
1529 | pthread_priority_t pp = _pthread_priority_make_from_thread_qos(mgr_priority_hint, 0, 0); | |
1530 | wq->wq_event_manager_priority = (uint32_t)pp; | |
f427ee49 | 1531 | wq->wq_timer_interval = (uint32_t)wq_stalled_window.abstime; |
d9a64523 A |
1532 | wq->wq_proc = p; |
1533 | turnstile_prepare((uintptr_t)wq, &wq->wq_turnstile, turnstile_alloc(), | |
0a7de745 | 1534 | TURNSTILE_WORKQS); |
d9a64523 A |
1535 | |
1536 | TAILQ_INIT(&wq->wq_thrunlist); | |
1537 | TAILQ_INIT(&wq->wq_thnewlist); | |
1538 | TAILQ_INIT(&wq->wq_thidlelist); | |
f427ee49 A |
1539 | priority_queue_init(&wq->wq_overcommit_queue); |
1540 | priority_queue_init(&wq->wq_constrained_queue); | |
1541 | priority_queue_init(&wq->wq_special_queue); | |
d9a64523 | 1542 | |
c3c9b80d A |
1543 | /* We are only using the delayed thread call for the constrained pool, | |
1544 | * which can't have work at >= UI QoS, and so we can be fine with a | |
1545 | * UI QoS thread call. | |
1546 | */ | |
1547 | wq->wq_delayed_call = thread_call_allocate_with_qos( | |
1548 | workq_add_new_threads_call, p, THREAD_QOS_USER_INTERACTIVE, | |
0a7de745 | 1549 | THREAD_CALL_OPTIONS_ONCE); |
d9a64523 | 1550 | wq->wq_immediate_call = thread_call_allocate_with_options( |
0a7de745 A |
1551 | workq_add_new_threads_call, p, THREAD_CALL_PRIORITY_KERNEL, |
1552 | THREAD_CALL_OPTIONS_ONCE); | |
d9a64523 | 1553 | wq->wq_death_call = thread_call_allocate_with_options( |
0a7de745 A |
1554 | workq_kill_old_threads_call, wq, |
1555 | THREAD_CALL_PRIORITY_USER, THREAD_CALL_OPTIONS_ONCE); | |
d9a64523 | 1556 | |
f427ee49 | 1557 | lck_spin_init(&wq->wq_lock, &workq_lck_grp, LCK_ATTR_NULL); |
d9a64523 A |
1558 | |
1559 | WQ_TRACE_WQ(TRACE_wq_create | DBG_FUNC_NONE, wq, | |
0a7de745 | 1560 | VM_KERNEL_ADDRHIDE(wq), 0, 0, 0); |
d9a64523 A |
1561 | proc_set_wqptr(p, wq); |
1562 | } | |
1563 | out: | |
1564 | ||
1565 | return error; | |
1566 | } | |
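The one-time limit setup inside workq_open() is plain arithmetic on the CPU count. A worked sketch of the same computations, with the kernel constants replaced by placeholder values purely so the snippet stands alone (the real values are defined elsewhere in xnu and may differ; fls() is taken from <strings.h> here, while the kernel uses its own):

#include <stdint.h>
#include <strings.h>    /* fls() */

/* Placeholder values, not the real kernel constants. */
#define WORKQUEUE_CONSTRAINED_FACTOR    8
#define WQ_THACTIVE_BUCKET_HALF         127
#define CONFIG_THREAD_MAX               2560

static void
wq_compute_limits(uint32_t num_cpus, uint32_t *max_constrained,
    uint32_t *max_threads, uint16_t *death_max_load)
{
        /* the constrained pool scales with the CPU count, but never shrinks */
        uint32_t limit = num_cpus * WORKQUEUE_CONSTRAINED_FACTOR;
        if (limit > *max_constrained) {
                *max_constrained = limit;
        }

        /* clamp the total thread cap to what the accounting can represent */
        if (*max_threads > WQ_THACTIVE_BUCKET_HALF) {
                *max_threads = WQ_THACTIVE_BUCKET_HALF;
        }
        if (*max_threads > CONFIG_THREAD_MAX - 20) {
                *max_threads = CONFIG_THREAD_MAX - 20;
        }

        /* the death load threshold grows with log2 of the CPU count */
        *death_max_load = (uint16_t)(fls((int)num_cpus) + 1);
}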
1567 | ||
1568 | /* | |
1569 | * Routine: workq_mark_exiting | |
1570 | * | |
1571 | * Function: Mark the work queue such that new threads will not be added to the | |
1572 | * work queue after we return. | |
1573 | * | |
1574 | * Conditions: Called against the current process. | |
1575 | */ | |
1576 | void | |
1577 | workq_mark_exiting(struct proc *p) | |
1578 | { | |
1579 | struct workqueue *wq = proc_get_wqptr(p); | |
1580 | uint32_t wq_flags; | |
1581 | workq_threadreq_t mgr_req; | |
1582 | ||
0a7de745 A |
1583 | if (!wq) { |
1584 | return; | |
1585 | } | |
d9a64523 | 1586 | |
0a7de745 | 1587 | WQ_TRACE_WQ(TRACE_wq_pthread_exit | DBG_FUNC_START, wq, 0, 0, 0, 0); |
d9a64523 A |
1588 | |
1589 | workq_lock_spin(wq); | |
1590 | ||
1591 | wq_flags = os_atomic_or_orig(&wq->wq_flags, WQ_EXITING, relaxed); | |
1592 | if (__improbable(wq_flags & WQ_EXITING)) { | |
1593 | panic("workq_mark_exiting called twice"); | |
1594 | } | |
1595 | ||
1596 | /* | |
1597 | * Opportunistically try to cancel thread calls that are likely in flight. | |
1598 | * workq_exit() will do the proper cleanup. | |
1599 | */ | |
1600 | if (wq_flags & WQ_IMMEDIATE_CALL_SCHEDULED) { | |
1601 | thread_call_cancel(wq->wq_immediate_call); | |
1602 | } | |
1603 | if (wq_flags & WQ_DELAYED_CALL_SCHEDULED) { | |
1604 | thread_call_cancel(wq->wq_delayed_call); | |
1605 | } | |
1606 | if (wq_flags & WQ_DEATH_CALL_SCHEDULED) { | |
1607 | thread_call_cancel(wq->wq_death_call); | |
1608 | } | |
1609 | ||
1610 | mgr_req = wq->wq_event_manager_threadreq; | |
1611 | wq->wq_event_manager_threadreq = NULL; | |
1612 | wq->wq_reqcount = 0; /* workq_schedule_creator must not look at queues */ | |
cb323159 A |
1613 | wq->wq_creator = NULL; |
1614 | workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0); | |
d9a64523 A |
1615 | |
1616 | workq_unlock(wq); | |
1617 | ||
1618 | if (mgr_req) { | |
1619 | kqueue_threadreq_cancel(p, mgr_req); | |
1620 | } | |
1621 | /* | |
1622 | * No one touches the priority queues once WQ_EXITING is set. | |
1623 | * It is hence safe to do the teardown without holding any lock. | |
1624 | */ | |
1625 | priority_queue_destroy(&wq->wq_overcommit_queue, | |
f427ee49 | 1626 | struct workq_threadreq_s, tr_entry, ^(workq_threadreq_t e){ |
d9a64523 A |
1627 | workq_threadreq_destroy(p, e); |
1628 | }); | |
1629 | priority_queue_destroy(&wq->wq_constrained_queue, | |
f427ee49 | 1630 | struct workq_threadreq_s, tr_entry, ^(workq_threadreq_t e){ |
d9a64523 A |
1631 | workq_threadreq_destroy(p, e); |
1632 | }); | |
1633 | priority_queue_destroy(&wq->wq_special_queue, | |
f427ee49 | 1634 | struct workq_threadreq_s, tr_entry, ^(workq_threadreq_t e){ |
d9a64523 A |
1635 | workq_threadreq_destroy(p, e); |
1636 | }); | |
1637 | ||
0a7de745 | 1638 | WQ_TRACE(TRACE_wq_pthread_exit | DBG_FUNC_END, 0, 0, 0, 0, 0); |
d9a64523 A |
1639 | } |
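workq_mark_exiting() leans on the fetch-or returning the previous flag word: one atomic operation both publishes WQ_EXITING and detects a second call. A minimal standalone sketch of the same idiom with C11 atomics (not kernel code; the names are illustrative):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define FLAG_EXITING    0x1u

static _Atomic unsigned int obj_flags;

static void
mark_exiting(void)
{
        unsigned int prev = atomic_fetch_or_explicit(&obj_flags, FLAG_EXITING,
            memory_order_relaxed);
        if (prev & FLAG_EXITING) {
                /* the kernel panics here: "workq_mark_exiting called twice" */
                fprintf(stderr, "mark_exiting called twice\n");
                abort();
        }
}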
1640 | ||
1641 | /* | |
1642 | * Routine: workq_exit | |
1643 | * | |
1644 | * Function: clean up the work queue structure(s) now that there are no threads | |
1645 | * left running inside the work queue (except possibly current_thread). | |
1646 | * | |
1647 | * Conditions: Called by the last thread in the process. | |
1648 | * Called against current process. | |
1649 | */ | |
1650 | void | |
1651 | workq_exit(struct proc *p) | |
1652 | { | |
1653 | struct workqueue *wq; | |
1654 | struct uthread *uth, *tmp; | |
1655 | ||
1656 | wq = os_atomic_xchg(&p->p_wqptr, NULL, relaxed); | |
1657 | if (wq != NULL) { | |
1658 | thread_t th = current_thread(); | |
1659 | ||
0a7de745 | 1660 | WQ_TRACE_WQ(TRACE_wq_workqueue_exit | DBG_FUNC_START, wq, 0, 0, 0, 0); |
d9a64523 A |
1661 | |
1662 | if (thread_get_tag(th) & THREAD_TAG_WORKQUEUE) { | |
1663 | /* | |
1664 | * <rdar://problem/40111515> Make sure we will no longer call the | |
1665 | * sched call, if we ever block this thread, which the cancel_wait | |
1666 | * below can do. | |
1667 | */ | |
1668 | thread_sched_call(th, NULL); | |
1669 | } | |
1670 | ||
1671 | /* | |
1672 | * Thread calls are always scheduled by the proc itself or under the | |
1673 | * workqueue spinlock if WQ_EXITING is not yet set. | |
1674 | * | |
1675 | * Either way, when this runs, the proc has no threads left besides | |
1676 | * the one running this very code, so we know no thread call can be | |
1677 | * dispatched anymore. | |
1678 | */ | |
1679 | thread_call_cancel_wait(wq->wq_delayed_call); | |
1680 | thread_call_cancel_wait(wq->wq_immediate_call); | |
1681 | thread_call_cancel_wait(wq->wq_death_call); | |
1682 | thread_call_free(wq->wq_delayed_call); | |
1683 | thread_call_free(wq->wq_immediate_call); | |
1684 | thread_call_free(wq->wq_death_call); | |
1685 | ||
1686 | /* | |
1687 | * Clean up workqueue data structures for threads that exited and | |
1688 | * didn't get a chance to clean up after themselves. | |
1689 | * | |
1690 | * idle/new threads should have been interrupted and died on their own | |
1691 | */ | |
1692 | TAILQ_FOREACH_SAFE(uth, &wq->wq_thrunlist, uu_workq_entry, tmp) { | |
1693 | thread_sched_call(uth->uu_thread, NULL); | |
1694 | thread_deallocate(uth->uu_thread); | |
1695 | } | |
1696 | assert(TAILQ_EMPTY(&wq->wq_thnewlist)); | |
1697 | assert(TAILQ_EMPTY(&wq->wq_thidlelist)); | |
1698 | ||
1699 | WQ_TRACE_WQ(TRACE_wq_destroy | DBG_FUNC_END, wq, | |
0a7de745 | 1700 | VM_KERNEL_ADDRHIDE(wq), 0, 0, 0); |
d9a64523 A |
1701 | |
1702 | workq_deallocate(wq); | |
1703 | ||
0a7de745 | 1704 | WQ_TRACE(TRACE_wq_workqueue_exit | DBG_FUNC_END, 0, 0, 0, 0, 0); |
d9a64523 A |
1705 | } |
1706 | } | |
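The run-list walk in workq_exit() uses TAILQ_FOREACH_SAFE because the loop body releases the element it is looking at. A small self-contained example of the same <sys/queue.h> idiom; the node type here is invented for illustration:

#include <stdlib.h>
#include <sys/queue.h>

struct node {
        TAILQ_ENTRY(node) link;
        int value;
};
TAILQ_HEAD(node_list, node);

/*
 * Drain the list. The _SAFE variant caches the next element before the
 * body runs, so freeing the current element does not break the walk.
 */
static void
node_list_drain(struct node_list *head)
{
        struct node *n, *tmp;

        TAILQ_FOREACH_SAFE(n, head, link, tmp) {
                TAILQ_REMOVE(head, n, link);
                free(n);
        }
}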
1707 | ||
1708 | ||
1709 | #pragma mark bsd thread control | |
1710 | ||
1711 | static bool | |
1712 | _pthread_priority_to_policy(pthread_priority_t priority, | |
0a7de745 | 1713 | thread_qos_policy_data_t *data) |
d9a64523 A |
1714 | { |
1715 | data->qos_tier = _pthread_priority_thread_qos(priority); | |
1716 | data->tier_importance = _pthread_priority_relpri(priority); | |
1717 | if (data->qos_tier == THREAD_QOS_UNSPECIFIED || data->tier_importance > 0 || | |
0a7de745 | 1718 | data->tier_importance < THREAD_QOS_MIN_TIER_IMPORTANCE) { |
d9a64523 A |
1719 | return false; |
1720 | } | |
1721 | return true; | |
1722 | } | |
1723 | ||
1724 | static int | |
1725 | bsdthread_set_self(proc_t p, thread_t th, pthread_priority_t priority, | |
0a7de745 | 1726 | mach_port_name_t voucher, enum workq_set_self_flags flags) |
d9a64523 A |
1727 | { |
1728 | struct uthread *uth = get_bsdthread_info(th); | |
1729 | struct workqueue *wq = proc_get_wqptr(p); | |
1730 | ||
1731 | kern_return_t kr; | |
1732 | int unbind_rv = 0, qos_rv = 0, voucher_rv = 0, fixedpri_rv = 0; | |
1733 | bool is_wq_thread = (thread_get_tag(th) & THREAD_TAG_WORKQUEUE); | |
1734 | ||
1735 | if (flags & WORKQ_SET_SELF_WQ_KEVENT_UNBIND) { | |
1736 | if (!is_wq_thread) { | |
1737 | unbind_rv = EINVAL; | |
1738 | goto qos; | |
1739 | } | |
1740 | ||
1741 | if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER) { | |
1742 | unbind_rv = EINVAL; | |
1743 | goto qos; | |
1744 | } | |
1745 | ||
cb323159 | 1746 | workq_threadreq_t kqr = uth->uu_kqr_bound; |
d9a64523 A |
1747 | if (kqr == NULL) { |
1748 | unbind_rv = EALREADY; | |
1749 | goto qos; | |
1750 | } | |
1751 | ||
cb323159 | 1752 | if (kqr->tr_flags & WORKQ_TR_FLAG_WORKLOOP) { |
d9a64523 A |
1753 | unbind_rv = EINVAL; |
1754 | goto qos; | |
1755 | } | |
1756 | ||
cb323159 | 1757 | kqueue_threadreq_unbind(p, kqr); |
d9a64523 A |
1758 | } |
1759 | ||
1760 | qos: | |
1761 | if (flags & WORKQ_SET_SELF_QOS_FLAG) { | |
1762 | thread_qos_policy_data_t new_policy; | |
1763 | ||
1764 | if (!_pthread_priority_to_policy(priority, &new_policy)) { | |
1765 | qos_rv = EINVAL; | |
1766 | goto voucher; | |
1767 | } | |
1768 | ||
1769 | if (!is_wq_thread) { | |
1770 | /* | |
1771 | * Threads opted out of QoS can't change QoS | |
1772 | */ | |
1773 | if (!thread_has_qos_policy(th)) { | |
1774 | qos_rv = EPERM; | |
1775 | goto voucher; | |
1776 | } | |
cb323159 A |
1777 | } else if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER || |
1778 | uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_ABOVEUI) { | |
d9a64523 | 1779 | /* |
cb323159 | 1780 | * Workqueue manager threads or threads above UI can't change QoS |
d9a64523 A |
1781 | */ |
1782 | qos_rv = EINVAL; | |
1783 | goto voucher; | |
1784 | } else { | |
1785 | /* | |
1786 | * For workqueue threads, possibly adjust buckets and redrive thread | |
1787 | * requests. | |
1788 | */ | |
1789 | bool old_overcommit = uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT; | |
1790 | bool new_overcommit = priority & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG; | |
1791 | struct uu_workq_policy old_pri, new_pri; | |
1792 | bool force_run = false; | |
1793 | ||
1794 | workq_lock_spin(wq); | |
1795 | ||
1796 | if (old_overcommit != new_overcommit) { | |
1797 | uth->uu_workq_flags ^= UT_WORKQ_OVERCOMMIT; | |
1798 | if (old_overcommit) { | |
1799 | wq->wq_constrained_threads_scheduled++; | |
1800 | } else if (wq->wq_constrained_threads_scheduled-- == | |
0a7de745 | 1801 | wq_max_constrained_threads) { |
d9a64523 A |
1802 | force_run = true; |
1803 | } | |
1804 | } | |
1805 | ||
1806 | old_pri = new_pri = uth->uu_workq_pri; | |
f427ee49 | 1807 | new_pri.qos_req = (thread_qos_t)new_policy.qos_tier; |
d9a64523 A |
1808 | workq_thread_update_bucket(p, wq, uth, old_pri, new_pri, force_run); |
1809 | workq_unlock(wq); | |
1810 | } | |
1811 | ||
1812 | kr = thread_policy_set_internal(th, THREAD_QOS_POLICY, | |
0a7de745 | 1813 | (thread_policy_t)&new_policy, THREAD_QOS_POLICY_COUNT); |
d9a64523 A |
1814 | if (kr != KERN_SUCCESS) { |
1815 | qos_rv = EINVAL; | |
1816 | } | |
1817 | } | |
1818 | ||
1819 | voucher: | |
1820 | if (flags & WORKQ_SET_SELF_VOUCHER_FLAG) { | |
1821 | kr = thread_set_voucher_name(voucher); | |
1822 | if (kr != KERN_SUCCESS) { | |
1823 | voucher_rv = ENOENT; | |
1824 | goto fixedpri; | |
1825 | } | |
1826 | } | |
1827 | ||
1828 | fixedpri: | |
0a7de745 A |
1829 | if (qos_rv) { |
1830 | goto done; | |
1831 | } | |
d9a64523 A |
1832 | if (flags & WORKQ_SET_SELF_FIXEDPRIORITY_FLAG) { |
1833 | thread_extended_policy_data_t extpol = {.timeshare = 0}; | |
1834 | ||
1835 | if (is_wq_thread) { | |
1836 | /* Not allowed on workqueue threads */ | |
1837 | fixedpri_rv = ENOTSUP; | |
1838 | goto done; | |
1839 | } | |
1840 | ||
1841 | kr = thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, | |
0a7de745 | 1842 | (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); |
d9a64523 A |
1843 | if (kr != KERN_SUCCESS) { |
1844 | fixedpri_rv = EINVAL; | |
1845 | goto done; | |
1846 | } | |
1847 | } else if (flags & WORKQ_SET_SELF_TIMESHARE_FLAG) { | |
1848 | thread_extended_policy_data_t extpol = {.timeshare = 1}; | |
1849 | ||
1850 | if (is_wq_thread) { | |
1851 | /* Not allowed on workqueue threads */ | |
1852 | fixedpri_rv = ENOTSUP; | |
1853 | goto done; | |
1854 | } | |
1855 | ||
1856 | kr = thread_policy_set_internal(th, THREAD_EXTENDED_POLICY, | |
0a7de745 | 1857 | (thread_policy_t)&extpol, THREAD_EXTENDED_POLICY_COUNT); |
d9a64523 A |
1858 | if (kr != KERN_SUCCESS) { |
1859 | fixedpri_rv = EINVAL; | |
1860 | goto done; | |
1861 | } | |
1862 | } | |
1863 | ||
1864 | done: | |
1865 | if (qos_rv && voucher_rv) { | |
1866 | /* Both failed, give that a unique error. */ | |
1867 | return EBADMSG; | |
1868 | } | |
1869 | ||
1870 | if (unbind_rv) { | |
1871 | return unbind_rv; | |
1872 | } | |
1873 | ||
1874 | if (qos_rv) { | |
1875 | return qos_rv; | |
1876 | } | |
1877 | ||
1878 | if (voucher_rv) { | |
1879 | return voucher_rv; | |
1880 | } | |
1881 | ||
1882 | if (fixedpri_rv) { | |
1883 | return fixedpri_rv; | |
1884 | } | |
1885 | ||
c6bf4f31 | 1886 | |
d9a64523 A |
1887 | return 0; |
1888 | } | |
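The done: label above folds four partial results into a single errno with a fixed precedence, reserving a distinct code for the case where both the QoS and the voucher updates failed. The same folding, written as a hypothetical standalone helper for clarity:

#include <errno.h>

static int
fold_set_self_errors(int unbind_rv, int qos_rv, int voucher_rv,
    int fixedpri_rv)
{
        if (qos_rv && voucher_rv) {
                return EBADMSG;         /* both failed: give that a unique error */
        }
        if (unbind_rv) {
                return unbind_rv;
        }
        if (qos_rv) {
                return qos_rv;
        }
        if (voucher_rv) {
                return voucher_rv;
        }
        return fixedpri_rv;             /* 0 when every requested change succeeded */
}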
1889 | ||
1890 | static int | |
1891 | bsdthread_add_explicit_override(proc_t p, mach_port_name_t kport, | |
0a7de745 | 1892 | pthread_priority_t pp, user_addr_t resource) |
d9a64523 A |
1893 | { |
1894 | thread_qos_t qos = _pthread_priority_thread_qos(pp); | |
1895 | if (qos == THREAD_QOS_UNSPECIFIED) { | |
1896 | return EINVAL; | |
1897 | } | |
1898 | ||
cb323159 A |
1899 | thread_t th = port_name_to_thread(kport, |
1900 | PORT_TO_THREAD_IN_CURRENT_TASK); | |
d9a64523 A |
1901 | if (th == THREAD_NULL) { |
1902 | return ESRCH; | |
1903 | } | |
1904 | ||
1905 | int rv = proc_thread_qos_add_override(p->task, th, 0, qos, TRUE, | |
0a7de745 | 1906 | resource, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE); |
d9a64523 A |
1907 | |
1908 | thread_deallocate(th); | |
1909 | return rv; | |
1910 | } | |
1911 | ||
1912 | static int | |
1913 | bsdthread_remove_explicit_override(proc_t p, mach_port_name_t kport, | |
0a7de745 | 1914 | user_addr_t resource) |
d9a64523 | 1915 | { |
cb323159 A |
1916 | thread_t th = port_name_to_thread(kport, |
1917 | PORT_TO_THREAD_IN_CURRENT_TASK); | |
d9a64523 A |
1918 | if (th == THREAD_NULL) { |
1919 | return ESRCH; | |
1920 | } | |
1921 | ||
1922 | int rv = proc_thread_qos_remove_override(p->task, th, 0, resource, | |
0a7de745 | 1923 | THREAD_QOS_OVERRIDE_TYPE_PTHREAD_EXPLICIT_OVERRIDE); |
d9a64523 A |
1924 | |
1925 | thread_deallocate(th); | |
1926 | return rv; | |
1927 | } | |
1928 | ||
1929 | static int | |
1930 | workq_thread_add_dispatch_override(proc_t p, mach_port_name_t kport, | |
0a7de745 | 1931 | pthread_priority_t pp, user_addr_t ulock_addr) |
d9a64523 A |
1932 | { |
1933 | struct uu_workq_policy old_pri, new_pri; | |
1934 | struct workqueue *wq = proc_get_wqptr(p); | |
1935 | ||
1936 | thread_qos_t qos_override = _pthread_priority_thread_qos(pp); | |
1937 | if (qos_override == THREAD_QOS_UNSPECIFIED) { | |
1938 | return EINVAL; | |
1939 | } | |
1940 | ||
cb323159 A |
1941 | thread_t thread = port_name_to_thread(kport, |
1942 | PORT_TO_THREAD_IN_CURRENT_TASK); | |
d9a64523 A |
1943 | if (thread == THREAD_NULL) { |
1944 | return ESRCH; | |
1945 | } | |
1946 | ||
1947 | struct uthread *uth = get_bsdthread_info(thread); | |
1948 | if ((thread_get_tag(thread) & THREAD_TAG_WORKQUEUE) == 0) { | |
1949 | thread_deallocate(thread); | |
1950 | return EPERM; | |
1951 | } | |
1952 | ||
1953 | WQ_TRACE_WQ(TRACE_wq_override_dispatch | DBG_FUNC_NONE, | |
0a7de745 | 1954 | wq, thread_tid(thread), 1, pp, 0); |
d9a64523 A |
1955 | |
1956 | thread_mtx_lock(thread); | |
1957 | ||
1958 | if (ulock_addr) { | |
cb323159 | 1959 | uint32_t val; |
d9a64523 A |
1960 | int rc; |
1961 | /* | |
1962 | * Workaround lack of explicit support for 'no-fault copyin' | |
1963 | * <rdar://problem/24999882>, as disabling preemption prevents paging in | |
1964 | */ | |
1965 | disable_preemption(); | |
cb323159 | 1966 | rc = copyin_atomic32(ulock_addr, &val); |
d9a64523 | 1967 | enable_preemption(); |
cb323159 | 1968 | if (rc == 0 && ulock_owner_value_to_port_name(val) != kport) { |
d9a64523 A |
1969 | goto out; |
1970 | } | |
1971 | } | |
1972 | ||
1973 | workq_lock_spin(wq); | |
1974 | ||
1975 | old_pri = uth->uu_workq_pri; | |
1976 | if (old_pri.qos_override >= qos_override) { | |
1977 | /* Nothing to do */ | |
1978 | } else if (thread == current_thread()) { | |
1979 | new_pri = old_pri; | |
1980 | new_pri.qos_override = qos_override; | |
1981 | workq_thread_update_bucket(p, wq, uth, old_pri, new_pri, false); | |
1982 | } else { | |
1983 | uth->uu_workq_pri.qos_override = qos_override; | |
1984 | if (qos_override > workq_pri_override(old_pri)) { | |
1985 | thread_set_workq_override(thread, qos_override); | |
1986 | } | |
1987 | } | |
1988 | ||
1989 | workq_unlock(wq); | |
1990 | ||
1991 | out: | |
1992 | thread_mtx_unlock(thread); | |
1993 | thread_deallocate(thread); | |
1994 | return 0; | |
1995 | } | |
1996 | ||
1997 | static int | |
1998 | workq_thread_reset_dispatch_override(proc_t p, thread_t thread) | |
1999 | { | |
2000 | struct uu_workq_policy old_pri, new_pri; | |
2001 | struct workqueue *wq = proc_get_wqptr(p); | |
2002 | struct uthread *uth = get_bsdthread_info(thread); | |
2003 | ||
2004 | if ((thread_get_tag(thread) & THREAD_TAG_WORKQUEUE) == 0) { | |
2005 | return EPERM; | |
2006 | } | |
2007 | ||
2008 | WQ_TRACE_WQ(TRACE_wq_override_reset | DBG_FUNC_NONE, wq, 0, 0, 0, 0); | |
2009 | ||
2010 | workq_lock_spin(wq); | |
2011 | old_pri = new_pri = uth->uu_workq_pri; | |
2012 | new_pri.qos_override = THREAD_QOS_UNSPECIFIED; | |
2013 | workq_thread_update_bucket(p, wq, uth, old_pri, new_pri, false); | |
2014 | workq_unlock(wq); | |
2015 | return 0; | |
2016 | } | |
2017 | ||
cb323159 A |
2018 | static int |
2019 | workq_thread_allow_kill(__unused proc_t p, thread_t thread, bool enable) | |
2020 | { | |
2021 | if (!(thread_get_tag(thread) & THREAD_TAG_WORKQUEUE)) { | |
2022 | // If the thread isn't a workqueue thread, don't set the | |
2023 | // kill_allowed bit; however, we still need to return 0 | |
2024 | // instead of an error code since this code is executed | |
2025 | // on the abort path which needs to not depend on the | |
2026 | // pthread_t (returning an error depends on pthread_t via | |
2027 | // cerror_nocancel) | |
2028 | return 0; | |
2029 | } | |
2030 | struct uthread *uth = get_bsdthread_info(thread); | |
2031 | uth->uu_workq_pthread_kill_allowed = enable; | |
2032 | return 0; | |
2033 | } | |
2034 | ||
d9a64523 A |
2035 | static int |
2036 | bsdthread_get_max_parallelism(thread_qos_t qos, unsigned long flags, | |
0a7de745 | 2037 | int *retval) |
d9a64523 A |
2038 | { |
2039 | static_assert(QOS_PARALLELISM_COUNT_LOGICAL == | |
0a7de745 | 2040 | _PTHREAD_QOS_PARALLELISM_COUNT_LOGICAL, "logical"); |
d9a64523 | 2041 | static_assert(QOS_PARALLELISM_REALTIME == |
0a7de745 | 2042 | _PTHREAD_QOS_PARALLELISM_REALTIME, "realtime"); |
d9a64523 A |
2043 | |
2044 | if (flags & ~(QOS_PARALLELISM_REALTIME | QOS_PARALLELISM_COUNT_LOGICAL)) { | |
2045 | return EINVAL; | |
2046 | } | |
2047 | ||
2048 | if (flags & QOS_PARALLELISM_REALTIME) { | |
2049 | if (qos) { | |
2050 | return EINVAL; | |
2051 | } | |
2052 | } else if (qos == THREAD_QOS_UNSPECIFIED || qos >= THREAD_QOS_LAST) { | |
2053 | return EINVAL; | |
2054 | } | |
2055 | ||
2056 | *retval = qos_max_parallelism(qos, flags); | |
2057 | return 0; | |
2058 | } | |
2059 | ||
2060 | #define ENSURE_UNUSED(arg) \ | |
0a7de745 | 2061 | ({ if ((arg) != 0) { return EINVAL; } }) |
d9a64523 A |
2062 | |
2063 | int | |
2064 | bsdthread_ctl(struct proc *p, struct bsdthread_ctl_args *uap, int *retval) | |
2065 | { | |
2066 | switch (uap->cmd) { | |
2067 | case BSDTHREAD_CTL_QOS_OVERRIDE_START: | |
2068 | return bsdthread_add_explicit_override(p, (mach_port_name_t)uap->arg1, | |
0a7de745 | 2069 | (pthread_priority_t)uap->arg2, uap->arg3); |
d9a64523 A |
2070 | case BSDTHREAD_CTL_QOS_OVERRIDE_END: |
2071 | ENSURE_UNUSED(uap->arg3); | |
2072 | return bsdthread_remove_explicit_override(p, (mach_port_name_t)uap->arg1, | |
0a7de745 | 2073 | (user_addr_t)uap->arg2); |
d9a64523 A |
2074 | |
2075 | case BSDTHREAD_CTL_QOS_OVERRIDE_DISPATCH: | |
2076 | return workq_thread_add_dispatch_override(p, (mach_port_name_t)uap->arg1, | |
0a7de745 | 2077 | (pthread_priority_t)uap->arg2, uap->arg3); |
d9a64523 A |
2078 | case BSDTHREAD_CTL_QOS_OVERRIDE_RESET: |
2079 | return workq_thread_reset_dispatch_override(p, current_thread()); | |
2080 | ||
2081 | case BSDTHREAD_CTL_SET_SELF: | |
2082 | return bsdthread_set_self(p, current_thread(), | |
0a7de745 A |
2083 | (pthread_priority_t)uap->arg1, (mach_port_name_t)uap->arg2, |
2084 | (enum workq_set_self_flags)uap->arg3); | |
d9a64523 A |
2085 | |
2086 | case BSDTHREAD_CTL_QOS_MAX_PARALLELISM: | |
2087 | ENSURE_UNUSED(uap->arg3); | |
2088 | return bsdthread_get_max_parallelism((thread_qos_t)uap->arg1, | |
0a7de745 | 2089 | (unsigned long)uap->arg2, retval); |
cb323159 A |
2090 | case BSDTHREAD_CTL_WORKQ_ALLOW_KILL: |
2091 | ENSURE_UNUSED(uap->arg2); | |
2092 | ENSURE_UNUSED(uap->arg3); | |
2093 | return workq_thread_allow_kill(p, current_thread(), (bool)uap->arg1); | |
d9a64523 A |
2094 | |
2095 | case BSDTHREAD_CTL_SET_QOS: | |
2096 | case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_ADD: | |
2097 | case BSDTHREAD_CTL_QOS_DISPATCH_ASYNCHRONOUS_OVERRIDE_RESET: | |
2098 | /* no longer supported */ | |
2099 | return ENOTSUP; | |
2100 | ||
2101 | default: | |
2102 | return EINVAL; | |
2103 | } | |
2104 | } | |
2105 | ||
2106 | #pragma mark workqueue thread manipulation | |
2107 | ||
cb323159 A |
2108 | static void __dead2 |
2109 | workq_unpark_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, | |
2110 | struct uthread *uth, uint32_t setup_flags); | |
2111 | ||
d9a64523 A |
2112 | static void __dead2 |
2113 | workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, | |
cb323159 | 2114 | struct uthread *uth, uint32_t setup_flags); |
d9a64523 A |
2115 | |
2116 | static void workq_setup_and_run(proc_t p, struct uthread *uth, int flags) __dead2; | |
2117 | ||
2118 | #if KDEBUG_LEVEL >= KDEBUG_LEVEL_STANDARD | |
2119 | static inline uint64_t | |
2120 | workq_trace_req_id(workq_threadreq_t req) | |
2121 | { | |
2122 | struct kqworkloop *kqwl; | |
cb323159 A |
2123 | if (req->tr_flags & WORKQ_TR_FLAG_WORKLOOP) { |
2124 | kqwl = __container_of(req, struct kqworkloop, kqwl_request); | |
d9a64523 A |
2125 | return kqwl->kqwl_dynamicid; |
2126 | } | |
2127 | ||
2128 | return VM_KERNEL_ADDRHIDE(req); | |
2129 | } | |
2130 | #endif | |
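workq_trace_req_id() recovers the enclosing kqworkloop from the embedded request with a container-of computation (__container_of above). A standalone sketch of that idiom with invented types:

#include <stddef.h>

struct request {
        int state;
};

struct workloop {
        unsigned long long id;
        struct request req;     /* embedded member */
};

/* Recover a pointer to the enclosing structure from one of its members. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static unsigned long long
workloop_id_from_request(struct request *r)
{
        return container_of(r, struct workloop, req)->id;
}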
2131 | ||
2132 | /** | |
2133 | * Entry point for libdispatch to ask for threads | |
2134 | */ | |
2135 | static int | |
2136 | workq_reqthreads(struct proc *p, uint32_t reqcount, pthread_priority_t pp) | |
2137 | { | |
2138 | thread_qos_t qos = _pthread_priority_thread_qos(pp); | |
2139 | struct workqueue *wq = proc_get_wqptr(p); | |
2140 | uint32_t unpaced, upcall_flags = WQ_FLAG_THREAD_NEWSPI; | |
2141 | ||
2142 | if (wq == NULL || reqcount <= 0 || reqcount > UINT16_MAX || | |
0a7de745 | 2143 | qos == THREAD_QOS_UNSPECIFIED) { |
d9a64523 A |
2144 | return EINVAL; |
2145 | } | |
2146 | ||
2147 | WQ_TRACE_WQ(TRACE_wq_wqops_reqthreads | DBG_FUNC_NONE, | |
0a7de745 | 2148 | wq, reqcount, pp, 0, 0); |
d9a64523 A |
2149 | |
2150 | workq_threadreq_t req = zalloc(workq_zone_threadreq); | |
2151 | priority_queue_entry_init(&req->tr_entry); | |
cb323159 | 2152 | req->tr_state = WORKQ_TR_STATE_NEW; |
d9a64523 A |
2153 | req->tr_flags = 0; |
2154 | req->tr_qos = qos; | |
2155 | ||
2156 | if (pp & _PTHREAD_PRIORITY_OVERCOMMIT_FLAG) { | |
cb323159 | 2157 | req->tr_flags |= WORKQ_TR_FLAG_OVERCOMMIT; |
d9a64523 A |
2158 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; |
2159 | } | |
2160 | ||
2161 | WQ_TRACE_WQ(TRACE_wq_thread_request_initiate | DBG_FUNC_NONE, | |
0a7de745 | 2162 | wq, workq_trace_req_id(req), req->tr_qos, reqcount, 0); |
d9a64523 A |
2163 | |
2164 | workq_lock_spin(wq); | |
2165 | do { | |
2166 | if (_wq_exiting(wq)) { | |
2167 | goto exiting; | |
2168 | } | |
2169 | ||
2170 | /* | |
2171 | * When userspace is asking for parallelism, wake up to (reqcount - 1) | |
2172 | * threads without pacing, to inform the scheduler of that workload. | |
2173 | * | |
2174 | * The last requests, or the ones that failed the admission checks, are | |
2175 | * enqueued and go through the regular creator codepath. | |
2176 | * | |
2177 | * If there aren't enough threads, add one, but re-evaluate everything | |
2178 | * as conditions may now have changed. | |
2179 | */ | |
cb323159 | 2180 | if (reqcount > 1 && (req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
2181 | unpaced = workq_constrained_allowance(wq, qos, NULL, false); |
2182 | if (unpaced >= reqcount - 1) { | |
2183 | unpaced = reqcount - 1; | |
2184 | } | |
2185 | } else { | |
2186 | unpaced = reqcount - 1; | |
2187 | } | |
2188 | ||
2189 | /* | |
2190 | * This path does not currently handle custom workloop parameters | |
2191 | * when creating threads for parallelism. | |
2192 | */ | |
cb323159 | 2193 | assert(!(req->tr_flags & WORKQ_TR_FLAG_WL_PARAMS)); |
d9a64523 A |
2194 | |
2195 | /* | |
2196 | * This is a trimmed down version of workq_threadreq_bind_and_unlock() | |
2197 | */ | |
2198 | while (unpaced > 0 && wq->wq_thidlecount) { | |
cb323159 A |
2199 | struct uthread *uth; |
2200 | bool needs_wakeup; | |
2201 | uint8_t uu_flags = UT_WORKQ_EARLY_BOUND; | |
2202 | ||
2203 | if (req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) { | |
2204 | uu_flags |= UT_WORKQ_OVERCOMMIT; | |
2205 | } | |
2206 | ||
2207 | uth = workq_pop_idle_thread(wq, uu_flags, &needs_wakeup); | |
d9a64523 A |
2208 | |
2209 | _wq_thactive_inc(wq, qos); | |
2210 | wq->wq_thscheduled_count[_wq_bucket(qos)]++; | |
cb323159 | 2211 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); |
d9a64523 A |
2212 | wq->wq_fulfilled++; |
2213 | ||
d9a64523 A |
2214 | uth->uu_save.uus_workq_park_data.upcall_flags = upcall_flags; |
2215 | uth->uu_save.uus_workq_park_data.thread_request = req; | |
cb323159 A |
2216 | if (needs_wakeup) { |
2217 | workq_thread_wakeup(uth); | |
2218 | } | |
d9a64523 A |
2219 | unpaced--; |
2220 | reqcount--; | |
2221 | } | |
2222 | } while (unpaced && wq->wq_nthreads < wq_max_threads && | |
0a7de745 | 2223 | workq_add_new_idle_thread(p, wq)); |
d9a64523 A |
2224 | |
2225 | if (_wq_exiting(wq)) { | |
2226 | goto exiting; | |
2227 | } | |
2228 | ||
f427ee49 | 2229 | req->tr_count = (uint16_t)reqcount; |
d9a64523 A |
2230 | if (workq_threadreq_enqueue(wq, req)) { |
2231 | /* This can drop the workqueue lock, and take it again */ | |
2232 | workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS); | |
2233 | } | |
2234 | workq_unlock(wq); | |
2235 | return 0; | |
2236 | ||
2237 | exiting: | |
2238 | workq_unlock(wq); | |
2239 | zfree(workq_zone_threadreq, req); | |
2240 | return ECANCELED; | |
2241 | } | |
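The pacing decision in workq_reqthreads() reduces to: overcommit requests may wake up to reqcount - 1 threads immediately, constrained requests are additionally capped by the admission allowance, and the remainder is enqueued for the creator. A minimal sketch of that split as a hypothetical helper:

#include <stdbool.h>
#include <stdint.h>

/*
 * How many threads to wake without pacing for a request of `reqcount`
 * threads; the last one (at least) always goes through the creator.
 */
static uint32_t
unpaced_wakeups(uint32_t reqcount, bool overcommit, uint32_t allowance)
{
        if (reqcount <= 1) {
                return 0;
        }
        if (overcommit) {
                return reqcount - 1;
        }
        return allowance < reqcount - 1 ? allowance : reqcount - 1;
}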
2242 | ||
2243 | bool | |
cb323159 A |
2244 | workq_kern_threadreq_initiate(struct proc *p, workq_threadreq_t req, |
2245 | struct turnstile *workloop_ts, thread_qos_t qos, | |
2246 | workq_kern_threadreq_flags_t flags) | |
d9a64523 A |
2247 | { |
2248 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
d9a64523 | 2249 | struct uthread *uth = NULL; |
d9a64523 | 2250 | |
cb323159 | 2251 | assert(req->tr_flags & (WORKQ_TR_FLAG_WORKLOOP | WORKQ_TR_FLAG_KEVENT)); |
d9a64523 | 2252 | |
cb323159 | 2253 | if (req->tr_flags & WORKQ_TR_FLAG_WL_OUTSIDE_QOS) { |
d9a64523 | 2254 | workq_threadreq_param_t trp = kqueue_threadreq_workloop_param(req); |
cb323159 A |
2255 | qos = thread_workq_qos_for_pri(trp.trp_pri); |
2256 | if (qos == THREAD_QOS_UNSPECIFIED) { | |
2257 | qos = WORKQ_THREAD_QOS_ABOVEUI; | |
d9a64523 | 2258 | } |
d9a64523 A |
2259 | } |
2260 | ||
cb323159 | 2261 | assert(req->tr_state == WORKQ_TR_STATE_IDLE); |
d9a64523 A |
2262 | priority_queue_entry_init(&req->tr_entry); |
2263 | req->tr_count = 1; | |
cb323159 | 2264 | req->tr_state = WORKQ_TR_STATE_NEW; |
d9a64523 A |
2265 | req->tr_qos = qos; |
2266 | ||
2267 | WQ_TRACE_WQ(TRACE_wq_thread_request_initiate | DBG_FUNC_NONE, wq, | |
0a7de745 | 2268 | workq_trace_req_id(req), qos, 1, 0); |
d9a64523 A |
2269 | |
2270 | if (flags & WORKQ_THREADREQ_ATTEMPT_REBIND) { | |
2271 | /* | |
2272 | * we're called back synchronously from the context of | |
2273 | * kqueue_threadreq_unbind from within workq_thread_return(), | |
2274 | * so we can try to match up this thread with this request! | |
2275 | */ | |
2276 | uth = current_uthread(); | |
2277 | assert(uth->uu_kqr_bound == NULL); | |
2278 | } | |
2279 | ||
2280 | workq_lock_spin(wq); | |
2281 | if (_wq_exiting(wq)) { | |
cb323159 | 2282 | req->tr_state = WORKQ_TR_STATE_IDLE; |
d9a64523 A |
2283 | workq_unlock(wq); |
2284 | return false; | |
2285 | } | |
2286 | ||
2287 | if (uth && workq_threadreq_admissible(wq, uth, req)) { | |
2288 | assert(uth != wq->wq_creator); | |
cb323159 A |
2289 | if (uth->uu_workq_pri.qos_bucket != req->tr_qos) { |
2290 | _wq_thactive_move(wq, uth->uu_workq_pri.qos_bucket, req->tr_qos); | |
2291 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ false); | |
2292 | } | |
2293 | /* | |
2294 | * We're called from workq_kern_threadreq_initiate() | |
2295 | * due to an unbind, with the kq req held. | |
2296 | */ | |
2297 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_START, wq, | |
2298 | workq_trace_req_id(req), 0, 0, 0); | |
2299 | wq->wq_fulfilled++; | |
2300 | kqueue_threadreq_bind(p, req, uth->uu_thread, 0); | |
d9a64523 A |
2301 | } else { |
2302 | if (workloop_ts) { | |
2303 | workq_perform_turnstile_operation_locked(wq, ^{ | |
2304 | turnstile_update_inheritor(workloop_ts, wq->wq_turnstile, | |
0a7de745 | 2305 | TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); |
d9a64523 | 2306 | turnstile_update_inheritor_complete(workloop_ts, |
0a7de745 | 2307 | TURNSTILE_INTERLOCK_HELD); |
d9a64523 A |
2308 | }); |
2309 | } | |
2310 | if (workq_threadreq_enqueue(wq, req)) { | |
2311 | workq_schedule_creator(p, wq, flags); | |
2312 | } | |
d9a64523 A |
2313 | } |
2314 | ||
cb323159 A |
2315 | workq_unlock(wq); |
2316 | ||
d9a64523 A |
2317 | return true; |
2318 | } | |
2319 | ||
2320 | void | |
cb323159 A |
2321 | workq_kern_threadreq_modify(struct proc *p, workq_threadreq_t req, |
2322 | thread_qos_t qos, workq_kern_threadreq_flags_t flags) | |
d9a64523 A |
2323 | { |
2324 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
cb323159 | 2325 | bool make_overcommit = false; |
d9a64523 | 2326 | |
cb323159 | 2327 | if (req->tr_flags & WORKQ_TR_FLAG_WL_OUTSIDE_QOS) { |
d9a64523 A |
2328 | /* Requests outside-of-QoS shouldn't accept modify operations */ |
2329 | return; | |
2330 | } | |
2331 | ||
2332 | workq_lock_spin(wq); | |
2333 | ||
2334 | assert(req->tr_qos != WORKQ_THREAD_QOS_MANAGER); | |
cb323159 | 2335 | assert(req->tr_flags & (WORKQ_TR_FLAG_KEVENT | WORKQ_TR_FLAG_WORKLOOP)); |
d9a64523 | 2336 | |
cb323159 A |
2337 | if (req->tr_state == WORKQ_TR_STATE_BINDING) { |
2338 | kqueue_threadreq_bind(p, req, req->tr_thread, 0); | |
d9a64523 A |
2339 | workq_unlock(wq); |
2340 | return; | |
2341 | } | |
2342 | ||
cb323159 A |
2343 | if (flags & WORKQ_THREADREQ_MAKE_OVERCOMMIT) { |
2344 | make_overcommit = (req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0; | |
2345 | } | |
d9a64523 | 2346 | |
cb323159 | 2347 | if (_wq_exiting(wq) || (req->tr_qos == qos && !make_overcommit)) { |
d9a64523 A |
2348 | workq_unlock(wq); |
2349 | return; | |
2350 | } | |
2351 | ||
2352 | assert(req->tr_count == 1); | |
cb323159 | 2353 | if (req->tr_state != WORKQ_TR_STATE_QUEUED) { |
d9a64523 A |
2354 | panic("Invalid thread request (%p) state %d", req, req->tr_state); |
2355 | } | |
2356 | ||
2357 | WQ_TRACE_WQ(TRACE_wq_thread_request_modify | DBG_FUNC_NONE, wq, | |
0a7de745 | 2358 | workq_trace_req_id(req), qos, 0, 0); |
d9a64523 | 2359 | |
f427ee49 | 2360 | struct priority_queue_sched_max *pq = workq_priority_queue_for_req(wq, req); |
d9a64523 A |
2361 | workq_threadreq_t req_max; |
2362 | ||
2363 | /* | |
2364 | * Stage 1: Dequeue the request from its priority queue. | |
2365 | * | |
2366 | * If we dequeue the root item of the constrained priority queue, | |
2367 | * maintain the best constrained request qos invariant. | |
2368 | */ | |
f427ee49 | 2369 | if (priority_queue_remove(pq, &req->tr_entry)) { |
cb323159 | 2370 | if ((req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
2371 | _wq_thactive_refresh_best_constrained_req_qos(wq); |
2372 | } | |
2373 | } | |
2374 | ||
2375 | /* | |
2376 | * Stage 2: Apply changes to the thread request | |
2377 | * | |
2378 | * If the item will not become the root of the priority queue it belongs to, | |
2379 | * then we need to wait in line: just enqueue and return quickly. | |
2380 | */ | |
cb323159 A |
2381 | if (__improbable(make_overcommit)) { |
2382 | req->tr_flags ^= WORKQ_TR_FLAG_OVERCOMMIT; | |
d9a64523 A |
2383 | pq = workq_priority_queue_for_req(wq, req); |
2384 | } | |
2385 | req->tr_qos = qos; | |
2386 | ||
2387 | req_max = priority_queue_max(pq, struct workq_threadreq_s, tr_entry); | |
2388 | if (req_max && req_max->tr_qos >= qos) { | |
f427ee49 A |
2389 | priority_queue_entry_set_sched_pri(pq, &req->tr_entry, |
2390 | workq_priority_for_req(req), false); | |
2391 | priority_queue_insert(pq, &req->tr_entry); | |
d9a64523 A |
2392 | workq_unlock(wq); |
2393 | return; | |
2394 | } | |
2395 | ||
2396 | /* | |
2397 | * Stage 3: Reevaluate whether we should run the thread request. | |
2398 | * | |
2399 | * Pretend the thread request is new again: | |
2400 | * - adjust wq_reqcount to not count it anymore. | |
cb323159 | 2401 | * - make its state WORKQ_TR_STATE_NEW (so that workq_threadreq_bind_and_unlock |
d9a64523 A |
2402 | * properly attempts a synchronous bind) |
2403 | */ | |
2404 | wq->wq_reqcount--; | |
cb323159 | 2405 | req->tr_state = WORKQ_TR_STATE_NEW; |
d9a64523 A |
2406 | if (workq_threadreq_enqueue(wq, req)) { |
2407 | workq_schedule_creator(p, wq, flags); | |
2408 | } | |
2409 | workq_unlock(wq); | |
2410 | } | |
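Because the priority queue used here has no increase/decrease-key primitive, workq_kern_threadreq_modify() changes a request's priority by dequeuing it, mutating it, and re-enqueuing it, then re-checks the maximum to decide whether a redrive is needed. The same pattern on a deliberately simple array-backed queue (illustrative only, not the priority_queue API used above):

#include <stdbool.h>
#include <stdint.h>

#define PQ_CAP  64

/* Unordered array; the "queue" part is the linear max scan. */
struct simple_pq {
        uint8_t key[PQ_CAP];
        int n;
};

static void
pq_insert(struct simple_pq *pq, uint8_t key)
{
        pq->key[pq->n++] = key;
}

static void
pq_remove(struct simple_pq *pq, int idx)
{
        pq->key[idx] = pq->key[--pq->n];
}

static bool
pq_max(const struct simple_pq *pq, uint8_t *out)
{
        if (pq->n == 0) {
                return false;
        }
        uint8_t m = pq->key[0];
        for (int i = 1; i < pq->n; i++) {
                if (pq->key[i] > m) {
                        m = pq->key[i];
                }
        }
        *out = m;
        return true;
}

/*
 * Stage 1: dequeue.  Stage 2: mutate and re-enqueue.  Stage 3 (deciding
 * whether the new maximum warrants action, via pq_max()) is left to the
 * caller, just as workq_kern_threadreq_modify() re-evaluates the request.
 */
static void
pq_change_key(struct simple_pq *pq, int idx, uint8_t new_key)
{
        pq_remove(pq, idx);
        pq_insert(pq, new_key);
}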
2411 | ||
2412 | void | |
2413 | workq_kern_threadreq_lock(struct proc *p) | |
2414 | { | |
2415 | workq_lock_spin(proc_get_wqptr_fast(p)); | |
2416 | } | |
2417 | ||
2418 | void | |
2419 | workq_kern_threadreq_unlock(struct proc *p) | |
2420 | { | |
2421 | workq_unlock(proc_get_wqptr_fast(p)); | |
2422 | } | |
2423 | ||
2424 | void | |
cb323159 | 2425 | workq_kern_threadreq_update_inheritor(struct proc *p, workq_threadreq_t req, |
0a7de745 A |
2426 | thread_t owner, struct turnstile *wl_ts, |
2427 | turnstile_update_flags_t flags) | |
d9a64523 A |
2428 | { |
2429 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
d9a64523 A |
2430 | turnstile_inheritor_t inheritor; |
2431 | ||
2432 | assert(req->tr_qos != WORKQ_THREAD_QOS_MANAGER); | |
cb323159 | 2433 | assert(req->tr_flags & WORKQ_TR_FLAG_WORKLOOP); |
d9a64523 A |
2434 | workq_lock_held(wq); |
2435 | ||
cb323159 A |
2436 | if (req->tr_state == WORKQ_TR_STATE_BINDING) { |
2437 | kqueue_threadreq_bind(p, req, req->tr_thread, | |
0a7de745 | 2438 | KQUEUE_THREADERQ_BIND_NO_INHERITOR_UPDATE); |
d9a64523 A |
2439 | return; |
2440 | } | |
2441 | ||
2442 | if (_wq_exiting(wq)) { | |
2443 | inheritor = TURNSTILE_INHERITOR_NULL; | |
2444 | } else { | |
cb323159 | 2445 | if (req->tr_state != WORKQ_TR_STATE_QUEUED) { |
d9a64523 A |
2446 | panic("Invalid thread request (%p) state %d", req, req->tr_state); |
2447 | } | |
2448 | ||
2449 | if (owner) { | |
2450 | inheritor = owner; | |
2451 | flags |= TURNSTILE_INHERITOR_THREAD; | |
2452 | } else { | |
2453 | inheritor = wq->wq_turnstile; | |
2454 | flags |= TURNSTILE_INHERITOR_TURNSTILE; | |
2455 | } | |
2456 | } | |
2457 | ||
2458 | workq_perform_turnstile_operation_locked(wq, ^{ | |
2459 | turnstile_update_inheritor(wl_ts, inheritor, flags); | |
2460 | }); | |
2461 | } | |
2462 | ||
2463 | void | |
cb323159 | 2464 | workq_kern_threadreq_redrive(struct proc *p, workq_kern_threadreq_flags_t flags) |
d9a64523 A |
2465 | { |
2466 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
2467 | ||
2468 | workq_lock_spin(wq); | |
2469 | workq_schedule_creator(p, wq, flags); | |
2470 | workq_unlock(wq); | |
2471 | } | |
2472 | ||
2473 | void | |
2474 | workq_schedule_creator_turnstile_redrive(struct workqueue *wq, bool locked) | |
2475 | { | |
cb323159 A |
2476 | if (locked) { |
2477 | workq_schedule_creator(NULL, wq, WORKQ_THREADREQ_NONE); | |
2478 | } else { | |
2479 | workq_schedule_immediate_thread_creation(wq); | |
0a7de745 | 2480 | } |
d9a64523 A |
2481 | } |
2482 | ||
2483 | static int | |
2484 | workq_thread_return(struct proc *p, struct workq_kernreturn_args *uap, | |
0a7de745 | 2485 | struct workqueue *wq) |
d9a64523 A |
2486 | { |
2487 | thread_t th = current_thread(); | |
2488 | struct uthread *uth = get_bsdthread_info(th); | |
cb323159 | 2489 | workq_threadreq_t kqr = uth->uu_kqr_bound; |
d9a64523 A |
2490 | workq_threadreq_param_t trp = { }; |
2491 | int nevents = uap->affinity, error; | |
2492 | user_addr_t eventlist = uap->item; | |
2493 | ||
2494 | if (((thread_get_tag(th) & THREAD_TAG_WORKQUEUE) == 0) || | |
0a7de745 | 2495 | (uth->uu_workq_flags & UT_WORKQ_DYING)) { |
d9a64523 A |
2496 | return EINVAL; |
2497 | } | |
2498 | ||
2499 | if (eventlist && nevents && kqr == NULL) { | |
2500 | return EINVAL; | |
2501 | } | |
2502 | ||
2503 | /* reset signal mask on the workqueue thread to default state */ | |
2504 | if (uth->uu_sigmask != (sigset_t)(~workq_threadmask)) { | |
2505 | proc_lock(p); | |
2506 | uth->uu_sigmask = ~workq_threadmask; | |
2507 | proc_unlock(p); | |
2508 | } | |
2509 | ||
cb323159 | 2510 | if (kqr && kqr->tr_flags & WORKQ_TR_FLAG_WL_PARAMS) { |
d9a64523 A |
2511 | /* |
2512 | * Ensure we store the threadreq param before unbinding | |
2513 | * the kqr from this thread. | |
2514 | */ | |
cb323159 | 2515 | trp = kqueue_threadreq_workloop_param(kqr); |
d9a64523 A |
2516 | } |
2517 | ||
cb323159 A |
2518 | /* |
2519 | * Freeze the base pri while we decide the fate of this thread. | |
2520 | * | |
2521 | * Either: | |
2522 | * - we return to user and kevent_cleanup will have unfrozen the base pri, | |
2523 | * - or we proceed to workq_select_threadreq_or_park_and_unlock(), which will. | |
2524 | */ | |
2525 | thread_freeze_base_pri(th); | |
2526 | ||
d9a64523 A |
2527 | if (kqr) { |
2528 | uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI | WQ_FLAG_THREAD_REUSE; | |
cb323159 | 2529 | if (kqr->tr_flags & WORKQ_TR_FLAG_WORKLOOP) { |
d9a64523 A |
2530 | upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT; |
2531 | } else { | |
2532 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
2533 | } | |
2534 | if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER) { | |
2535 | upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; | |
2536 | } else { | |
2537 | if (uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) { | |
2538 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; | |
2539 | } | |
2540 | if (uth->uu_workq_flags & UT_WORKQ_OUTSIDE_QOS) { | |
2541 | upcall_flags |= WQ_FLAG_THREAD_OUTSIDEQOS; | |
2542 | } else { | |
2543 | upcall_flags |= uth->uu_workq_pri.qos_req | | |
0a7de745 | 2544 | WQ_FLAG_THREAD_PRIO_QOS; |
d9a64523 A |
2545 | } |
2546 | } | |
2547 | ||
2548 | error = pthread_functions->workq_handle_stack_events(p, th, | |
0a7de745 A |
2549 | get_task_map(p->task), uth->uu_workq_stackaddr, |
2550 | uth->uu_workq_thport, eventlist, nevents, upcall_flags); | |
2551 | if (error) { | |
cb323159 | 2552 | assert(uth->uu_kqr_bound == kqr); |
0a7de745 A |
2553 | return error; |
2554 | } | |
d9a64523 A |
2555 | |
2556 | // pthread is supposed to pass KEVENT_FLAG_PARKING here | |
2557 | // which should cause the above call to either: | |
2558 | // - not return | |
2559 | // - return an error | |
2560 | // - return 0 and have unbound properly | |
2561 | assert(uth->uu_kqr_bound == NULL); | |
2562 | } | |
2563 | ||
2564 | WQ_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_END, wq, uap->options, 0, 0, 0); | |
2565 | ||
2566 | thread_sched_call(th, NULL); | |
2567 | thread_will_park_or_terminate(th); | |
2568 | #if CONFIG_WORKLOOP_DEBUG | |
2569 | UU_KEVENT_HISTORY_WRITE_ENTRY(uth, { .uu_error = -1, }); | |
2570 | #endif | |
2571 | ||
2572 | workq_lock_spin(wq); | |
2573 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_END, wq, 0, 0, 0, 0); | |
2574 | uth->uu_save.uus_workq_park_data.workloop_params = trp.trp_value; | |
cb323159 A |
2575 | workq_select_threadreq_or_park_and_unlock(p, wq, uth, |
2576 | WQ_SETUP_CLEAR_VOUCHER); | |
d9a64523 A |
2577 | __builtin_unreachable(); |
2578 | } | |
2579 | ||
2580 | /** | |
2581 | * Multiplexed call to interact with the workqueue mechanism | |
2582 | */ | |
2583 | int | |
2584 | workq_kernreturn(struct proc *p, struct workq_kernreturn_args *uap, int32_t *retval) | |
2585 | { | |
2586 | int options = uap->options; | |
2587 | int arg2 = uap->affinity; | |
2588 | int arg3 = uap->prio; | |
2589 | struct workqueue *wq = proc_get_wqptr(p); | |
2590 | int error = 0; | |
2591 | ||
2592 | if ((p->p_lflag & P_LREGISTER) == 0) { | |
2593 | return EINVAL; | |
2594 | } | |
2595 | ||
2596 | switch (options) { | |
2597 | case WQOPS_QUEUE_NEWSPISUPP: { | |
2598 | /* | |
2599 | * arg2 = offset of serialno into dispatch queue | |
2600 | * arg3 = kevent support | |
2601 | */ | |
2602 | int offset = arg2; | |
0a7de745 | 2603 | if (arg3 & 0x01) { |
d9a64523 A |
2604 | // If we get here, then userspace has indicated support for kevent delivery. |
2605 | } | |
2606 | ||
2607 | p->p_dispatchqueue_serialno_offset = (uint64_t)offset; | |
2608 | break; | |
2609 | } | |
2610 | case WQOPS_QUEUE_REQTHREADS: { | |
2611 | /* | |
2612 | * arg2 = number of threads to start | |
2613 | * arg3 = priority | |
2614 | */ | |
2615 | error = workq_reqthreads(p, arg2, arg3); | |
2616 | break; | |
2617 | } | |
2618 | case WQOPS_SET_EVENT_MANAGER_PRIORITY: { | |
2619 | /* | |
2620 | * arg2 = priority for the manager thread | |
2621 | * | |
2622 | * if _PTHREAD_PRIORITY_SCHED_PRI_FLAG is set, | |
2623 | * the low bits of the value contain a scheduling priority | |
2624 | * instead of a QOS value | |
2625 | */ | |
2626 | pthread_priority_t pri = arg2; | |
2627 | ||
2628 | if (wq == NULL) { | |
2629 | error = EINVAL; | |
2630 | break; | |
2631 | } | |
2632 | ||
2633 | /* | |
2634 | * Normalize the incoming priority so that it is ordered numerically. | |
2635 | */ | |
2636 | if (pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) { | |
2637 | pri &= (_PTHREAD_PRIORITY_SCHED_PRI_MASK | | |
0a7de745 | 2638 | _PTHREAD_PRIORITY_SCHED_PRI_FLAG); |
d9a64523 A |
2639 | } else { |
2640 | thread_qos_t qos = _pthread_priority_thread_qos(pri); | |
2641 | int relpri = _pthread_priority_relpri(pri); | |
2642 | if (relpri > 0 || relpri < THREAD_QOS_MIN_TIER_IMPORTANCE || | |
0a7de745 | 2643 | qos == THREAD_QOS_UNSPECIFIED) { |
d9a64523 A |
2644 | error = EINVAL; |
2645 | break; | |
2646 | } | |
2647 | pri &= ~_PTHREAD_PRIORITY_FLAGS_MASK; | |
2648 | } | |
2649 | ||
2650 | /* | |
2651 | * If userspace passes a scheduling priority, that wins over any QoS. | |
2652 | * Userspace should take care not to lower the priority this way. | |
2653 | */ | |
2654 | workq_lock_spin(wq); | |
2655 | if (wq->wq_event_manager_priority < (uint32_t)pri) { | |
2656 | wq->wq_event_manager_priority = (uint32_t)pri; | |
2657 | } | |
2658 | workq_unlock(wq); | |
2659 | break; | |
2660 | } | |
2661 | case WQOPS_THREAD_KEVENT_RETURN: | |
2662 | case WQOPS_THREAD_WORKLOOP_RETURN: | |
2663 | case WQOPS_THREAD_RETURN: { | |
2664 | error = workq_thread_return(p, uap, wq); | |
2665 | break; | |
2666 | } | |
2667 | ||
2668 | case WQOPS_SHOULD_NARROW: { | |
2669 | /* | |
2670 | * arg2 = priority to test | |
2671 | * arg3 = unused | |
2672 | */ | |
2673 | thread_t th = current_thread(); | |
2674 | struct uthread *uth = get_bsdthread_info(th); | |
2675 | if (((thread_get_tag(th) & THREAD_TAG_WORKQUEUE) == 0) || | |
0a7de745 | 2676 | (uth->uu_workq_flags & (UT_WORKQ_DYING | UT_WORKQ_OVERCOMMIT))) { |
d9a64523 A |
2677 | error = EINVAL; |
2678 | break; | |
2679 | } | |
2680 | ||
2681 | thread_qos_t qos = _pthread_priority_thread_qos(arg2); | |
2682 | if (qos == THREAD_QOS_UNSPECIFIED) { | |
2683 | error = EINVAL; | |
2684 | break; | |
2685 | } | |
2686 | workq_lock_spin(wq); | |
2687 | bool should_narrow = !workq_constrained_allowance(wq, qos, uth, false); | |
2688 | workq_unlock(wq); | |
2689 | ||
2690 | *retval = should_narrow; | |
2691 | break; | |
2692 | } | |
cb323159 A |
2693 | case WQOPS_SETUP_DISPATCH: { |
2694 | /* | |
2695 | * item = pointer to workq_dispatch_config structure | |
2696 | * arg2 = sizeof(item) | |
2697 | */ | |
2698 | struct workq_dispatch_config cfg; | |
2699 | bzero(&cfg, sizeof(cfg)); | |
2700 | ||
2701 | error = copyin(uap->item, &cfg, MIN(sizeof(cfg), (unsigned long) arg2)); | |
2702 | if (error) { | |
2703 | break; | |
2704 | } | |
2705 | ||
2706 | if (cfg.wdc_flags & ~WORKQ_DISPATCH_SUPPORTED_FLAGS || | |
2707 | cfg.wdc_version < WORKQ_DISPATCH_MIN_SUPPORTED_VERSION) { | |
2708 | error = ENOTSUP; | |
2709 | break; | |
2710 | } | |
2711 | ||
2712 | /* Load fields from version 1 */ | |
2713 | p->p_dispatchqueue_serialno_offset = cfg.wdc_queue_serialno_offs; | |
2714 | ||
2715 | /* Load fields from version 2 */ | |
2716 | if (cfg.wdc_version >= 2) { | |
2717 | p->p_dispatchqueue_label_offset = cfg.wdc_queue_label_offs; | |
2718 | } | |
2719 | ||
2720 | break; | |
2721 | } | |
d9a64523 A |
2722 | default: |
2723 | error = EINVAL; | |
2724 | break; | |
2725 | } | |
2726 | ||
0a7de745 | 2727 | return error; |
d9a64523 A |
2728 | } |
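The WQOPS_SET_EVENT_MANAGER_PRIORITY case above is a "monotonic maximum" update: under the lock, the stored manager priority is only ever raised. The same invariant can also be kept lock-free with a compare-and-swap loop; a minimal C11 sketch (illustrative, not how this file does it):

#include <stdatomic.h>
#include <stdint.h>

/* Raise *target to at least `candidate`; never lower it. */
static void
store_max_u32(_Atomic uint32_t *target, uint32_t candidate)
{
        uint32_t cur = atomic_load_explicit(target, memory_order_relaxed);

        while (cur < candidate &&
            !atomic_compare_exchange_weak_explicit(target, &cur, candidate,
            memory_order_relaxed, memory_order_relaxed)) {
                /* `cur` was refreshed by the failed CAS; loop while still lower */
        }
}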
2729 | ||
2730 | /* | |
2731 | * We have no work to do; park ourselves on the idle list. | |
2732 | * | |
2733 | * Consumes the workqueue lock and does not return. | |
2734 | */ | |
2735 | __attribute__((noreturn, noinline)) | |
2736 | static void | |
cb323159 A |
2737 | workq_park_and_unlock(proc_t p, struct workqueue *wq, struct uthread *uth, |
2738 | uint32_t setup_flags) | |
d9a64523 A |
2739 | { |
2740 | assert(uth == current_uthread()); | |
2741 | assert(uth->uu_kqr_bound == NULL); | |
cb323159 | 2742 | workq_push_idle_thread(p, wq, uth, setup_flags); // may not return |
d9a64523 A |
2743 | |
2744 | workq_thread_reset_cpupercent(NULL, uth); | |
2745 | ||
cb323159 A |
2746 | if ((uth->uu_workq_flags & UT_WORKQ_IDLE_CLEANUP) && |
2747 | !(uth->uu_workq_flags & UT_WORKQ_DYING)) { | |
d9a64523 A |
2748 | workq_unlock(wq); |
2749 | ||
2750 | /* | |
2751 | * workq_push_idle_thread() will unset `has_stack` | |
2752 | * if it wants us to free the stack before parking. | |
2753 | */ | |
2754 | if (!uth->uu_save.uus_workq_park_data.has_stack) { | |
2755 | pthread_functions->workq_markfree_threadstack(p, uth->uu_thread, | |
0a7de745 | 2756 | get_task_map(p->task), uth->uu_workq_stackaddr); |
d9a64523 A |
2757 | } |
2758 | ||
2759 | /* | |
2760 | * When we remove the voucher from the thread, we may lose our importance | |
2761 | * causing us to get preempted, so we do this after putting the thread on | |
2762 | * the idle list. Then, when we get our importance back we'll be able to | |
2763 | * use this thread from e.g. the kevent call out to deliver a boosting | |
2764 | * message. | |
2765 | */ | |
2766 | __assert_only kern_return_t kr; | |
2767 | kr = thread_set_voucher_name(MACH_PORT_NULL); | |
2768 | assert(kr == KERN_SUCCESS); | |
2769 | ||
2770 | workq_lock_spin(wq); | |
2771 | uth->uu_workq_flags &= ~UT_WORKQ_IDLE_CLEANUP; | |
cb323159 | 2772 | setup_flags &= ~WQ_SETUP_CLEAR_VOUCHER; |
d9a64523 A |
2773 | } |
2774 | ||
f427ee49 A |
2775 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_END, wq, 0, 0, 0, 0); |
2776 | ||
d9a64523 A |
2777 | if (uth->uu_workq_flags & UT_WORKQ_RUNNING) { |
2778 | /* | |
2779 | * While we'd dropped the lock to unset our voucher, someone came | |
2780 | * around and made us runnable. But because we weren't waiting on the | |
2781 | * event their thread_wakeup() was ineffectual. To correct for that, | |
2782 | * we just run the continuation ourselves. | |
2783 | */ | |
cb323159 | 2784 | workq_unpark_select_threadreq_or_park_and_unlock(p, wq, uth, setup_flags); |
d9a64523 A |
2785 | __builtin_unreachable(); |
2786 | } | |
2787 | ||
2788 | if (uth->uu_workq_flags & UT_WORKQ_DYING) { | |
2789 | workq_unpark_for_death_and_unlock(p, wq, uth, | |
cb323159 | 2790 | WORKQ_UNPARK_FOR_DEATH_WAS_IDLE, setup_flags); |
d9a64523 A |
2791 | __builtin_unreachable(); |
2792 | } | |
2793 | ||
2794 | thread_set_pending_block_hint(uth->uu_thread, kThreadWaitParkedWorkQueue); | |
2795 | assert_wait(workq_parked_wait_event(uth), THREAD_INTERRUPTIBLE); | |
2796 | workq_unlock(wq); | |
d9a64523 A |
2797 | thread_block(workq_unpark_continue); |
2798 | __builtin_unreachable(); | |
2799 | } | |
2800 | ||
2801 | static inline bool | |
2802 | workq_may_start_event_mgr_thread(struct workqueue *wq, struct uthread *uth) | |
2803 | { | |
2804 | /* | |
2805 | * There's an event manager request and either: | |
2806 | * - no event manager currently running | |
2807 | * - we are re-using the event manager | |
2808 | */ | |
2809 | return wq->wq_thscheduled_count[_wq_bucket(WORKQ_THREAD_QOS_MANAGER)] == 0 || | |
0a7de745 | 2810 | (uth && uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER); |
d9a64523 A |
2811 | } |
2812 | ||
2813 | static uint32_t | |
2814 | workq_constrained_allowance(struct workqueue *wq, thread_qos_t at_qos, | |
0a7de745 | 2815 | struct uthread *uth, bool may_start_timer) |
d9a64523 A |
2816 | { |
2817 | assert(at_qos != WORKQ_THREAD_QOS_MANAGER); | |
2818 | uint32_t count = 0; | |
2819 | ||
2820 | uint32_t max_count = wq->wq_constrained_threads_scheduled; | |
2821 | if (uth && (uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) == 0) { | |
2822 | /* | |
2823 | * don't count the current thread as scheduled | |
2824 | */ | |
2825 | assert(max_count > 0); | |
2826 | max_count--; | |
2827 | } | |
2828 | if (max_count >= wq_max_constrained_threads) { | |
2829 | WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 1, | |
0a7de745 A |
2830 | wq->wq_constrained_threads_scheduled, |
2831 | wq_max_constrained_threads, 0); | |
d9a64523 A |
2832 | /* |
2833 | * we need 1 or more constrained threads to return to the kernel before | |
2834 | * we can dispatch additional work | |
2835 | */ | |
2836 | return 0; | |
2837 | } | |
2838 | max_count -= wq_max_constrained_threads; | |
2839 | ||
2840 | /* | |
2841 | * Compute a metric for how many threads are active. We find the | |
c3c9b80d A |
2842 | * highest priority request outstanding and then add up the number of active |
2843 | * threads in that and all higher-priority buckets. We'll also add any | |
2844 | * "busy" threads which are not currently active but blocked recently enough | |
2845 | * that we can't be sure that they won't be unblocked soon and start | |
2846 | * being active again. | |
2847 | * | |
2848 | * We'll then compare this metric to our max concurrency to decide whether | |
2849 | * to add a new thread. | |
d9a64523 A |
2850 | */ |
2851 | ||
2852 | uint32_t busycount, thactive_count; | |
2853 | ||
2854 | thactive_count = _wq_thactive_aggregate_downto_qos(wq, _wq_thactive(wq), | |
0a7de745 | 2855 | at_qos, &busycount, NULL); |
d9a64523 A |
2856 | |
2857 | if (uth && uth->uu_workq_pri.qos_bucket != WORKQ_THREAD_QOS_MANAGER && | |
0a7de745 | 2858 | at_qos <= uth->uu_workq_pri.qos_bucket) { |
d9a64523 A |
2859 | /* |
2860 | * Don't count this thread as currently active, but only if it's not | |
2861 | * a manager thread, as _wq_thactive_aggregate_downto_qos ignores active | |
2862 | * managers. | |
2863 | */ | |
2864 | assert(thactive_count > 0); | |
2865 | thactive_count--; | |
2866 | } | |
2867 | ||
2868 | count = wq_max_parallelism[_wq_bucket(at_qos)]; | |
2869 | if (count > thactive_count + busycount) { | |
2870 | count -= thactive_count + busycount; | |
2871 | WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 2, | |
0a7de745 | 2872 | thactive_count, busycount, 0); |
d9a64523 A |
2873 | return MIN(count, max_count); |
2874 | } else { | |
2875 | WQ_TRACE_WQ(TRACE_wq_constrained_admission | DBG_FUNC_NONE, wq, 3, | |
0a7de745 | 2876 | thactive_count, busycount, 0); |
d9a64523 A |
2877 | } |
2878 | ||
c3c9b80d | 2879 | if (may_start_timer) { |
d9a64523 A |
2880 | /* |
2881 | * If this is called from the add timer, we won't have another timer | |
2882 | * fire when the thread exits the "busy" state, so rearm the timer. | |
2883 | */ | |
2884 | workq_schedule_delayed_thread_creation(wq, 0); | |
2885 | } | |
2886 | ||
2887 | return 0; | |
2888 | } | |
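Stripped of tracing and of the current-thread adjustments, the admission math in workq_constrained_allowance() is two checks: a hard cap on constrained threads overall, and a spare-concurrency computation against active plus recently-busy threads. A simplified standalone sketch (the final clamp against the remaining pool headroom is omitted for brevity; the parameter names mirror the locals above but the helper itself is hypothetical):

#include <stdint.h>

static uint32_t
constrained_allowance(uint32_t scheduled_constrained,
    uint32_t max_constrained, uint32_t parallelism,
    uint32_t thactive_count, uint32_t busycount)
{
        if (scheduled_constrained >= max_constrained) {
                return 0;       /* need a constrained thread to return first */
        }
        if (parallelism <= thactive_count + busycount) {
                return 0;       /* no spare concurrency in this QoS bucket */
        }
        return parallelism - (thactive_count + busycount);
}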
2889 | ||
2890 | static bool | |
2891 | workq_threadreq_admissible(struct workqueue *wq, struct uthread *uth, | |
0a7de745 | 2892 | workq_threadreq_t req) |
d9a64523 A |
2893 | { |
2894 | if (req->tr_qos == WORKQ_THREAD_QOS_MANAGER) { | |
2895 | return workq_may_start_event_mgr_thread(wq, uth); | |
2896 | } | |
cb323159 | 2897 | if ((req->tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) == 0) { |
d9a64523 A |
2898 | return workq_constrained_allowance(wq, req->tr_qos, uth, true); |
2899 | } | |
2900 | return true; | |
2901 | } | |
2902 | ||
2903 | static workq_threadreq_t | |
2904 | workq_threadreq_select_for_creator(struct workqueue *wq) | |
2905 | { | |
f427ee49 | 2906 | workq_threadreq_t req_qos, req_pri, req_tmp, req_mgr; |
d9a64523 A |
2907 | thread_qos_t qos = THREAD_QOS_UNSPECIFIED; |
2908 | uint8_t pri = 0; | |
2909 | ||
d9a64523 A |
2910 | /* |
2911 | * Compute the best priority request, and ignore the turnstile for now | |
2912 | */ | |
2913 | ||
2914 | req_pri = priority_queue_max(&wq->wq_special_queue, | |
0a7de745 | 2915 | struct workq_threadreq_s, tr_entry); |
d9a64523 | 2916 | if (req_pri) { |
f427ee49 A |
2917 | pri = (uint8_t)priority_queue_entry_sched_pri(&wq->wq_special_queue, |
2918 | &req_pri->tr_entry); | |
2919 | } | |
2920 | ||
2921 | /* | |
2922 | * Handle the manager thread request. The special queue might yield | |
2923 | * a higher priority, but the manager always beats the QoS world. | |
2924 | */ | |
2925 | ||
2926 | req_mgr = wq->wq_event_manager_threadreq; | |
2927 | if (req_mgr && workq_may_start_event_mgr_thread(wq, NULL)) { | |
2928 | uint32_t mgr_pri = wq->wq_event_manager_priority; | |
2929 | ||
2930 | if (mgr_pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) { | |
2931 | mgr_pri &= _PTHREAD_PRIORITY_SCHED_PRI_MASK; | |
2932 | } else { | |
2933 | mgr_pri = thread_workq_pri_for_qos( | |
2934 | _pthread_priority_thread_qos(mgr_pri)); | |
2935 | } | |
2936 | ||
2937 | return mgr_pri >= pri ? req_mgr : req_pri; | |
d9a64523 A |
2938 | } |
2939 | ||
2940 | /* | |
2941 | * Compute the best QoS Request, and check whether it beats the "pri" one | |
2942 | */ | |
2943 | ||
2944 | req_qos = priority_queue_max(&wq->wq_overcommit_queue, | |
0a7de745 | 2945 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
2946 | if (req_qos) { |
2947 | qos = req_qos->tr_qos; | |
2948 | } | |
2949 | ||
2950 | req_tmp = priority_queue_max(&wq->wq_constrained_queue, | |
0a7de745 | 2951 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
2952 | |
2953 | if (req_tmp && qos < req_tmp->tr_qos) { | |
2954 | if (pri && pri >= thread_workq_pri_for_qos(req_tmp->tr_qos)) { | |
2955 | return req_pri; | |
2956 | } | |
2957 | ||
2958 | if (workq_constrained_allowance(wq, req_tmp->tr_qos, NULL, true)) { | |
2959 | /* | |
2960 | * If the constrained thread request is the best one and passes | |
2961 | * the admission check, pick it. | |
2962 | */ | |
2963 | return req_tmp; | |
2964 | } | |
2965 | } | |
2966 | ||
2967 | if (pri && (!qos || pri >= thread_workq_pri_for_qos(qos))) { | |
2968 | return req_pri; | |
2969 | } | |
2970 | ||
2971 | if (req_qos) { | |
2972 | return req_qos; | |
2973 | } | |
2974 | ||
2975 | /* | |
2976 | * If we had no eligible request but we have a turnstile push, | |
2977 | * it must be a non-overcommit thread request that failed
2978 | * the admission check.
2979 | *
2980 | * Just fake a BG thread request so that, if the push stops, the creator
2981 | * priority simply drops to 4.
2982 | */ | |
2983 | if (turnstile_workq_proprietor_of_max_turnstile(wq->wq_turnstile, NULL)) { | |
2984 | static struct workq_threadreq_s workq_sync_push_fake_req = { | |
2985 | .tr_qos = THREAD_QOS_BACKGROUND, | |
2986 | }; | |
2987 | ||
2988 | return &workq_sync_push_fake_req; | |
2989 | } | |
2990 | ||
2991 | return NULL; | |
2992 | } | |
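/*
 * Condensed sketch of the selection precedence implemented above, for readers
 * who want the rules without the queue plumbing. Everything here is an
 * illustrative stand-in (no xnu types): `special_pri` and `manager_pri` are
 * assumed to already be in the scheduler-priority domain (the manager one
 * normalized as in the code above), `qos_to_pri` stands in for
 * thread_workq_pri_for_qos(), `has_manager` implies the manager may start,
 * and `constrained_ok` stands in for the admission check.
 */
#include <stdbool.h>
#include <stdint.h>

enum req_pick {
	PICK_NONE,        /* nothing eligible, no push: no creator needed */
	PICK_FAKE_BG,     /* nothing eligible, but a turnstile push exists */
	PICK_SPECIAL,     /* special (raw sched pri) queue */
	PICK_MANAGER,     /* event manager request */
	PICK_CONSTRAINED, /* non-overcommit QoS queue */
	PICK_OVERCOMMIT,  /* overcommit QoS queue */
};

struct req_candidates {
	bool    has_special, has_manager, has_constrained, has_overcommit;
	uint8_t special_pri;     /* max sched pri in the special queue */
	uint8_t manager_pri;     /* manager priority, already normalized */
	uint8_t constrained_qos; /* max QoS in the constrained queue */
	uint8_t overcommit_qos;  /* max QoS in the overcommit queue */
};

static enum req_pick
select_for_creator_sketch(const struct req_candidates *c,
    bool constrained_ok, bool has_turnstile_push,
    uint8_t (*qos_to_pri)(uint8_t qos))
{
	uint8_t pri = c->has_special ? c->special_pri : 0;

	/* The manager beats the QoS world; only a higher special request wins. */
	if (c->has_manager) {
		return c->manager_pri >= pri ? PICK_MANAGER : PICK_SPECIAL;
	}

	uint8_t oc_qos = c->has_overcommit ? c->overcommit_qos : 0;

	/* A constrained request that outranks the overcommit one is chosen when
	 * it clears admission and isn't itself outranked by the special one. */
	if (c->has_constrained && oc_qos < c->constrained_qos) {
		if (pri && pri >= qos_to_pri(c->constrained_qos)) {
			return PICK_SPECIAL;
		}
		if (constrained_ok) {
			return PICK_CONSTRAINED;
		}
	}

	/* Otherwise the special request wins ties against the overcommit one. */
	if (pri && (!oc_qos || pri >= qos_to_pri(oc_qos))) {
		return PICK_SPECIAL;
	}
	if (c->has_overcommit) {
		return PICK_OVERCOMMIT;
	}

	/* No eligible request: a turnstile push still keeps the creator around
	 * at background priority, otherwise there is nothing to elect for. */
	return has_turnstile_push ? PICK_FAKE_BG : PICK_NONE;
}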
2993 | ||
2994 | static workq_threadreq_t | |
2995 | workq_threadreq_select(struct workqueue *wq, struct uthread *uth) | |
2996 | { | |
f427ee49 | 2997 | workq_threadreq_t req_qos, req_pri, req_tmp, req_mgr; |
d9a64523 A |
2998 | uintptr_t proprietor; |
2999 | thread_qos_t qos = THREAD_QOS_UNSPECIFIED; | |
3000 | uint8_t pri = 0; | |
3001 | ||
0a7de745 A |
3002 | if (uth == wq->wq_creator) { |
3003 | uth = NULL; | |
3004 | } | |
d9a64523 | 3005 | |
d9a64523 A |
3006 | /* |
3007 | * Compute the best priority request (special or turnstile) | |
3008 | */ | |
3009 | ||
f427ee49 | 3010 | pri = (uint8_t)turnstile_workq_proprietor_of_max_turnstile(wq->wq_turnstile, |
0a7de745 | 3011 | &proprietor); |
d9a64523 A |
3012 | if (pri) { |
3013 | struct kqworkloop *kqwl = (struct kqworkloop *)proprietor; | |
cb323159 A |
3014 | req_pri = &kqwl->kqwl_request; |
3015 | if (req_pri->tr_state != WORKQ_TR_STATE_QUEUED) { | |
d9a64523 | 3016 | panic("Invalid thread request (%p) state %d", |
0a7de745 | 3017 | req_pri, req_pri->tr_state); |
d9a64523 A |
3018 | } |
3019 | } else { | |
3020 | req_pri = NULL; | |
3021 | } | |
3022 | ||
3023 | req_tmp = priority_queue_max(&wq->wq_special_queue, | |
0a7de745 | 3024 | struct workq_threadreq_s, tr_entry); |
f427ee49 | 3025 | if (req_tmp && pri < priority_queue_entry_sched_pri(&wq->wq_special_queue, |
0a7de745 | 3026 | &req_tmp->tr_entry)) { |
d9a64523 | 3027 | req_pri = req_tmp; |
f427ee49 A |
3028 | pri = (uint8_t)priority_queue_entry_sched_pri(&wq->wq_special_queue, |
3029 | &req_tmp->tr_entry); | |
3030 | } | |
3031 | ||
3032 | /* | |
3033 | * Handle the manager thread request. The special queue might yield | |
3034 | * a higher priority, but the manager always beats the QoS world. | |
3035 | */ | |
3036 | ||
3037 | req_mgr = wq->wq_event_manager_threadreq; | |
3038 | if (req_mgr && workq_may_start_event_mgr_thread(wq, uth)) { | |
3039 | uint32_t mgr_pri = wq->wq_event_manager_priority; | |
3040 | ||
3041 | if (mgr_pri & _PTHREAD_PRIORITY_SCHED_PRI_FLAG) { | |
3042 | mgr_pri &= _PTHREAD_PRIORITY_SCHED_PRI_MASK; | |
3043 | } else { | |
3044 | mgr_pri = thread_workq_pri_for_qos( | |
3045 | _pthread_priority_thread_qos(mgr_pri)); | |
3046 | } | |
3047 | ||
3048 | return mgr_pri >= pri ? req_mgr : req_pri; | |
d9a64523 A |
3049 | } |
3050 | ||
3051 | /* | |
3052 | * Compute the best QoS Request, and check whether it beats the "pri" one | |
3053 | */ | |
3054 | ||
3055 | req_qos = priority_queue_max(&wq->wq_overcommit_queue, | |
0a7de745 | 3056 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
3057 | if (req_qos) { |
3058 | qos = req_qos->tr_qos; | |
3059 | } | |
3060 | ||
3061 | req_tmp = priority_queue_max(&wq->wq_constrained_queue, | |
0a7de745 | 3062 | struct workq_threadreq_s, tr_entry); |
d9a64523 A |
3063 | |
3064 | if (req_tmp && qos < req_tmp->tr_qos) { | |
3065 | if (pri && pri >= thread_workq_pri_for_qos(req_tmp->tr_qos)) { | |
3066 | return req_pri; | |
3067 | } | |
3068 | ||
3069 | if (workq_constrained_allowance(wq, req_tmp->tr_qos, uth, true)) { | |
3070 | /* | |
3071 | * If the constrained thread request is the best one and passes | |
3072 | * the admission check, pick it. | |
3073 | */ | |
3074 | return req_tmp; | |
3075 | } | |
3076 | } | |
3077 | ||
3078 | if (req_pri && (!qos || pri >= thread_workq_pri_for_qos(qos))) { | |
3079 | return req_pri; | |
3080 | } | |
3081 | ||
3082 | return req_qos; | |
3083 | } | |
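/*
 * Sketch of the manager-priority normalization shared by the two selection
 * functions above. wq_event_manager_priority either carries a raw scheduler
 * priority (when the "sched pri" flag is set) or encodes a QoS class that
 * must be converted. The macro values and callbacks below are illustrative
 * placeholders, not the real _PTHREAD_PRIORITY_SCHED_PRI_FLAG/_MASK values
 * or kernel functions.
 */
#include <stdint.h>

#define SKETCH_SCHED_PRI_FLAG 0x20000000u /* placeholder bit, for illustration */
#define SKETCH_SCHED_PRI_MASK 0x000000ffu /* placeholder mask, for illustration */

static uint32_t
normalize_manager_pri_sketch(uint32_t mgr_pri,
    uint32_t (*pp_to_qos)(uint32_t),        /* _pthread_priority_thread_qos() stand-in */
    uint32_t (*qos_to_workq_pri)(uint32_t)) /* thread_workq_pri_for_qos() stand-in */
{
	if (mgr_pri & SKETCH_SCHED_PRI_FLAG) {
		/* Already a raw scheduler priority: strip the flag bits. */
		return mgr_pri & SKETCH_SCHED_PRI_MASK;
	}
	/* QoS-encoded: map the QoS class to its workqueue scheduler priority. */
	return qos_to_workq_pri(pp_to_qos(mgr_pri));
}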
3084 | ||
3085 | /* | |
3086 | * The creator is an anonymous thread used to make other threads. It is
3087 | * counted as scheduled, but otherwise has no scheduler callback set and
3088 | * is not tracked as active.
3089 | * | |
3090 | * When more requests are added or an existing one is hurried along,
3091 | * a creator is elected and set up, or the existing one is overridden accordingly.
3092 | * | |
3093 | * While this creator is in flight, no request has been dequeued, so threads
3094 | * that are already running have a chance at stealing thread requests, which
3095 | * avoids useless context switches; the creator, once scheduled, may then find
3096 | * no work to do and will simply park again.
3097 | * | |
3098 | * The creator serves the dual purpose of informing the scheduler of work that
3099 | * hasn't been materialized as threads yet, and of acting as a natural pacing
3100 | * mechanism for thread creation.
3101 | * | |
3102 | * Because it is anonymous (and not bound to anything), thread requests
3103 | * can be stolen from this creator by threads already on core, yielding more
3104 | * efficient scheduling and reduced context switches.
3105 | */ | |
3106 | static void | |
cb323159 A |
3107 | workq_schedule_creator(proc_t p, struct workqueue *wq, |
3108 | workq_kern_threadreq_flags_t flags) | |
d9a64523 A |
3109 | { |
3110 | workq_threadreq_t req; | |
3111 | struct uthread *uth; | |
cb323159 | 3112 | bool needs_wakeup; |
d9a64523 A |
3113 | |
3114 | workq_lock_held(wq); | |
3115 | assert(p || (flags & WORKQ_THREADREQ_CAN_CREATE_THREADS) == 0); | |
3116 | ||
3117 | again: | |
3118 | uth = wq->wq_creator; | |
3119 | ||
3120 | if (!wq->wq_reqcount) { | |
cb323159 A |
3121 | /* |
3122 | * There is no thread request left. | |
3123 | * | |
3124 | * If there is a creator, leave everything in place, so that it cleans | |
3125 | * up itself in workq_push_idle_thread(). | |
3126 | * | |
3127 | * Else, make sure the turnstile state is reset to no inheritor. | |
3128 | */ | |
d9a64523 A |
3129 | if (uth == NULL) { |
3130 | workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0); | |
3131 | } | |
3132 | return; | |
3133 | } | |
3134 | ||
3135 | req = workq_threadreq_select_for_creator(wq); | |
3136 | if (req == NULL) { | |
cb323159 A |
3137 | /* |
3138 | * There isn't a thread request that passes the admission check. | |
3139 | * | |
3140 | * If there is a creator, do not touch anything, the creator will sort | |
3141 | * it out when it runs. | |
3142 | * | |
3143 | * Else, set the inheritor to "WORKQ" so that the turnstile propagation | |
3144 | * code calls us if anything changes. | |
3145 | */ | |
3146 | if (uth == NULL) { | |
d9a64523 A |
3147 | workq_turnstile_update_inheritor(wq, wq, TURNSTILE_INHERITOR_WORKQ); |
3148 | } | |
3149 | return; | |
3150 | } | |
3151 | ||
3152 | if (uth) { | |
3153 | /* | |
3154 | * We need to maybe override the creator we already have | |
3155 | */ | |
3156 | if (workq_thread_needs_priority_change(req, uth)) { | |
3157 | WQ_TRACE_WQ(TRACE_wq_creator_select | DBG_FUNC_NONE, | |
0a7de745 | 3158 | wq, 1, thread_tid(uth->uu_thread), req->tr_qos, 0); |
cb323159 | 3159 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); |
d9a64523 | 3160 | } |
cb323159 | 3161 | assert(wq->wq_inheritor == uth->uu_thread); |
d9a64523 A |
3162 | } else if (wq->wq_thidlecount) { |
3163 | /* | |
3164 | * We need to unpark a creator thread | |
3165 | */ | |
cb323159 A |
3166 | wq->wq_creator = uth = workq_pop_idle_thread(wq, UT_WORKQ_OVERCOMMIT, |
3167 | &needs_wakeup); | |
4ba76501 A |
3168 | /* Always reset the priorities on the newly chosen creator */ |
3169 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); | |
d9a64523 | 3170 | workq_turnstile_update_inheritor(wq, uth->uu_thread, |
0a7de745 | 3171 | TURNSTILE_INHERITOR_THREAD); |
d9a64523 | 3172 | WQ_TRACE_WQ(TRACE_wq_creator_select | DBG_FUNC_NONE, |
0a7de745 | 3173 | wq, 2, thread_tid(uth->uu_thread), req->tr_qos, 0); |
d9a64523 A |
3174 | uth->uu_save.uus_workq_park_data.fulfilled_snapshot = wq->wq_fulfilled; |
3175 | uth->uu_save.uus_workq_park_data.yields = 0; | |
cb323159 A |
3176 | if (needs_wakeup) { |
3177 | workq_thread_wakeup(uth); | |
3178 | } | |
d9a64523 A |
3179 | } else { |
3180 | /* | |
3181 | * We need to allocate a thread... | |
3182 | */ | |
3183 | if (__improbable(wq->wq_nthreads >= wq_max_threads)) { | |
3184 | /* out of threads, just go away */ | |
cb323159 | 3185 | flags = WORKQ_THREADREQ_NONE; |
d9a64523 A |
3186 | } else if (flags & WORKQ_THREADREQ_SET_AST_ON_FAILURE) { |
3187 | act_set_astkevent(current_thread(), AST_KEVENT_REDRIVE_THREADREQ); | |
3188 | } else if (!(flags & WORKQ_THREADREQ_CAN_CREATE_THREADS)) { | |
3189 | /* This can drop the workqueue lock, and take it again */ | |
3190 | workq_schedule_immediate_thread_creation(wq); | |
3191 | } else if (workq_add_new_idle_thread(p, wq)) { | |
3192 | goto again; | |
3193 | } else { | |
3194 | workq_schedule_delayed_thread_creation(wq, 0); | |
3195 | } | |
3196 | ||
cb323159 A |
3197 | /* |
3198 | * If the current thread is the inheritor: | |
3199 | * | |
3200 | * If we set the AST, then the thread will stay the inheritor until | |
3201 | * either the AST calls workq_kern_threadreq_redrive(), or it parks | |
3202 | * and calls workq_push_idle_thread(). | |
3203 | * | |
3204 | * Else, the responsibility for thread creation lies with a thread-call,
3205 | * and we need to clear the inheritor. | |
3206 | */ | |
3207 | if ((flags & WORKQ_THREADREQ_SET_AST_ON_FAILURE) == 0 && | |
3208 | wq->wq_inheritor == current_thread()) { | |
3209 | workq_turnstile_update_inheritor(wq, TURNSTILE_INHERITOR_NULL, 0); | |
3210 | } | |
3211 | } | |
3212 | } | |
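/*
 * Condensed sketch of the decision made by workq_schedule_creator() above.
 * The enum and helper are illustrative only; they collapse the kernel's
 * bookkeeping into the four possible outcomes.
 */
#include <stdbool.h>

enum creator_action {
	CREATOR_LEAVE_ALONE,   /* no eligible request, or creator already fine */
	CREATOR_OVERRIDE,      /* re-prioritize the existing creator */
	CREATOR_UNPARK_IDLE,   /* promote an idle thread to be the creator */
	CREATOR_MAKE_THREAD    /* no idle thread: create one (possibly async) */
};

static enum creator_action
schedule_creator_sketch(bool have_eligible_request, bool have_creator,
    bool creator_needs_repri, bool have_idle_thread)
{
	if (!have_eligible_request) {
		/* An existing creator will clean up after itself when it parks. */
		return CREATOR_LEAVE_ALONE;
	}
	if (have_creator) {
		return creator_needs_repri ? CREATOR_OVERRIDE : CREATOR_LEAVE_ALONE;
	}
	if (have_idle_thread) {
		return CREATOR_UNPARK_IDLE;
	}
	return CREATOR_MAKE_THREAD;
}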
3213 | ||
3214 | /** | |
3215 | * Same as workq_unpark_select_threadreq_or_park_and_unlock, | |
3216 | * but do not allow early binds. | |
3217 | * | |
3218 | * Called with the base pri frozen, will unfreeze it. | |
3219 | */ | |
3220 | __attribute__((noreturn, noinline)) | |
3221 | static void | |
3222 | workq_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, | |
3223 | struct uthread *uth, uint32_t setup_flags) | |
3224 | { | |
3225 | workq_threadreq_t req = NULL; | |
3226 | bool is_creator = (wq->wq_creator == uth); | |
3227 | bool schedule_creator = false; | |
3228 | ||
3229 | if (__improbable(_wq_exiting(wq))) { | |
3230 | WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 0, 0, 0, 0); | |
3231 | goto park; | |
3232 | } | |
3233 | ||
3234 | if (wq->wq_reqcount == 0) { | |
3235 | WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 1, 0, 0, 0); | |
3236 | goto park; | |
3237 | } | |
3238 | ||
3239 | req = workq_threadreq_select(wq, uth); | |
3240 | if (__improbable(req == NULL)) { | |
3241 | WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 2, 0, 0, 0); | |
3242 | goto park; | |
3243 | } | |
3244 | ||
3245 | uint8_t tr_flags = req->tr_flags; | |
3246 | struct turnstile *req_ts = kqueue_threadreq_get_turnstile(req); | |
3247 | ||
3248 | /* | |
3249 | * Attempt to set ourselves up as the new thing to run, moving all priority
3250 | * pushes to ourselves. | |
3251 | * | |
3252 | * If the current thread is the creator, then the fact that we are presently | |
3253 | * running is proof that we'll do something useful, so keep going. | |
3254 | * | |
3255 | * For other cases, peek at the AST to know whether the scheduler wants | |
3256 | * to preempt us; if so, park instead, and move the thread request
3257 | * turnstile back to the workqueue. | |
3258 | */ | |
3259 | if (req_ts) { | |
3260 | workq_perform_turnstile_operation_locked(wq, ^{ | |
3261 | turnstile_update_inheritor(req_ts, uth->uu_thread, | |
3262 | TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_THREAD); | |
3263 | turnstile_update_inheritor_complete(req_ts, | |
3264 | TURNSTILE_INTERLOCK_HELD); | |
3265 | }); | |
3266 | } | |
3267 | ||
3268 | if (is_creator) { | |
3269 | WQ_TRACE_WQ(TRACE_wq_creator_select, wq, 4, 0, | |
3270 | uth->uu_save.uus_workq_park_data.yields, 0); | |
3271 | wq->wq_creator = NULL; | |
3272 | _wq_thactive_inc(wq, req->tr_qos); | |
3273 | wq->wq_thscheduled_count[_wq_bucket(req->tr_qos)]++; | |
3274 | } else if (uth->uu_workq_pri.qos_bucket != req->tr_qos) { | |
3275 | _wq_thactive_move(wq, uth->uu_workq_pri.qos_bucket, req->tr_qos); | |
3276 | } | |
3277 | ||
3278 | workq_thread_reset_pri(wq, uth, req, /*unpark*/ true); | |
3279 | ||
3280 | if (__improbable(thread_unfreeze_base_pri(uth->uu_thread) && !is_creator)) { | |
3281 | if (req_ts) { | |
3282 | workq_perform_turnstile_operation_locked(wq, ^{ | |
3283 | turnstile_update_inheritor(req_ts, wq->wq_turnstile, | |
3284 | TURNSTILE_IMMEDIATE_UPDATE | TURNSTILE_INHERITOR_TURNSTILE); | |
3285 | turnstile_update_inheritor_complete(req_ts, | |
3286 | TURNSTILE_INTERLOCK_HELD); | |
3287 | }); | |
d9a64523 | 3288 | } |
cb323159 A |
3289 | WQ_TRACE_WQ(TRACE_wq_select_threadreq | DBG_FUNC_NONE, wq, 3, 0, 0, 0); |
3290 | goto park_thawed; | |
3291 | } | |
3292 | ||
3293 | /* | |
3294 | * We passed all checks, dequeue the request, bind to it, and set it up | |
3295 | * to return to user. | |
3296 | */ | |
3297 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_START, wq, | |
3298 | workq_trace_req_id(req), 0, 0, 0); | |
3299 | wq->wq_fulfilled++; | |
3300 | schedule_creator = workq_threadreq_dequeue(wq, req); | |
3301 | ||
3302 | if (tr_flags & (WORKQ_TR_FLAG_KEVENT | WORKQ_TR_FLAG_WORKLOOP)) { | |
3303 | kqueue_threadreq_bind_prepost(p, req, uth); | |
3304 | req = NULL; | |
3305 | } else if (req->tr_count > 0) { | |
3306 | req = NULL; | |
3307 | } | |
3308 | ||
3309 | workq_thread_reset_cpupercent(req, uth); | |
3310 | if (uth->uu_workq_flags & UT_WORKQ_NEW) { | |
3311 | uth->uu_workq_flags ^= UT_WORKQ_NEW; | |
3312 | setup_flags |= WQ_SETUP_FIRST_USE; | |
3313 | } | |
3314 | if (tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) { | |
3315 | if ((uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) == 0) { | |
3316 | uth->uu_workq_flags |= UT_WORKQ_OVERCOMMIT; | |
3317 | wq->wq_constrained_threads_scheduled--; | |
3318 | } | |
3319 | } else { | |
3320 | if ((uth->uu_workq_flags & UT_WORKQ_OVERCOMMIT) != 0) { | |
3321 | uth->uu_workq_flags &= ~UT_WORKQ_OVERCOMMIT; | |
3322 | wq->wq_constrained_threads_scheduled++; | |
3323 | } | |
3324 | } | |
3325 | ||
3326 | if (is_creator || schedule_creator) { | |
3327 | /* This can drop the workqueue lock, and take it again */ | |
3328 | workq_schedule_creator(p, wq, WORKQ_THREADREQ_CAN_CREATE_THREADS); | |
3329 | } | |
3330 | ||
3331 | workq_unlock(wq); | |
3332 | ||
3333 | if (req) { | |
3334 | zfree(workq_zone_threadreq, req); | |
3335 | } | |
3336 | ||
3337 | /* | |
3338 | * Run Thread, Run! | |
3339 | */ | |
3340 | uint32_t upcall_flags = WQ_FLAG_THREAD_NEWSPI; | |
3341 | if (uth->uu_workq_pri.qos_bucket == WORKQ_THREAD_QOS_MANAGER) { | |
3342 | upcall_flags |= WQ_FLAG_THREAD_EVENT_MANAGER; | |
3343 | } else if (tr_flags & WORKQ_TR_FLAG_OVERCOMMIT) { | |
3344 | upcall_flags |= WQ_FLAG_THREAD_OVERCOMMIT; | |
d9a64523 | 3345 | } |
cb323159 A |
3346 | if (tr_flags & WORKQ_TR_FLAG_KEVENT) { |
3347 | upcall_flags |= WQ_FLAG_THREAD_KEVENT; | |
3348 | } | |
3349 | if (tr_flags & WORKQ_TR_FLAG_WORKLOOP) { | |
3350 | upcall_flags |= WQ_FLAG_THREAD_WORKLOOP | WQ_FLAG_THREAD_KEVENT; | |
3351 | } | |
3352 | uth->uu_save.uus_workq_park_data.upcall_flags = upcall_flags; | |
3353 | ||
3354 | if (tr_flags & (WORKQ_TR_FLAG_KEVENT | WORKQ_TR_FLAG_WORKLOOP)) { | |
3355 | kqueue_threadreq_bind_commit(p, uth->uu_thread); | |
3356 | } | |
3357 | workq_setup_and_run(p, uth, setup_flags); | |
3358 | __builtin_unreachable(); | |
3359 | ||
3360 | park: | |
3361 | thread_unfreeze_base_pri(uth->uu_thread); | |
3362 | park_thawed: | |
3363 | workq_park_and_unlock(p, wq, uth, setup_flags); | |
d9a64523 A |
3364 | } |
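/*
 * Outline of the run-or-park decision above, as a standalone sketch. The
 * parameter names are illustrative; `base_pri_changed` corresponds to
 * thread_unfreeze_base_pri() reporting a change, i.e. the scheduler wants
 * to preempt this thread.
 */
#include <stdbool.h>

enum run_or_park { DO_RUN_REQUEST, DO_PARK };

static enum run_or_park
run_or_park_sketch(bool wq_exiting, bool have_request, bool is_creator,
    bool base_pri_changed)
{
	if (wq_exiting || !have_request) {
		return DO_PARK;
	}
	/* The creator is already on core, which is proof it can do something
	 * useful; other threads park when the scheduler wants them off core. */
	if (!is_creator && base_pri_changed) {
		return DO_PARK;
	}
	return DO_RUN_REQUEST;
}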
3365 | ||
3366 | /** | |
3367 | * Selects a thread request for the current (unparking) thread and runs it,
3368 | * or parks the thread if there is nothing it can run.
3369 | *
3370 | * - if the thread was early-bound to a request, selection is skipped and
3371 | * the thread is set up to run that request directly.
3372 | *
3373 | * - otherwise, the highest priority eligible request is selected. If no
3374 | * request can be run right away, the thread is parked.
3375 | *
3376 | * Either way, the thread request object serviced will be moved to state | |
3377 | * BINDING and attached to the uthread. | |
3378 | * | |
cb323159 A |
3379 | * Should be called with the workqueue lock held. Will drop it. |
3380 | * Should be called with the base pri not frozen. | |
d9a64523 A |
3381 | */ |
3382 | __attribute__((noreturn, noinline)) | |
3383 | static void | |
cb323159 A |
3384 | workq_unpark_select_threadreq_or_park_and_unlock(proc_t p, struct workqueue *wq, |
3385 | struct uthread *uth, uint32_t setup_flags) | |
d9a64523 | 3386 | { |
d9a64523 A |
3387 | if (uth->uu_workq_flags & UT_WORKQ_EARLY_BOUND) { |
3388 | if (uth->uu_workq_flags & UT_WORKQ_NEW) { | |
3389 | setup_flags |= WQ_SETUP_FIRST_USE; | |
3390 | } | |
3391 | uth->uu_workq_flags &= ~(UT_WORKQ_NEW | UT_WORKQ_EARLY_BOUND); | |
3392 | /* | |
3393 | * This pointer may already have been freed; it is only used for tracing.
3394 | */ | |
cb323159 | 3395 | workq_threadreq_t req = uth->uu_save.uus_workq_park_data.thread_request; |
d9a64523 A |
3396 | workq_unlock(wq); |
3397 | WQ_TRACE_WQ(TRACE_wq_thread_logical_run | DBG_FUNC_START, wq, | |
0a7de745 | 3398 | VM_KERNEL_ADDRHIDE(req), 0, 0, 0); |
cb323159 | 3399 | (void)req; |
d9a64523 A |
3400 | workq_setup_and_run(p, uth, setup_flags); |
3401 | __builtin_unreachable(); | |
3402 | } | |
3403 | ||
cb323159 A |
3404 | thread_freeze_base_pri(uth->uu_thread); |
3405 | workq_select_threadreq_or_park_and_unlock(p, wq, uth, setup_flags); | |
d9a64523 A |
3406 | } |
3407 | ||
3408 | static bool | |
3409 | workq_creator_should_yield(struct workqueue *wq, struct uthread *uth) | |
3410 | { | |
3411 | thread_qos_t qos = workq_pri_override(uth->uu_workq_pri); | |
3412 | ||
3413 | if (qos >= THREAD_QOS_USER_INTERACTIVE) { | |
3414 | return false; | |
3415 | } | |
3416 | ||
3417 | uint32_t snapshot = uth->uu_save.uus_workq_park_data.fulfilled_snapshot; | |
3418 | if (wq->wq_fulfilled == snapshot) { | |
3419 | return false; | |
3420 | } | |
3421 | ||
3422 | uint32_t cnt = 0, conc = wq_max_parallelism[_wq_bucket(qos)]; | |
3423 | if (wq->wq_fulfilled - snapshot > conc) { | |
3424 | /* we fulfilled more than NCPU requests since being dispatched */ | |
3425 | WQ_TRACE_WQ(TRACE_wq_creator_yield, wq, 1, | |
0a7de745 | 3426 | wq->wq_fulfilled, snapshot, 0); |
d9a64523 A |
3427 | return true; |
3428 | } | |
3429 | ||
3430 | for (int i = _wq_bucket(qos); i < WORKQ_NUM_QOS_BUCKETS; i++) { | |
3431 | cnt += wq->wq_thscheduled_count[i]; | |
3432 | } | |
3433 | if (conc <= cnt) { | |
3434 | /* We fulfilled requests and have more than NCPU scheduled threads */ | |
3435 | WQ_TRACE_WQ(TRACE_wq_creator_yield, wq, 2, | |
0a7de745 | 3436 | wq->wq_fulfilled, snapshot, 0); |
d9a64523 A |
3437 | return true; |
3438 | } | |
3439 | ||
3440 | return false; | |
3441 | } | |
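/*
 * The creator yield heuristic above as a standalone sketch. The names are
 * illustrative: `max_parallelism` plays the role of
 * wq_max_parallelism[_wq_bucket(qos)] (roughly the CPU count for that QoS)
 * and `scheduled_at_or_above` the summed wq_thscheduled_count[] buckets.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
creator_should_yield_sketch(bool is_user_interactive,
    uint32_t fulfilled_now, uint32_t fulfilled_snapshot,
    uint32_t max_parallelism, uint32_t scheduled_at_or_above)
{
	/* Never yield on behalf of user-interactive work. */
	if (is_user_interactive) {
		return false;
	}
	/* Nothing was fulfilled since this creator was dispatched: keep going. */
	if (fulfilled_now == fulfilled_snapshot) {
		return false;
	}
	/* Yield if more than a CPU's worth of requests were fulfilled... */
	if (fulfilled_now - fulfilled_snapshot > max_parallelism) {
		return true;
	}
	/* ...or if enough threads are already scheduled to cover the CPUs. */
	return scheduled_at_or_above >= max_parallelism;
}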
3442 | ||
3443 | /** | |
3444 | * parked thread wakes up | |
3445 | */ | |
3446 | __attribute__((noreturn, noinline)) | |
3447 | static void | |
3448 | workq_unpark_continue(void *parameter __unused, wait_result_t wr __unused) | |
3449 | { | |
cb323159 A |
3450 | thread_t th = current_thread(); |
3451 | struct uthread *uth = get_bsdthread_info(th); | |
d9a64523 A |
3452 | proc_t p = current_proc(); |
3453 | struct workqueue *wq = proc_get_wqptr_fast(p); | |
3454 | ||
3455 | workq_lock_spin(wq); | |
3456 | ||
3457 | if (wq->wq_creator == uth && workq_creator_should_yield(wq, uth)) { | |
3458 | /* | |
3459 | * If the number of threads we have out is able to keep up with the
3460 | * demand, then we should avoid sending this creator thread to | |
3461 | * userspace. | |
3462 | */ | |
3463 | uth->uu_save.uus_workq_park_data.fulfilled_snapshot = wq->wq_fulfilled; | |
3464 | uth->uu_save.uus_workq_park_data.yields++; | |
3465 | workq_unlock(wq); | |
3466 | thread_yield_with_continuation(workq_unpark_continue, NULL); | |
3467 | __builtin_unreachable(); | |
3468 | } | |
3469 | ||
3470 | if (__probable(uth->uu_workq_flags & UT_WORKQ_RUNNING)) { | |
cb323159 | 3471 | workq_unpark_select_threadreq_or_park_and_unlock(p, wq, uth, WQ_SETUP_NONE); |
d9a64523 A |
3472 | __builtin_unreachable(); |
3473 | } | |
3474 | ||
3475 | if (__probable(wr == THREAD_AWAKENED)) { | |
3476 | /* | |
3477 | * We were set running, but for the purposes of dying. | |
3478 | */ | |
3479 | assert(uth->uu_workq_flags & UT_WORKQ_DYING); | |
3480 | assert((uth->uu_workq_flags & UT_WORKQ_NEW) == 0); | |
3481 | } else { | |
3482 | /* | |
3483 | * workaround for <rdar://problem/38647347>:
3484 | * in case we do hit userspace, make sure that calling
3485 | * workq_thread_terminate() does the right thing here,
3486 | * and that, if we never call it, workq_exit() will too, because it sees
3487 | * this thread on the runlist.
3488 | */ | |
3489 | assert(wr == THREAD_INTERRUPTED); | |
3490 | wq->wq_thdying_count++; | |
3491 | uth->uu_workq_flags |= UT_WORKQ_DYING; | |
3492 | } | |
3493 | ||
3494 | workq_unpark_for_death_and_unlock(p, wq, uth, | |
cb323159 | 3495 | WORKQ_UNPARK_FOR_DEATH_WAS_IDLE, WQ_SETUP_NONE); |
d9a64523 A |
3496 | __builtin_unreachable(); |
3497 | } | |
3498 | ||
3499 | __attribute__((noreturn, noinline)) | |
3500 | static void | |
3501 | workq_setup_and_run(proc_t p, struct uthread *uth, int setup_flags) | |
3502 | { | |
3503 | thread_t th = uth->uu_thread; | |
3504 | vm_map_t vmap = get_task_map(p->task); | |
3505 | ||
3506 | if (setup_flags & WQ_SETUP_CLEAR_VOUCHER) { | |
3507 | /* | |
3508 | * For preemption reasons, we want to reset the voucher as late as | |
3509 | * possible, so we do it in two places: | |
3510 | * - Just before parking (i.e. in workq_park_and_unlock()) | |
3511 | * - Prior to doing the setup for the next workitem (i.e. here) | |
3512 | * | |
3513 | * Those two places are sufficient to ensure we always reset it before | |
3514 | * it goes back out to user space, but be careful to not break that | |
3515 | * guarantee. | |
3516 | */ | |
3517 | __assert_only kern_return_t kr; | |
3518 | kr = thread_set_voucher_name(MACH_PORT_NULL); | |
3519 | assert(kr == KERN_SUCCESS); | |
3520 | } | |
3521 | ||
3522 | uint32_t upcall_flags = uth->uu_save.uus_workq_park_data.upcall_flags; | |
3523 | if (!(setup_flags & WQ_SETUP_FIRST_USE)) { | |
3524 | upcall_flags |= WQ_FLAG_THREAD_REUSE; | |
3525 | } | |
3526 | ||
3527 | if (uth->uu_workq_flags & UT_WORKQ_OUTSIDE_QOS) { | |
3528 | /* | |
3529 | * For threads that have an outside-of-QoS thread priority, indicate | |
3530 | * to userspace that setting QoS should only affect the TSD and not | |
3531 | * change QOS in the kernel. | |
3532 | */ | |
3533 | upcall_flags |= WQ_FLAG_THREAD_OUTSIDEQOS; | |
3534 | } else { | |
3535 | /* | |
3536 | * Put the QoS class value into the lower bits of the reuse_thread | |
3537 | * register; this is where the thread priority used to be stored
3538 | * anyway. | |
3539 | */ | |
3540 | upcall_flags |= uth->uu_save.uus_workq_park_data.qos | | |
0a7de745 | 3541 | WQ_FLAG_THREAD_PRIO_QOS; |
d9a64523 A |
3542 | } |
3543 | ||
3544 | if (uth->uu_workq_thport == MACH_PORT_NULL) { | |
c3c9b80d | 3545 | /* convert_thread_to_port_pinned() consumes a reference */ |
d9a64523 | 3546 | thread_reference(th); |
c3c9b80d A |
3547 | /* Convert to immovable/pinned thread port, but port is not pinned yet */ |
3548 | ipc_port_t port = convert_thread_to_port_pinned(th); | |
3549 | /* Atomically, pin and copy out the port */ | |
3550 | uth->uu_workq_thport = ipc_port_copyout_send_pinned(port, get_task_ipcspace(p->task)); | |
d9a64523 A |
3551 | } |
3552 | ||
3553 | /* | |
3554 | * Call out to pthread, this sets up the thread, pulls in kevent structs | |
3555 | * onto the stack, sets up the thread state and then returns to userspace. | |
3556 | */ | |
3557 | WQ_TRACE_WQ(TRACE_wq_runthread | DBG_FUNC_START, | |
0a7de745 | 3558 | proc_get_wqptr_fast(p), 0, 0, 0, 0); |
d9a64523 A |
3559 | thread_sched_call(th, workq_sched_callback); |
3560 | pthread_functions->workq_setup_thread(p, th, vmap, uth->uu_workq_stackaddr, | |
0a7de745 | 3561 | uth->uu_workq_thport, 0, setup_flags, upcall_flags); |
d9a64523 A |
3562 | |
3563 | __builtin_unreachable(); | |
3564 | } | |
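/*
 * Sketch of how the upcall flags handed to pthread's setup path end up being
 * composed, pulling together the pieces set in the selection path and in
 * workq_setup_and_run() above. The WQ_FLAG_* names referenced in the comments
 * are the real ones used in this file, but the bit values below are
 * placeholders and the helper itself is illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>

/* Placeholder bit values for illustration; the real WQ_FLAG_* constants are
 * defined in the workqueue headers and differ from these. High bits are used
 * here so the low bits stay free for the QoS value. */
#define SK_FLAG_THREAD_NEWSPI        0x00010000u
#define SK_FLAG_THREAD_REUSE         0x00020000u
#define SK_FLAG_THREAD_EVENT_MANAGER 0x00040000u
#define SK_FLAG_THREAD_OVERCOMMIT    0x00080000u
#define SK_FLAG_THREAD_KEVENT        0x00100000u
#define SK_FLAG_THREAD_WORKLOOP      0x00200000u
#define SK_FLAG_THREAD_OUTSIDEQOS    0x00400000u
#define SK_FLAG_THREAD_PRIO_QOS      0x00800000u

static uint32_t
compose_upcall_flags_sketch(bool first_use, bool is_manager, bool overcommit,
    bool kevent, bool workloop, bool outside_qos, uint32_t qos_bits)
{
	uint32_t flags = SK_FLAG_THREAD_NEWSPI;

	if (is_manager) {
		flags |= SK_FLAG_THREAD_EVENT_MANAGER;
	} else if (overcommit) {
		flags |= SK_FLAG_THREAD_OVERCOMMIT;
	}
	if (kevent) {
		flags |= SK_FLAG_THREAD_KEVENT;
	}
	if (workloop) {
		flags |= SK_FLAG_THREAD_WORKLOOP | SK_FLAG_THREAD_KEVENT;
	}
	if (!first_use) {
		flags |= SK_FLAG_THREAD_REUSE;
	}
	if (outside_qos) {
		/* QoS changes should only touch the TSD, not the kernel QoS. */
		flags |= SK_FLAG_THREAD_OUTSIDEQOS;
	} else {
		/* The QoS class rides in the low bits of the reuse register. */
		flags |= qos_bits | SK_FLAG_THREAD_PRIO_QOS;
	}
	return flags;
}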
3565 | ||
3566 | #pragma mark misc | |
3567 | ||
3568 | int | |
3569 | fill_procworkqueue(proc_t p, struct proc_workqueueinfo * pwqinfo) | |
3570 | { | |
3571 | struct workqueue *wq = proc_get_wqptr(p); | |
3572 | int error = 0; | |
0a7de745 | 3573 | int activecount; |
d9a64523 A |
3574 | |
3575 | if (wq == NULL) { | |
3576 | return EINVAL; | |
3577 | } | |
3578 | ||
3579 | /* | |
3580 | * This is sometimes called from interrupt context by the kperf sampler. | |
3581 | * In that case, it's not safe to spin trying to take the lock since we | |
3582 | * might already hold it. So, we just try-lock it and error out if it's | |
3583 | * already held. Since this is just a debugging aid, and all our callers | |
3584 | * are able to handle an error, that's fine. | |
3585 | */ | |
3586 | bool locked = workq_lock_try(wq); | |
3587 | if (!locked) { | |
3588 | return EBUSY; | |
3589 | } | |
3590 | ||
3591 | wq_thactive_t act = _wq_thactive(wq); | |
3592 | activecount = _wq_thactive_aggregate_downto_qos(wq, act, | |
0a7de745 | 3593 | WORKQ_THREAD_QOS_MIN, NULL, NULL); |
d9a64523 A |
3594 | if (act & _wq_thactive_offset_for_qos(WORKQ_THREAD_QOS_MANAGER)) { |
3595 | activecount++; | |
3596 | } | |
3597 | pwqinfo->pwq_nthreads = wq->wq_nthreads; | |
3598 | pwqinfo->pwq_runthreads = activecount; | |
3599 | pwqinfo->pwq_blockedthreads = wq->wq_threads_scheduled - activecount; | |
3600 | pwqinfo->pwq_state = 0; | |
3601 | ||
3602 | if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { | |
3603 | pwqinfo->pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; | |
3604 | } | |
3605 | ||
3606 | if (wq->wq_nthreads >= wq_max_threads) { | |
3607 | pwqinfo->pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; | |
3608 | } | |
3609 | ||
3610 | workq_unlock(wq); | |
3611 | return error; | |
3612 | } | |
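/*
 * User-space analogue of the try-lock pattern described in the comment above:
 * a sampler that may run in a context where the lock could already be held
 * must not spin, so it try-locks and reports EBUSY instead. This is only an
 * illustration of the pattern with pthreads, not the kernel lock; the struct
 * and helper names are hypothetical.
 */
#include <errno.h>
#include <pthread.h>

struct sampled_stats {
	pthread_mutex_t lock;
	unsigned long   nthreads;
};

static int
sample_stats_nonblocking(struct sampled_stats *s, unsigned long *out)
{
	if (pthread_mutex_trylock(&s->lock) != 0) {
		/* Lock already held (possibly by ourselves): bail out. */
		return EBUSY;
	}
	*out = s->nthreads;
	pthread_mutex_unlock(&s->lock);
	return 0;
}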
3613 | ||
3614 | boolean_t | |
3615 | workqueue_get_pwq_exceeded(void *v, boolean_t *exceeded_total, | |
0a7de745 | 3616 | boolean_t *exceeded_constrained) |
d9a64523 A |
3617 | { |
3618 | proc_t p = v; | |
3619 | struct proc_workqueueinfo pwqinfo; | |
3620 | int err; | |
3621 | ||
3622 | assert(p != NULL); | |
3623 | assert(exceeded_total != NULL); | |
3624 | assert(exceeded_constrained != NULL); | |
3625 | ||
3626 | err = fill_procworkqueue(p, &pwqinfo); | |
3627 | if (err) { | |
3628 | return FALSE; | |
3629 | } | |
3630 | if (!(pwqinfo.pwq_state & WQ_FLAGS_AVAILABLE)) { | |
3631 | return FALSE; | |
3632 | } | |
3633 | ||
3634 | *exceeded_total = (pwqinfo.pwq_state & WQ_EXCEEDED_TOTAL_THREAD_LIMIT); | |
3635 | *exceeded_constrained = (pwqinfo.pwq_state & WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT); | |
3636 | ||
3637 | return TRUE; | |
3638 | } | |
3639 | ||
3640 | uint32_t | |
3641 | workqueue_get_pwq_state_kdp(void * v) | |
3642 | { | |
3643 | static_assert((WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT << 17) == | |
0a7de745 | 3644 | kTaskWqExceededConstrainedThreadLimit); |
d9a64523 | 3645 | static_assert((WQ_EXCEEDED_TOTAL_THREAD_LIMIT << 17) == |
0a7de745 | 3646 | kTaskWqExceededTotalThreadLimit); |
d9a64523 A |
3647 | static_assert((WQ_FLAGS_AVAILABLE << 17) == kTaskWqFlagsAvailable); |
3648 | static_assert((WQ_FLAGS_AVAILABLE | WQ_EXCEEDED_TOTAL_THREAD_LIMIT | | |
0a7de745 | 3649 | WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT) == 0x7); |
d9a64523 A |
3650 | |
3651 | if (v == NULL) { | |
3652 | return 0; | |
3653 | } | |
3654 | ||
3655 | proc_t p = v; | |
3656 | struct workqueue *wq = proc_get_wqptr(p); | |
3657 | ||
3658 | if (wq == NULL || workq_lock_spin_is_acquired_kdp(wq)) { | |
3659 | return 0; | |
3660 | } | |
3661 | ||
3662 | uint32_t pwq_state = WQ_FLAGS_AVAILABLE; | |
3663 | ||
3664 | if (wq->wq_constrained_threads_scheduled >= wq_max_constrained_threads) { | |
3665 | pwq_state |= WQ_EXCEEDED_CONSTRAINED_THREAD_LIMIT; | |
3666 | } | |
3667 | ||
3668 | if (wq->wq_nthreads >= wq_max_threads) { | |
3669 | pwq_state |= WQ_EXCEEDED_TOTAL_THREAD_LIMIT; | |
3670 | } | |
3671 | ||
3672 | return pwq_state; | |
3673 | } | |
3674 | ||
3675 | void | |
3676 | workq_init(void) | |
3677 | { | |
d9a64523 | 3678 | clock_interval_to_absolutetime_interval(wq_stalled_window.usecs, |
0a7de745 | 3679 | NSEC_PER_USEC, &wq_stalled_window.abstime); |
d9a64523 | 3680 | clock_interval_to_absolutetime_interval(wq_reduce_pool_window.usecs, |
0a7de745 | 3681 | NSEC_PER_USEC, &wq_reduce_pool_window.abstime); |
d9a64523 | 3682 | clock_interval_to_absolutetime_interval(wq_max_timer_interval.usecs, |
0a7de745 | 3683 | NSEC_PER_USEC, &wq_max_timer_interval.abstime); |
cb323159 A |
3684 | |
3685 | thread_deallocate_daemon_register_queue(&workq_deallocate_queue, | |
3686 | workq_deallocate_queue_invoke); | |
d9a64523 | 3687 | } |
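/*
 * User-space analogue of the interval conversions performed in workq_init()
 * above: clock_interval_to_absolutetime_interval() turns the wq_*_window
 * microsecond tunables into mach absolute-time units. Outside the kernel the
 * same conversion can be expressed with mach_timebase_info(); the helper name
 * is illustrative.
 */
#include <mach/mach_time.h>
#include <stdint.h>

static uint64_t
usecs_to_mach_abstime(uint64_t usecs)
{
	mach_timebase_info_data_t tb;
	(void)mach_timebase_info(&tb);      /* abstime * numer / denom == ns */
	uint64_t nanos = usecs * 1000ull;   /* NSEC_PER_USEC */
	return nanos * tb.denom / tb.numer; /* ns -> absolute time units */
}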