/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 * pthread_support.c
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/time.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
//#include <sys/pthread_internal.h>
#include <sys/vm.h>
#include <sys/user.h>

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/thread_call.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/processor.h>
#include <kern/block_hint.h>
//#include <kern/mach_param.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_map.h>
#include <mach/vm_region.h>

#include <libkern/OSAtomic.h>

#include <pexpert/pexpert.h>
#include <sys/pthread_shims.h>

#include "kern_internal.h"
#include "synch_internal.h"
#include "kern_trace.h"

typedef struct uthread *uthread_t;

//#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
#define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)

#define ECVCERORR 256
#define ECVPERORR 512

lck_mtx_t *pthread_list_mlock;

#define PTH_HASHSIZE 100

static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
static unsigned long pthhash;

static LIST_HEAD(, ksyn_wait_queue) pth_free_list;

static zone_t kwq_zone; /* zone for allocation of ksyn_queue */
static zone_t kwe_zone; /* zone for allocation of ksyn_waitq_element */

#define SEQFIT 0
#define FIRSTFIT 1

struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
	uint32_t ksynq_count;    /* number of entries in queue */
	uint32_t ksynq_firstnum; /* lowest seq in queue */
	uint32_t ksynq_lastnum;  /* highest seq in queue */
};
typedef struct ksyn_queue *ksyn_queue_t;

enum {
	KSYN_QUEUE_READ = 0,
	KSYN_QUEUE_WRITER,
	KSYN_QUEUE_MAX,
};

struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;
	LIST_ENTRY(ksyn_wait_queue) kw_list;
	user_addr_t kw_addr;
	uint64_t kw_owner;
	uint64_t kw_object;       /* object backing in shared mode */
	uint64_t kw_offset;       /* offset inside the object in shared mode */
	int kw_pflags;            /* flags under listlock protection */
	struct timeval kw_ts;     /* timeval needed for upkeep before free */
	int kw_iocount;           /* inuse reference */
	int kw_dropcount;         /* current users unlocking... */

	int kw_type;              /* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue;      /* num of waiters held */
	uint32_t kw_fakecount;    /* number of error/prepost fakes */
	uint32_t kw_highseq;      /* highest seq in the queue */
	uint32_t kw_lowseq;       /* lowest seq in the queue */
	uint32_t kw_lword;        /* L word value from userland */
	uint32_t kw_uword;        /* U word value from userland */
	uint32_t kw_sword;        /* S word value from userland */
	uint32_t kw_lastunlockseq; /* the last seq that unlocked */
	/* for CV to be used as the seq kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword;  /* the last seq that unlocked */
	/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword;  /* the last seq that unlocked; with num of waiters */
	uint32_t kw_overlapwatch; /* chance for overlaps */
	uint32_t kw_pre_rwwc;     /* prepost count */
	uint32_t kw_pre_lockseq;  /* prepost target seq */
	uint32_t kw_pre_sseq;     /* prepost target sword, in cvar used for mutexowned */
	uint32_t kw_pre_intrcount; /* prepost of missed wakeup due to intrs */
	uint32_t kw_pre_intrseq;  /* prepost of missed wakeup limit seq */
	uint32_t kw_pre_intrretbits; /* return bits value for missed wakeup threads */
	uint32_t kw_pre_intrtype; /* type of failed wakeups */

	int kw_kflags;
	int kw_qos_override;      /* QoS of max waiter during contention period */
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */
	lck_mtx_t kw_lock;        /* mutex lock protecting this structure */
};
typedef struct ksyn_wait_queue *ksyn_wait_queue_t;

#define TID_ZERO (uint64_t)0

/* bits needed in handling the rwlock unlock */
#define PTH_RW_TYPE_READ	0x01
#define PTH_RW_TYPE_WRITE	0x04
#define PTH_RW_TYPE_MASK	0xff
#define PTH_RW_TYPE_SHIFT	8

#define PTH_RWSHFT_TYPE_READ	0x0100
#define PTH_RWSHFT_TYPE_WRITE	0x0400
#define PTH_RWSHFT_TYPE_MASK	0xff00

/*
 * Mutex pshared attributes
 */
#define PTHREAD_PROCESS_SHARED		_PTHREAD_MTX_OPT_PSHARED
#define PTHREAD_PROCESS_PRIVATE		0x20
#define PTHREAD_PSHARED_FLAGS_MASK	0x30

/*
 * Mutex policy attributes
 */
#define _PTHREAD_MUTEX_POLICY_NONE		0
#define _PTHREAD_MUTEX_POLICY_FAIRSHARE		0x040	/* 1 */
#define _PTHREAD_MUTEX_POLICY_FIRSTFIT		0x080	/* 2 */
#define _PTHREAD_MUTEX_POLICY_REALTIME		0x0c0	/* 3 */
#define _PTHREAD_MUTEX_POLICY_ADAPTIVE		0x100	/* 4 */
#define _PTHREAD_MUTEX_POLICY_PRIPROTECT	0x140	/* 5 */
#define _PTHREAD_MUTEX_POLICY_PRIINHERIT	0x180	/* 6 */
#define PTHREAD_POLICY_FLAGS_MASK		0x1c0

/* pflags */
#define KSYN_WQ_INHASH	2
#define KSYN_WQ_SHARED	4
#define KSYN_WQ_WAITING	8	/* threads waiting for this wq to be available */
#define KSYN_WQ_FLIST	0x10	/* in free list to be freed after a short delay */

/* kflags */
#define KSYN_KWF_INITCLEARED	1	/* the init status found and preposts cleared */
#define KSYN_KWF_ZEROEDOUT	2	/* the lword, etc are inited to 0 */
#define KSYN_KWF_QOS_APPLIED	4	/* QoS override applied to owner */

#define KSYN_CLEANUP_DEADLINE 10
static int psynch_cleanupset;
thread_call_t psynch_thcall;

#define KSYN_WQTYPE_INWAIT	0x1000
#define KSYN_WQTYPE_INDROP	0x2000
#define KSYN_WQTYPE_MTX		0x01
#define KSYN_WQTYPE_CVAR	0x02
#define KSYN_WQTYPE_RWLOCK	0x04
#define KSYN_WQTYPE_SEMA	0x08
#define KSYN_WQTYPE_MASK	0xff

#define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)

#define KW_UNLOCK_PREPOST		0x01
#define KW_UNLOCK_PREPOST_READLOCK	0x08
#define KW_UNLOCK_PREPOST_WRLOCK	0x20

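/*
 * A note on "preposting" (explanatory summary, not part of the original
 * source): the psynch kernel never assumes a waiter has already arrived
 * when a wakeup is posted. If an unlock/signal reaches the kernel before
 * the matching waiter does, the wakeup is recorded against the target
 * sequence number (kw_pre_rwwc/kw_pre_lockseq, or a fake queue entry for
 * cvars). A later waiter whose lock sequence falls at or below the
 * preposted sequence consumes the record instead of blocking. The
 * CLEAR_*_BITS helpers below reset that bookkeeping.
 */
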
static void
CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_lockseq = 0;
	kwq->kw_pre_sseq = PTHRW_RWS_INIT;
	kwq->kw_pre_rwwc = 0;
}

static void
CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_intrcount = 0;
	kwq->kw_pre_intrseq = 0;
	kwq->kw_pre_intrretbits = 0;
	kwq->kw_pre_intrtype = 0;
}

static void
CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
{
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
		if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
			panic("CV: entries in queue during reinit %d:%d\n", kwq->kw_inqueue, kwq->kw_fakecount);
		}
	}
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
		kwq->kw_nextseqword = PTHRW_RWS_INIT;
		kwq->kw_overlapwatch = 0;
	}
	CLEAR_PREPOST_BITS(kwq);
	kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
	kwq->kw_lastseqword = PTHRW_RWS_INIT;
	CLEAR_INTR_PREPOST_BITS(kwq);
	kwq->kw_lword = 0;
	kwq->kw_uword = 0;
	kwq->kw_sword = PTHRW_RWS_INIT;
}

static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype, ksyn_wait_queue_t *wq);
static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);

static int _wait_result_to_errno(wait_result_t result);

static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t, block_hint_t);
static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
static void ksyn_freeallkwe(ksyn_queue_t kq);

static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t);
static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost);
static void ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t, ksyn_waitq_element_t);
static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t);

static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);

static void ksyn_queue_init(ksyn_queue_t kq);
static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);

static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);

static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);

static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);

static void psynch_cvcontinue(void *, wait_result_t);
static void psynch_mtxcontinue(void *, wait_result_t);

static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);

static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
{
	int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);

	// assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);

	if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
		/* the values of L, U and S are cleared out due to L==S in previous transition */
		kwq->kw_lword = mgen;
		kwq->kw_uword = ugen;
		kwq->kw_sword = rw_wc;
		kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
	} else {
		if (is_seqhigher(mgen, kwq->kw_lword)) {
			kwq->kw_lword = mgen;
		}
		if (is_seqhigher(ugen, kwq->kw_uword)) {
			kwq->kw_uword = ugen;
		}
		if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
			kwq->kw_sword = rw_wc;
		}
	}
	if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
		kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
	}
}

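/*
 * A worked example of the L/U/S word scheme (hypothetical values, for
 * illustration only): a cvar's state is carried in three 32-bit words,
 * L (lock/wait generation), U (unlock generation) and S (signal
 * generation), each advancing by PTHRW_INC per operation with the low
 * bits reserved for flags. With L = 0x500, S = 0x300 and
 * PTHRW_INC = 0x100, two waiters (generations 0x400 and 0x500) are
 * still unsignalled. The is_seqhigher()/is_seqlower() helpers compare
 * generations with wrap-safe modular arithmetic (as TCP does for
 * sequence numbers), so the comparisons above stay correct when the
 * counters roll over 2^32.
 */
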
static void
pthread_list_lock(void)
{
	lck_mtx_lock(pthread_list_mlock);
}

static void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}

static void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_lock(&kwq->kw_lock);
}

static void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_unlock(&kwq->kw_lock);
}


/* routine to drop the mutex unlocks; used both for mutexunlock system call and drop during cond wait */
static uint32_t
_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags)
{
	kern_return_t ret;
	uint32_t returnbits = 0;
	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t nextgen = (ugen + PTHRW_INC);

	ksyn_wqlock(kwq);
	kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
	uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);

redrive:
	if (firstfit) {
		if (kwq->kw_inqueue == 0) {
			// not set or the new lock sequence is higher
			if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) {
				kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK);
			}
			kwq->kw_pre_rwwc = 1;
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			// indicate prepost content in kernel
			returnbits = mgen | PTH_RWL_PBIT;
		} else {
			// signal first waiter
			ret = ksyn_mtxsignal(kwq, NULL, updatebits);
			if (ret == KERN_NOT_WAITING) {
				goto redrive;
			}
		}
	} else {
		int prepost = 0;
		if (kwq->kw_inqueue == 0) {
			// No waiters in the queue.
			prepost = 1;
		} else {
			uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK);
			if (low_writer == nextgen) {
				/* next seq to be granted found */
				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
				ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT);
				if (ret == KERN_NOT_WAITING) {
					/* interrupt post */
					kwq->kw_pre_intrcount = 1;
					kwq->kw_pre_intrseq = nextgen;
					kwq->kw_pre_intrretbits = updatebits;
					kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
				}
			} else if (is_seqhigher(low_writer, nextgen)) {
				prepost = 1;
			} else {
				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one in queue\n");
				ksyn_waitq_element_t kwe;
				kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
					ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT);
					if (ret == KERN_NOT_WAITING) {
						goto redrive;
					}
				} else {
					prepost = 1;
				}
			}
		}
		if (prepost) {
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			if (++kwq->kw_pre_rwwc > 1) {
				__FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
			} else {
				kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
			}
		}
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
	return returnbits;
}

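/*
 * Summary of the two drop policies above (explanatory, not from the
 * original source): under FIRSTFIT any arriving waiter may take the
 * lock, so an unlock with an empty queue simply preposts one wakeup and
 * hands PTH_RWL_PBIT back to userland. Under the default fair-share
 * policy, ownership must go to the waiter whose sequence is exactly
 * ugen + PTHRW_INC; if that waiter is missing (e.g. it was interrupted
 * on its way into the kernel), the wakeup is parked in the
 * kw_pre_intr* fields so the late arrival can still collect it.
 */
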
static int
_ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
{
	int res = (lgenval & PTHRW_RWL_INIT) != 0;
	if (res) {
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
			/* first to notice the reset of the lock, clear preposts */
			CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
		}
	}
	return res;
}

static int
_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq,
		uint32_t type,
		uint32_t lockseq,
		uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_intrcount != 0 &&
	    kwq->kw_pre_intrtype == type &&
	    (kwq->kw_pre_intrseq == 0 || is_seqlower_eq(lockseq, kwq->kw_pre_intrseq))) {
		kwq->kw_pre_intrcount--;
		*retval = kwq->kw_pre_intrretbits;
		if (kwq->kw_pre_intrcount == 0) {
			CLEAR_INTR_PREPOST_BITS(kwq);
		}
		res = 1;
	}
	return res;
}

static int
_ksyn_handle_overlap(ksyn_wait_queue_t kwq,
		uint32_t lgenval,
		uint32_t rw_wc,
		uint32_t *retval)
{
	int res = 0;

	// check for overlap and no pending W bit (indicates writers)
	if (kwq->kw_overlapwatch != 0 &&
	    (rw_wc & PTHRW_RWS_SAVEMASK) == 0 &&
	    (lgenval & PTH_RWL_WBIT) == 0) {
		/* overlap is set, so no need to check for valid state for overlap */

		if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
			/* increase the next expected seq by one */
			kwq->kw_nextseqword += PTHRW_INC;
			/* set count by one & bits from the nextseq and add M bit */
			*retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
			res = 1;
		}
	}
	return res;
}

static int
_ksyn_handle_prepost(ksyn_wait_queue_t kwq,
		uint32_t type,
		uint32_t lockseq,
		uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) {
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			uint32_t preseq = kwq->kw_pre_lockseq;
			uint32_t prerw_wc = kwq->kw_pre_sseq;
			CLEAR_PREPOST_BITS(kwq);
			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0) {
				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
			}

			int error, block;
			uint32_t updatebits;
			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type | KW_UNLOCK_PREPOST), &block, lockseq);
			if (error != 0) {
				panic("kwq_handle_unlock failed %d\n", error);
			}

			if (block == 0) {
				*retval = updatebits;
				res = 1;
			}
		}
	}
	return res;
}

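/*
 * These three fast-path checks (missed wakeup, read overlap, prepost)
 * run in that order under the kwq lock on each rwlock lock attempt; see
 * __psynch_rw_lock below. Each either consumes kernel-side state and
 * produces the wakeup bits without blocking, or declines so the caller
 * falls through to ksyn_wait().
 */
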
/* Helpers for QoS override management. Only applies to mutexes */
static void
ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
		int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread());

		kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override);

		if (prepost && kwq->kw_inqueue == 0) {
			// if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an
			// override, because the receiving owner may not re-enter the kernel to signal someone else if it is
			// the last one to unlock. If other waiters end up entering the kernel, they will boost the owner
			tid = 0;
		}

		if (tid != 0) {
			if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) {
				// hint continues to be accurate, and a boost was already applied
				pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, FALSE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			} else {
				// either hint did not match previous owner, or hint was accurate but mutex was not contended enough for a boost previously
				boolean_t boostsucceeded;

				boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);

				if (boostsucceeded) {
					kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
				}

				if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) {
					// the hint did not match the previous owner, so drop overrides
					PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
					pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
				}
			}
		} else {
			// new hint tells us that we don't know the owner, so drop any existing overrides
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

			if (wasboosted && (kwq->kw_owner != 0)) {
				// the hint did not match the previous owner, so drop overrides
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			}
		}
	}
}

static void
ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		if (kwq->kw_inqueue > 1) {
			boolean_t boostsucceeded;

			// More than one waiter, so resource will still be contended after handing off ownership
			boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);

			if (boostsucceeded) {
				kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
			}
		} else {
			// kw_inqueue == 1 to get to this point, which means there will be no contention after this point
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
		}

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			}
		}
	}
}

static void
ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		// assume nobody else in queue if this routine was called
		kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
		kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			}
		}
	}
}

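/*
 * Lifecycle of a mutex QoS override (explanatory summary): an override
 * is applied to the presumed owner when a contending thread enters the
 * kernel (update_owner), moved to the new owner when ownership is
 * handed off in the kernel (transfer), and removed when the mutex is
 * no longer contended or the owner hint proves stale (drop). Because
 * the owner tid is only a userland hint, every path tolerates a
 * mismatch and clears boosts it can no longer attribute.
 */
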
/*
 * psynch_mutexwait: This system call is used for contended psynch mutexes to block.
 */
int
_psynch_mutexwait(__unused proc_t p,
		user_addr_t mutex,
		uint32_t mgen,
		uint32_t ugen,
		uint64_t tid,
		uint32_t flags,
		uint32_t *retval)
{
	ksyn_wait_queue_t kwq;
	int error = 0;
	int ins_flags;

	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t updatebits = 0;

	uint32_t lockseq = (mgen & PTHRW_COUNT_MASK);

	if (firstfit == 0) {
		ins_flags = SEQFIT;
	} else {
		/* first fit */
		ins_flags = FIRSTFIT;
	}

	error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	// mutexwait passes in an owner hint at the time userspace contended for the mutex, however, the
	// owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond
	// to a stale snapshot after the lock has subsequently been unlocked by another thread.
	if (tid == 0) {
		// contender came in before owner could write TID
		tid = 0;
	} else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) {
		// owner is stale, someone has come in and unlocked since this contender read the TID, so
		// assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else if (tid == PTHREAD_MTX_TID_SWITCHING) {
		// userspace didn't know the owner because it was being unlocked, but that unlocker hasn't
		// reached the kernel yet. So assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else {
		// hint is being passed in for a specific thread, and we have no reason not to trust
		// it (like the kernel unlock sequence being higher)
	}

	if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) {
		ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
		kwq->kw_owner = thread_tid(current_thread());

		ksyn_wqunlock(kwq);
		goto out;
	}

	if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)))) {
		/* got preposted lock */
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			CLEAR_PREPOST_BITS(kwq);
			if (kwq->kw_inqueue == 0) {
				updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			} else {
				updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			}
			updatebits &= ~PTH_RWL_MTX_WAIT;

			if (updatebits == 0) {
				__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT\n");
			}

			ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
			kwq->kw_owner = thread_tid(current_thread());

			ksyn_wqunlock(kwq);
			*retval = updatebits;
			goto out;
		} else {
			__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
			kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
			ksyn_wqunlock(kwq);
			error = EINVAL;
			goto out;
		}
	}

	ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
	kwq->kw_owner = tid;

	error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue, kThreadWaitPThreadMutex);
	// ksyn_wait drops wait queue lock
out:
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	return error;
}

void
psynch_mtxcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t kwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(kwq);
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		ksyn_wqunlock(kwq);
	} else {
		uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
		pthread_kern->uthread_set_returnval(uth, updatebits);

		if (updatebits == 0)
			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT\n");
	}
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
 */
int
_psynch_mutexdrop(__unused proc_t p,
		user_addr_t mutex,
		uint32_t mgen,
		uint32_t ugen,
		uint64_t tid __unused,
		uint32_t flags,
		uint32_t *retval)
{
	int res;
	ksyn_wait_queue_t kwq;

	res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
	if (res == 0) {
		uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
		/* drops the kwq reference */
		if (retval) {
			*retval = updateval;
		}
	}

	return res;
}

static kern_return_t
ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval)
{
	kern_return_t ret;

	if (!kwe) {
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_mtxsignal: panic signaling empty queue");
		}
	}

	ksyn_mtx_transfer_qos_override(kwq, kwe);
	kwq->kw_owner = kwe->kwe_tid;

	ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval);

	// if waking the new owner failed, remove any overrides
	if (ret != KERN_SUCCESS) {
		ksyn_mtx_drop_qos_override(kwq);
		kwq->kw_owner = 0;
	}

	return ret;
}


static void
ksyn_prepost(ksyn_wait_queue_t kwq,
		ksyn_waitq_element_t kwe,
		uint32_t state,
		uint32_t lockseq)
{
	bzero(kwe, sizeof(*kwe));
	kwe->kwe_state = state;
	kwe->kwe_lockseq = lockseq;
	kwe->kwe_count = 1;

	(void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT);
	kwq->kw_fakecount++;
}

static void
ksyn_cvsignal(ksyn_wait_queue_t ckwq,
		thread_t th,
		uint32_t uptoseq,
		uint32_t signalseq,
		uint32_t *updatebits,
		int *broadcast,
		ksyn_waitq_element_t *nkwep)
{
	ksyn_waitq_element_t kwe = NULL;
	ksyn_waitq_element_t nkwe = NULL;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];

	uptoseq &= PTHRW_COUNT_MASK;

	// Find the specified thread to wake.
	if (th != THREAD_NULL) {
		uthread_t uth = pthread_kern->get_bsdthread_info(th);
		kwe = pthread_kern->uthread_get_uukwe(uth);
		if (kwe->kwe_kwqqueue != ckwq ||
		    is_seqhigher(kwe->kwe_lockseq, uptoseq)) {
			// Unless it's no longer waiting on this CV...
			kwe = NULL;
			// ...in which case we post a broadcast instead.
			*broadcast = 1;
			return;
		}
	}

	// If no thread was specified, find any thread to wake (with the right
	// sequence number).
	while (th == THREAD_NULL) {
		if (kwe == NULL) {
			kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
		}
		if (kwe == NULL && nkwe == NULL) {
			// No eligible entries; need to allocate a new
			// entry to prepost. Loop to rescan after
			// reacquiring the lock after allocation in
			// case anything new shows up.
			ksyn_wqunlock(ckwq);
			nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
			ksyn_wqlock(ckwq);
		} else {
			break;
		}
	}

	if (kwe != NULL) {
		// If we found a thread to wake...
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			if (is_seqlower(kwe->kwe_lockseq, signalseq)) {
				/*
				 * A valid thread in our range, but lower than our signal.
				 * Matching it may leave our match with nobody to wake it if/when
				 * it arrives (the signal originally meant for this thread might
				 * not successfully wake it).
				 *
				 * Convert to broadcast - may cause some spurious wakeups
				 * (allowed by spec), but avoids starvation (better choice).
				 */
				*broadcast = 1;
			} else {
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
				*updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			// Merge with existing prepost at same uptoseq.
			kwe->kwe_count += 1;
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// Existing broadcasts subsume this signal.
		} else {
			panic("unknown kwe state\n");
		}
		if (nkwe) {
			/*
			 * If we allocated a new kwe above but then found a different kwe to
			 * use then we need to deallocate the spare one.
			 */
			pthread_kern->zfree(kwe_zone, nkwe);
			nkwe = NULL;
		}
	} else if (nkwe != NULL) {
		// ... otherwise, insert the newly allocated prepost.
		ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq);
		nkwe = NULL;
	} else {
		panic("failed to allocate kwe\n");
	}

	*nkwep = nkwe;
}

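/*
 * The allocate-and-rescan loop above is the usual idiom for allocating
 * while holding a lock that must not be held across a blocking
 * allocation: drop the kwq lock, zalloc() a spare element, retake the
 * lock, and re-run the search, because a matching waiter or prepost may
 * have appeared while the lock was dropped. Whichever of the found and
 * spare elements goes unused is freed, either here or by the caller via
 * *nkwep.
 */
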
static int
__psynch_cvsignal(user_addr_t cv,
		uint32_t cgen,
		uint32_t cugen,
		uint32_t csgen,
		uint32_t flags,
		int broadcast,
		mach_port_name_t threadport,
		uint32_t *retval)
{
	int error = 0;
	thread_t th = THREAD_NULL;
	ksyn_wait_queue_t kwq;

	uint32_t uptoseq = cgen & PTHRW_COUNT_MASK;
	uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;

	// validate sane L, U, and S values
	if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) {
		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
		return EINVAL;
	}

	if (threadport != 0) {
		th = port_name_to_thread((mach_port_name_t)threadport);
		if (th == THREAD_NULL) {
			return ESRCH;
		}
	}

	error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq);
	if (error == 0) {
		uint32_t updatebits = 0;
		ksyn_waitq_element_t nkwe = NULL;

		ksyn_wqlock(kwq);

		// update L, U and S...
		UPDATE_CVKWQ(kwq, cgen, cugen, csgen);

		if (!broadcast) {
			// No need to signal if the CV is already balanced.
			if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
				ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe);
			}
		}

		if (broadcast) {
			ksyn_handle_cvbroad(kwq, uptoseq, &updatebits);
		}

		kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
		// set C or P bits and free if needed
		ksyn_cvupdate_fixup(kwq, &updatebits);
		*retval = updatebits;

		ksyn_wqunlock(kwq);

		if (nkwe != NULL) {
			pthread_kern->zfree(kwe_zone, nkwe);
		}

		ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
	}

	if (th != NULL) {
		thread_deallocate(th);
	}

	return error;
}

/*
 * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
 */
int
_psynch_cvbroad(__unused proc_t p,
		user_addr_t cv,
		uint64_t cvlsgen,
		uint64_t cvudgen,
		uint32_t flags,
		__unused user_addr_t mutex,
		__unused uint64_t mugen,
		__unused uint64_t tid,
		uint32_t *retval)
{
	uint32_t diffgen = cvudgen & 0xffffffff;
	uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
	if (count > pthread_kern->get_task_threadmax()) {
		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
		return EBUSY;
	}

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t cugen = (cvudgen >> 32) & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval);
}

/*
 * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
 */
int
_psynch_cvsignal(__unused proc_t p,
		user_addr_t cv,
		uint64_t cvlsgen,
		uint32_t cvugen,
		int threadport,
		__unused user_addr_t mutex,
		__unused uint64_t mugen,
		__unused uint64_t tid,
		uint32_t flags,
		uint32_t *retval)
{
	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval);
}

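/*
 * Argument packing used by these syscalls (worked example with
 * hypothetical values): userland packs two 32-bit generation words into
 * each 64-bit argument, S in the high half and L in the low half of
 * cvlsgen, i.e. cvlsgen = ((uint64_t)csgen << 32) | cgen. With
 * csgen = 0x300 and cgen = 0x500, cvlsgen is 0x0000030000000500 and the
 * shifts/masks above recover the originals. cvudgen is split the same
 * way into U (high half) and the waiter-count diff (low half) for
 * cvbroad.
 */
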
/*
 * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
 */
int
_psynch_cvwait(__unused proc_t p,
		user_addr_t cv,
		uint64_t cvlsgen,
		uint32_t cvugen,
		user_addr_t mutex,
		uint64_t mugen,
		uint32_t flags,
		int64_t sec,
		uint32_t nsec,
		uint32_t *retval)
{
	int error = 0;
	uint32_t updatebits = 0;
	ksyn_wait_queue_t ckwq = NULL;
	ksyn_waitq_element_t kwe, nkwe = NULL;

	/* for conformance reasons */
	pthread_kern->__pthread_testcancel(0);

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t ugen = (mugen >> 32) & 0xffffffff;
	uint32_t mgen = mugen & 0xffffffff;

	uint32_t lockseq = (cgen & PTHRW_COUNT_MASK);

	/*
	 * In cvwait U word can be out of range as cv could be used only for
	 * timeouts. However S word needs to be within bounds and validated at
	 * user level as well.
	 */
	if (is_seqhigher_eq(csgen, lockseq) != 0) {
		__FAILEDUSERTEST__("psynch_cvwait: invalid sequence numbers\n");
		return EINVAL;
	}

	error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
	if (error != 0) {
		return error;
	}

	if (mutex != 0) {
		error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL);
		if (error != 0) {
			goto out;
		}
	}

	ksyn_wqlock(ckwq);

	// update L, U and S...
	UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);

	/* Look for the sequence for prepost (or conflicting thread) */
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
	if (kwe != NULL) {
		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
				/* we can safely consume a reference, so do so */
				if (--kwe->kwe_count == 0) {
					ksyn_queue_remove_item(ckwq, kq, kwe);
					ckwq->kw_fakecount--;
					nkwe = kwe;
				}
			} else {
				/*
				 * consuming a prepost higher than our lock sequence is valid, but
				 * can leave the higher thread without a match. Convert the entry
				 * to a broadcast to compensate for this.
				 */
				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
#if __TESTPANICS__
				if (updatebits != 0)
					panic("psynch_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits);
#endif /* __TESTPANICS__ */
			}
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// XXX
			// Nothing to do.
		} else if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
			error = EBUSY;
		} else {
			panic("psynch_cvwait: unexpected wait queue element type\n");
		}

		if (error == 0) {
			updatebits = PTHRW_INC;
			ckwq->kw_sword += PTHRW_INC;

			/* set C or P bits and free if needed */
			ksyn_cvupdate_fixup(ckwq, &updatebits);
			*retval = updatebits;
		}
	} else {
		uint64_t abstime = 0;

		if (sec != 0 || (nsec & 0x3fffffff) != 0) {
			struct timespec ts;
			ts.tv_sec = (__darwin_time_t)sec;
			ts.tv_nsec = (nsec & 0x3fffffff);
			nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
			clock_absolutetime_interval_to_deadline(abstime, &abstime);
		}

		error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue, kThreadWaitPThreadCondVar);
		// ksyn_wait drops wait queue lock
	}

	ksyn_wqunlock(ckwq);

	if (nkwe != NULL) {
		pthread_kern->zfree(kwe_zone, nkwe);
	}
out:
	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	return error;
}


void
psynch_cvcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t ckwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(ckwq);
		/* just in case it got woken up as we were granting */
		pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);

		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
			/*
			 * the condition var was granted.
			 * reset the error so that the thread returns back.
			 */
			error = 0;
			/* no need to set any bits just return as cvsig/broad covers this */
		} else {
			ckwq->kw_sword += PTHRW_INC;

			/* set C and P bits, in the local error */
			if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
				error |= ECVCERORR;
				if (ckwq->kw_inqueue != 0) {
					ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1);
				}
				ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
				ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
			} else {
				/* is everything in the queue a fake entry? */
				if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
					error |= ECVPERORR;
				}
			}
		}
		ksyn_wqunlock(ckwq);
	} else {
		int val = 0;
		// PTH_RWL_MTX_WAIT is removed
		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
			val = PTHRW_INC | PTH_RWS_CV_CBIT;
		}
		pthread_kern->uthread_set_returnval(uth, val);
	}

	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_cvclrprepost: This system call clears pending prepost if present.
 */
int
_psynch_cvclrprepost(__unused proc_t p,
		user_addr_t cv,
		uint32_t cvgen,
		uint32_t cvugen,
		uint32_t cvsgen,
		__unused uint32_t prepocnt,
		uint32_t preposeq,
		uint32_t flags,
		int *retval)
{
	int error = 0;
	int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
	int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP;
	ksyn_wait_queue_t kwq = NULL;

	*retval = 0;

	error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	if (mutex) {
		int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
		if (firstfit && kwq->kw_pre_rwwc != 0) {
			if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) {
				// clear prepost
				kwq->kw_pre_rwwc = 0;
				kwq->kw_pre_lockseq = 0;
			}
		}
	} else {
		ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, wqtype);
	return error;
}

/* ***************** pthread_rwlock ************************ */

static int
__psynch_rw_lock(int type,
		user_addr_t rwlock,
		uint32_t lgenval,
		uint32_t ugenval,
		uint32_t rw_wc,
		int flags,
		uint32_t *retval)
{
	int prepost_type, kqi;

	if (type == PTH_RW_TYPE_READ) {
		prepost_type = KW_UNLOCK_PREPOST_READLOCK;
		kqi = KSYN_QUEUE_READ;
	} else {
		prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
		kqi = KSYN_QUEUE_WRITER;
	}

	uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;

	int error;
	ksyn_wait_queue_t kwq;
	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error == 0) {
		ksyn_wqlock(kwq);
		_ksyn_check_init(kwq, lgenval);
		if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) ||
		    // handle overlap first as they are not counted against pre_rwwc
		    (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) ||
		    _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) {
			ksyn_wqunlock(kwq);
		} else {
			block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
				kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
			error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL, block_hint);
			// ksyn_wait drops wait queue lock
			if (error == 0) {
				uthread_t uth = current_uthread();
				ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
				*retval = kwe->kwe_psynchretval;
			}
		}
		ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
	}
	return error;
}

/*
 * psynch_rw_rdlock: This system call is used for psynch rwlock readers to block.
 */
int
_psynch_rw_rdlock(__unused proc_t p,
		user_addr_t rwlock,
		uint32_t lgenval,
		uint32_t ugenval,
		uint32_t rw_wc,
		int flags,
		uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval);
}

/*
 * psynch_rw_longrdlock: This system call is used for psynch rwlock long readers to block.
 */
int
_psynch_rw_longrdlock(__unused proc_t p,
		__unused user_addr_t rwlock,
		__unused uint32_t lgenval,
		__unused uint32_t ugenval,
		__unused uint32_t rw_wc,
		__unused int flags,
		__unused uint32_t *retval)
{
	return ESRCH;
}


/*
 * psynch_rw_wrlock: This system call is used for psynch rwlock writers to block.
 */
int
_psynch_rw_wrlock(__unused proc_t p,
		user_addr_t rwlock,
		uint32_t lgenval,
		uint32_t ugenval,
		uint32_t rw_wc,
		int flags,
		uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval);
}

/*
 * psynch_rw_yieldwrlock: This system call is used for psynch rwlock yielding writers to block.
 */
int
_psynch_rw_yieldwrlock(__unused proc_t p,
		__unused user_addr_t rwlock,
		__unused uint32_t lgenval,
		__unused uint32_t ugenval,
		__unused uint32_t rw_wc,
		__unused int flags,
		__unused uint32_t *retval)
{
	return ESRCH;
}

/*
 * psynch_rw_unlock: This system call is used for unlock state postings. This will grant the appropriate
 * reader/writer variety lock.
 */
int
_psynch_rw_unlock(__unused proc_t p,
		user_addr_t rwlock,
		uint32_t lgenval,
		uint32_t ugenval,
		uint32_t rw_wc,
		int flags,
		uint32_t *retval)
{
	int error = 0;
	ksyn_wait_queue_t kwq;
	uint32_t updatebits = 0;
	int diff;
	uint32_t count = 0;
	uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
	int clearedkflags = 0;

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);
	int isinit = _ksyn_check_init(kwq, lgenval);

	/* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq) != 0)) {
		error = 0;
		goto out;
	}

	/* If L-U != num of waiters, then it needs to be preposted or spurious */
	diff = find_diff(lgenval, ugenval);

	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		if ((count == 0) || (count < (uint32_t)diff))
			goto prepost;
	}

	/* no prepost and all threads are in place, reset the bit */
	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)) {
		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
		clearedkflags = 1;
	}

	/* can handle unlock now */

	CLEAR_PREPOST_BITS(kwq);

	error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n", error);
#endif /* __TESTPANICS__ */
out:
	if (error == 0) {
		/* update bits?? */
		*retval = updatebits;
	}

	// <rdar://problem/22244050> If any of the wakeups failed because they already
	// returned to userspace because of a signal then we need to ensure that the
	// reset state is not cleared when that thread returns. Otherwise,
	// _pthread_rwlock_lock will clear the interrupted state before it is read.
	if (clearedkflags != 0 && kwq->kw_pre_intrcount > 0) {
		kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));

	return error;

prepost:
	/* update if the new seq is higher than prev prepost, or first set */
	if (is_rws_setseq(kwq->kw_pre_sseq) ||
	    is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) {
		kwq->kw_pre_rwwc = (diff - count);
		kwq->kw_pre_lockseq = curgen;
		kwq->kw_pre_sseq = rw_wc;
		updatebits = lgenval; /* let this not do unlock handling */
	}
	error = 0;
	goto out;
}


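/*
 * Worked example for the prepost path above (hypothetical values): if
 * L - U says diff = 3 waiters are owed wakeups but only count = 1 has
 * actually reached the kernel queue, the unlock cannot be fully handled
 * yet. kw_pre_rwwc is set to diff - count = 2, and the next two waiters
 * that arrive with sequences at or below kw_pre_lockseq consume those
 * preposted wakeups via _ksyn_handle_prepost() instead of blocking.
 */
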
/* ************************************************************************** */
void
pth_global_hashinit(void)
{
	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
}
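
/*
 * Note on hashing: hashinit() returns a power-of-two bucket array and
 * stores the corresponding index mask in pthhash, so the lookups below
 * select a bucket with `uaddr & pthhash` (per-process tables) or
 * `object & pthhash` (the global table for process-shared objects).
 */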

void
_pth_proc_hashinit(proc_t p)
{
	void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash);
	if (ptr == NULL) {
		panic("pth_proc_hashinit: hash init returned 0\n");
	}

	pthread_kern->proc_set_pthhash(p, ptr);
}


static int
ksyn_wq_hash_lookup(user_addr_t uaddr,
		proc_t p,
		int flags,
		ksyn_wait_queue_t *out_kwq,
		struct pthhashhead **out_hashptr,
		uint64_t *out_object,
		uint64_t *out_offset)
{
	int res = 0;
	ksyn_wait_queue_t kwq;
	uint64_t object = 0, offset = 0;
	struct pthhashhead *hashptr;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		hashptr = pth_glob_hashtbl;
		res = ksyn_findobj(uaddr, &object, &offset);
		if (res == 0) {
			LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) {
				if (kwq->kw_object == object && kwq->kw_offset == offset) {
					break;
				}
			}
		} else {
			kwq = NULL;
		}
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
		LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) {
			if (kwq->kw_addr == uaddr) {
				break;
			}
		}
	}
	*out_kwq = kwq;
	*out_object = object;
	*out_offset = offset;
	*out_hashptr = hashptr;
	return res;
}

void
_pth_proc_hashdelete(proc_t p)
{
	struct pthhashhead *hashptr;
	ksyn_wait_queue_t kwq;
	unsigned long hashsize = pthhash + 1;
	unsigned long i;

	hashptr = pthread_kern->proc_get_pthhash(p);
	pthread_kern->proc_set_pthhash(p, NULL);
	if (hashptr == NULL) {
		return;
	}

	pthread_list_lock();
	for (i = 0; i < hashsize; i++) {
		while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
			if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
			}
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
				LIST_REMOVE(kwq, kw_list);
			}
			pthread_list_unlock();
			/* release fake entries if present for cvars */
			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
			lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
			pthread_kern->zfree(kwq_zone, kwq);
			pthread_list_lock();
		}
	}
	pthread_list_unlock();
	FREE(hashptr, M_PROC);
}

/* no lock held for this as the waitqueue is getting freed */
void
ksyn_freeallkwe(ksyn_queue_t kq)
{
	ksyn_waitq_element_t kwe;
	while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
		if (kwe->kwe_state != KWE_THREAD_INWAIT) {
			pthread_kern->zfree(kwe_zone, kwe);
		}
	}
}

/* find kernel waitqueue, if not present create one. Grants a reference */
int
ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp)
{
	int res = 0;
	ksyn_wait_queue_t kwq = NULL;
	ksyn_wait_queue_t nkwq = NULL;
	struct pthhashhead *hashptr;
	proc_t p = current_proc();

	uint64_t object = 0, offset = 0;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		res = ksyn_findobj(uaddr, &object, &offset);
		hashptr = pth_glob_hashtbl;
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
	}

	while (res == 0) {
		pthread_list_lock();
		res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset);
		if (res != 0) {
			pthread_list_unlock();
			break;
		}
		if (kwq == NULL && nkwq == NULL) {
			// Drop the lock to allocate a new kwq and retry.
			pthread_list_unlock();

			nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone);
			bzero(nkwq, sizeof(struct ksyn_wait_queue));
			int i;
			for (i = 0; i < KSYN_QUEUE_MAX; i++) {
				ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
			}
			lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
			continue;
		} else if (kwq == NULL && nkwq != NULL) {
			// Still not found, add the new kwq to the hash.
			kwq = nkwq;
			nkwq = NULL; // Don't free.
			if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
				kwq->kw_pflags |= KSYN_WQ_SHARED;
				LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash);
			} else {
				LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash);
			}
			kwq->kw_pflags |= KSYN_WQ_INHASH;
		} else if (kwq != NULL) {
			// Found an existing kwq, use it.
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				LIST_REMOVE(kwq, kw_list);
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
			}
			if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
				if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) {
					if (kwq->kw_iocount == 0) {
						kwq->kw_type = 0; // mark for reinitialization
					} else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) {
						/* if all users are unlockers then wait for it to finish */
						kwq->kw_pflags |= KSYN_WQ_WAITING;
						// Drop the lock and wait for the kwq to be free.
						(void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
						continue;
					} else {
						__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
						res = EINVAL;
					}
				} else {
					__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
					res = EINVAL;
				}
			}
		}
		if (res == 0) {
			if (kwq->kw_type == 0) {
				kwq->kw_addr = uaddr;
				kwq->kw_object = object;
				kwq->kw_offset = offset;
				kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
				CLEAR_REINIT_BITS(kwq);
				kwq->kw_lword = mgen;
				kwq->kw_uword = ugen;
				kwq->kw_sword = sgen;
				kwq->kw_owner = 0;
				kwq->kw_kflags = 0;
				kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
			}
			kwq->kw_iocount++;
			if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
				kwq->kw_dropcount++;
			}
		}
		pthread_list_unlock();
		break;
	}
	if (kwqp != NULL) {
		*kwqp = kwq;
	}
	if (nkwq) {
		lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
		pthread_kern->zfree(kwq_zone, nkwq);
	}
	return res;
}

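/*
 * kwq lifetime summary: ksyn_wqfind() grants one kw_iocount reference
 * per syscall entry and ksyn_wqrelease() drops it. A kwq whose refcount
 * hits zero with no queued or preposted state is either freed
 * immediately (qfreenow) or parked on pth_free_list, where the deferred
 * psynch_wq_cleanup() thread call reaps it after KSYN_CLEANUP_DEADLINE
 * seconds, so a quickly reused address can still find its kwq in the
 * hash in the meantime.
 */
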
1707 /* Reference from find is dropped here. Starts the free process if needed */
1708 void
1709 ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
1710 {
1711 uint64_t deadline;
1712 ksyn_wait_queue_t free_elem = NULL;
1713
1714 pthread_list_lock();
1715 if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
1716 kwq->kw_dropcount--;
1717 }
1718 if (--kwq->kw_iocount == 0) {
1719 if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
1720 /* someone is waiting for the waitqueue; wake them up */
1721 kwq->kw_pflags &= ~KSYN_WQ_WAITING;
1722 wakeup(&kwq->kw_pflags);
1723 }
1724
1725 if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) {
1726 if (qfreenow == 0) {
1727 microuptime(&kwq->kw_ts);
1728 LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
1729 kwq->kw_pflags |= KSYN_WQ_FLIST;
1730 if (psynch_cleanupset == 0) {
1731 struct timeval t;
1732 microuptime(&t);
1733 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1734 deadline = tvtoabstime(&t);
1735 thread_call_enter_delayed(psynch_thcall, deadline);
1736 psynch_cleanupset = 1;
1737 }
1738 } else {
1739 kwq->kw_pflags &= ~KSYN_WQ_INHASH;
1740 LIST_REMOVE(kwq, kw_hash);
1741 free_elem = kwq;
1742 }
1743 }
1744 }
1745 pthread_list_unlock();
1746 if (free_elem != NULL) {
1747 lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp);
1748 pthread_kern->zfree(kwq_zone, free_elem);
1749 }
1750 }
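/*
 * Freeing is deferred: when the iocount of a quiescent kwq drops to zero
 * it is normally parked on pth_free_list with a timestamp, and
 * psynch_wq_cleanup() (below) reaps entries that have sat idle for
 * KSYN_CLEANUP_DEADLINE seconds, presumably to keep a recently used kwq
 * warm for repeated lock/unlock traffic on the same user address. Only
 * when qfreenow is set (or during the cleanup pass itself) is the kwq
 * unhashed and freed immediately.
 */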
1751
1752 /* responsible for freeing the waitqueues */
1753 void
1754 psynch_wq_cleanup(__unused void *param, __unused void * param1)
1755 {
1756 ksyn_wait_queue_t kwq;
1757 struct timeval t;
1758 int reschedule = 0;
1759 uint64_t deadline = 0;
1760 LIST_HEAD(, ksyn_wait_queue) freelist;
1761 LIST_INIT(&freelist);
1762
1763 pthread_list_lock();
1764
1765 microuptime(&t);
1766
1767 LIST_FOREACH(kwq, &pth_free_list, kw_list) {
1768 if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) {
1769 // still in use
1770 continue;
1771 }
1772 __darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec;
1773 if (diff < 0)
1774 diff *= -1;
1775 if (diff >= KSYN_CLEANUP_DEADLINE) {
1776 kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
1777 LIST_REMOVE(kwq, kw_hash);
1778 LIST_REMOVE(kwq, kw_list);
1779 LIST_INSERT_HEAD(&freelist, kwq, kw_list);
1780 } else {
1781 reschedule = 1;
1782 }
1783
1784 }
1785 if (reschedule != 0) {
1786 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1787 deadline = tvtoabstime(&t);
1788 thread_call_enter_delayed(psynch_thcall, deadline);
1789 psynch_cleanupset = 1;
1790 } else {
1791 psynch_cleanupset = 0;
1792 }
1793 pthread_list_unlock();
1794
1795 while ((kwq = LIST_FIRST(&freelist)) != NULL) {
1796 LIST_REMOVE(kwq, kw_list);
1797 lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
1798 pthread_kern->zfree(kwq_zone, kwq);
1799 }
1800 }
1801
1802 static int
1803 _wait_result_to_errno(wait_result_t result)
1804 {
1805 int res = 0;
1806 switch (result) {
1807 case THREAD_TIMED_OUT:
1808 res = ETIMEDOUT;
1809 break;
1810 case THREAD_INTERRUPTED:
1811 res = EINTR;
1812 break;
1813 }
1814 return res;
1815 }
1816
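/*
 * ksyn_wait() below is entered with the kwq lock held and always returns
 * with it dropped: both the enqueue-failure path and the block path call
 * ksyn_wqunlock() before returning or sleeping. _wait_result_to_errno()
 * above maps only the two recoverable wait results to errnos; any other
 * result (e.g. THREAD_AWAKENED) is treated as a successful wakeup.
 */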
1817 int
1818 ksyn_wait(ksyn_wait_queue_t kwq,
1819 int kqi,
1820 uint32_t lockseq,
1821 int fit,
1822 uint64_t abstime,
1823 thread_continue_t continuation,
1824 block_hint_t block_hint)
1825 {
1826 int res;
1827
1828 thread_t th = current_thread();
1829 uthread_t uth = pthread_kern->get_bsdthread_info(th);
1830 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
1831 bzero(kwe, sizeof(*kwe));
1832 kwe->kwe_count = 1;
1833 kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
1834 kwe->kwe_state = KWE_THREAD_INWAIT;
1835 kwe->kwe_uth = uth;
1836 kwe->kwe_tid = thread_tid(th);
1837
1838 res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
1839 if (res != 0) {
1840 //panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
1841 ksyn_wqunlock(kwq);
1842 return res;
1843 }
1844
1845 thread_set_pending_block_hint(th, block_hint);
1846 assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
1847 ksyn_wqunlock(kwq);
1848
1849 kern_return_t ret;
1850 if (continuation == THREAD_CONTINUE_NULL) {
1851 ret = thread_block(NULL);
1852 } else {
1853 ret = thread_block_parameter(continuation, kwq);
1854
1855 // If thread_block_parameter returns (e.g. the wait was interrupted),
1856 // call the continuation manually to clean up.
1857 continuation(kwq, ret);
1858
1859 // NOT REACHED
1860 panic("ksyn_wait continuation returned");
1861 }
1862
1863 res = _wait_result_to_errno(ret);
1864 if (res != 0) {
1865 ksyn_wqlock(kwq);
1866 if (kwe->kwe_kwqqueue) {
1867 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1868 }
1869 ksyn_wqunlock(kwq);
1870 }
1871 return res;
1872 }
1873
1874 kern_return_t
1875 ksyn_signal(ksyn_wait_queue_t kwq,
1876 int kqi,
1877 ksyn_waitq_element_t kwe,
1878 uint32_t updateval)
1879 {
1880 kern_return_t ret;
1881
1882 // If no wait element was specified, wake the first.
1883 if (!kwe) {
1884 kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
1885 if (!kwe) {
1886 panic("ksyn_signal: panic signaling empty queue");
1887 }
1888 }
1889
1890 if (kwe->kwe_state != KWE_THREAD_INWAIT) {
1891 panic("ksyn_signal: signaling a non-waiting element");
1892 }
1893
1894 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1895 kwe->kwe_psynchretval = updateval;
1896
1897 ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
1898 if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
1899 panic("ksyn_signal: failed to wake up thread: %x\n", ret);
1900 }
1901 return ret;
1902 }
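/*
 * A KERN_NOT_WAITING return from ksyn_signal() means the chosen element
 * was dequeued but its thread was no longer blocked (it had already been
 * interrupted or timed out). Callers such as ksyn_wakeupreaders() and
 * kwq_handle_unlock() record these as failed wakeups in kw_pre_intrcount
 * so that a later arriving waiter can consume the unused grant.
 */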
1903
1904 int
1905 ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
1906 {
1907 kern_return_t ret;
1908 vm_page_info_basic_data_t info;
1909 mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
1910 ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
1911 if (ret != KERN_SUCCESS) {
1912 return EINVAL;
1913 }
1914
1915 if (objectp != NULL) {
1916 *objectp = (uint64_t)info.object_id;
1917 }
1918 if (offsetp != NULL) {
1919 *offsetp = (uint64_t)info.offset;
1920 }
1921
1922 return(0);
1923 }
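/*
 * For PTHREAD_PROCESS_SHARED synchronizers the user address alone is not
 * a stable identity, since each process may map the shared page at a
 * different address. ksyn_findobj() therefore asks the VM layer for the
 * backing (object_id, offset) pair via vm_map_page_info(), and the global
 * hash is keyed on that pair instead of the address.
 */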
1924
1925
1926 /* find the lowest pending sequence across the read (kw_fr) and write (kw_fwr) queues */
1927 int
1928 kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[])
1929 {
1930 uint32_t kw_fr, kw_fwr, low;
1931 int type = 0, lowtype, typenum[2] = { 0 };
1932 uint32_t numbers[2] = { 0 };
1933 int count = 0, i;
1934
1935
1936 if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
1937 type |= PTH_RWSHFT_TYPE_READ;
1938 /* read entries are present */
1939 if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
1940 kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
1941 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
1942 kw_fr = premgen;
1943 } else
1944 kw_fr = premgen;
1945
1946 lowest[KSYN_QUEUE_READ] = kw_fr;
1947 numbers[count] = kw_fr;
1948 typenum[count] = PTH_RW_TYPE_READ;
1949 count++;
1950 } else
1951 lowest[KSYN_QUEUE_READ] = 0;
1952
1953 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
1954 type |= PTH_RWSHFT_TYPE_WRITE;
1955 /* write entries are present */
1956 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
1957 kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
1958 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
1959 kw_fwr = premgen;
1960 } else
1961 kw_fwr = premgen;
1962
1963 lowest[KSYN_QUEUE_WRITER] = kw_fwr;
1964 numbers[count] = kw_fwr;
1965 typenum[count] = PTH_RW_TYPE_WRITE;
1966 count++;
1967 } else
1968 lowest[KSYN_QUEUE_WRITER] = 0;
1969
1970 #if __TESTPANICS__
1971 if (count == 0)
1972 panic("kwq_find_rw_lowest: nothing in the queue\n");
1973 #endif /* __TESTPANICS__ */
1974
1975 low = numbers[0];
1976 lowtype = typenum[0];
1977 if (count > 1) {
1978 for (i = 1; i < count; i++) {
1979 if (is_seqlower(numbers[i], low) != 0) {
1980 low = numbers[i];
1981 lowtype = typenum[i];
1982 }
1983 }
1984 }
1985 type |= lowtype;
1986
1987 if (typep != NULL)
1988 *typep = type;
1989 return(0);
1990 }
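/*
 * The is_seqlower()/is_seqhigher() comparisons used throughout are
 * wrap-aware: a sequence counts as "lower" only if it trails within half
 * of the sequence space, so the ordering survives rollover of the 32-bit
 * generation counters. A sketch of the usual idiom (an assumption about
 * the shape, not the exact definition used here):
 *
 *	static inline int seq_lower(uint32_t x, uint32_t y)
 *	{
 *		// x trails y by less than half the sequence space
 *		return x != y && (uint32_t)(y - x) < 0x80000000u;
 *	}
 */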
1991
1992 /* wake up readers up to the writer limit */
1993 int
1994 ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp)
1995 {
1996 ksyn_queue_t kq;
1997 int failedwakeup = 0;
1998 int numwoken = 0;
1999 kern_return_t kret = KERN_SUCCESS;
2000 uint32_t lbits = 0;
2001
2002 lbits = updatebits;
2003
2004 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2005 while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
2006 kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
2007 if (kret == KERN_NOT_WAITING) {
2008 failedwakeup++;
2009 }
2010 numwoken++;
2011 }
2012
2013 if (wokenp != NULL)
2014 *wokenp = numwoken;
2015 return(failedwakeup);
2016 }
2017
2018
2019 /* Handles the unlock grants for the next set of waiters on rw_unlock() or on arrival of all preposted waiters */
2020 int
2021 kwq_handle_unlock(ksyn_wait_queue_t kwq,
2022 __unused uint32_t mgen,
2023 uint32_t rw_wc,
2024 uint32_t *updatep,
2025 int flags,
2026 int *blockp,
2027 uint32_t premgen)
2028 {
2029 uint32_t low_writer, limitrdnum;
2030 int rwtype, error=0;
2031 int allreaders, failed;
2032 uint32_t updatebits = 0, numneeded = 0;
2033 int prepost = flags & KW_UNLOCK_PREPOST;
2034 thread_t preth = THREAD_NULL;
2035 ksyn_waitq_element_t kwe;
2036 uthread_t uth;
2037 thread_t th;
2038 int woken = 0;
2039 int block = 1;
2040 uint32_t lowest[KSYN_QUEUE_MAX]; /* no need for upgrade as it is handled separately */
2041 kern_return_t kret = KERN_SUCCESS;
2042 ksyn_queue_t kq;
2043 int curthreturns = 0;
2044
2045 if (prepost != 0) {
2046 preth = current_thread();
2047 }
2048
2049 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2050 kwq->kw_lastseqword = rw_wc;
2051 kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
2052 kwq->kw_overlapwatch = 0;
2053
2054 error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
2055 #if __TESTPANICS__
2056 if (error != 0)
2057 panic("rwunlock: failed to slot next round of threads");
2058 #endif /* __TESTPANICS__ */
2059
2060 low_writer = lowest[KSYN_QUEUE_WRITER];
2061
2062 allreaders = 0;
2063 updatebits = 0;
2064
2065 switch (rwtype & PTH_RW_TYPE_MASK) {
2066 case PTH_RW_TYPE_READ: {
2067 // XXX
2068 /* what about the preflight which is LREAD or READ ?? */
2069 if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
2070 if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
2071 updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
2072 }
2073 }
2074 limitrdnum = 0;
2075 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2076 limitrdnum = low_writer;
2077 } else {
2078 allreaders = 1;
2079 }
2080
2081 numneeded = 0;
2082
2083 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2084 limitrdnum = low_writer;
2085 numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
2086 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
2087 curthreturns = 1;
2088 numneeded += 1;
2089 }
2090 } else {
2091 // no writers at all:
2092 // only readers are waiting
2093 kwq->kw_overlapwatch = 1;
2094 numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
2095 if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
2096 curthreturns = 1;
2097 numneeded += 1;
2098 }
2099 }
2100
2101 updatebits += (numneeded << PTHRW_COUNT_SHIFT);
2102
2103 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2104
2105 if (curthreturns != 0) {
2106 block = 0;
2107 uth = current_uthread();
2108 kwe = pthread_kern->uthread_get_uukwe(uth);
2109 kwe->kwe_psynchretval = updatebits;
2110 }
2111
2112
2113 failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken);
2114 if (failed != 0) {
2115 kwq->kw_pre_intrcount = failed; /* actually a count */
2116 kwq->kw_pre_intrseq = limitrdnum;
2117 kwq->kw_pre_intrretbits = updatebits;
2118 kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
2119 }
2120
2121 error = 0;
2122
2123 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
2124 panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
2125 }
2126 break;
2127
2128 case PTH_RW_TYPE_WRITE: {
2129
2130 /* only one thread is going to be granted */
2131 updatebits |= (PTHRW_INC);
2132 updatebits |= PTH_RWL_KBIT | PTH_RWL_EBIT;
2133
2134 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
2135 block = 0;
2136 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
2137 updatebits |= PTH_RWL_WBIT;
2138 }
2139 th = preth;
2140 uth = pthread_kern->get_bsdthread_info(th);
2141 kwe = pthread_kern->uthread_get_uukwe(uth);
2142 kwe->kwe_psynchretval = updatebits;
2143 } else {
2144 /* we are not granting the write lock to the preposting thread */
2145 /* if other writers are present, or the preposting write thread remains queued, the W bit must be set */
2146 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 ||
2147 (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
2148 updatebits |= PTH_RWL_WBIT;
2149 }
2150 /* setup next in the queue */
2151 kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
2152 if (kret == KERN_NOT_WAITING) {
2153 kwq->kw_pre_intrcount = 1; /* actually a count */
2154 kwq->kw_pre_intrseq = low_writer;
2155 kwq->kw_pre_intrretbits = updatebits;
2156 kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
2157 }
2158 error = 0;
2159 }
2160 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2161 if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
2162 panic("kwq_handle_unlock: writer lock granted but KBIT/EBIT not set %x\n", updatebits);
2163 }
2164 break;
2165
2166 default:
2167 panic("rwunlock: invalid type for lock grants");
2168
2169 }
2170
2171 if (updatep != NULL)
2172 *updatep = updatebits;
2173 if (blockp != NULL)
2174 *blockp = block;
2175 return(error);
2176 }
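/*
 * The updatebits word built above mirrors the userspace rwlock word: the
 * number of granted waiters rides in the upper bits (numneeded shifted by
 * PTHRW_COUNT_SHIFT, or one PTHRW_INC per exclusive grant) while the low
 * bits carry state flags (PTH_RWL_KBIT | PTH_RWL_EBIT for an exclusive
 * grant, PTH_RWL_WBIT while writers remain queued). The caller hands this
 * word back so the new lock state can be published to userspace.
 */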
2177
2178 /************* Indiv queue support routines ************************/
2179 void
2180 ksyn_queue_init(ksyn_queue_t kq)
2181 {
2182 TAILQ_INIT(&kq->ksynq_kwelist);
2183 kq->ksynq_count = 0;
2184 kq->ksynq_firstnum = 0;
2185 kq->ksynq_lastnum = 0;
2186 }
2187
2188 int
2189 ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit)
2190 {
2191 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2192 uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
2193 int res = 0;
2194
2195 if (kwe->kwe_kwqqueue != NULL) {
2196 panic("adding enqueued item to another queue");
2197 }
2198
2199 if (kq->ksynq_count == 0) {
2200 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2201 kq->ksynq_firstnum = lockseq;
2202 kq->ksynq_lastnum = lockseq;
2203 } else if (fit == FIRSTFIT) {
2204 /* TBD: if retry bit is set for mutex, add it to the head */
2205 /* firstfit, arriving order */
2206 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2207 if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2208 kq->ksynq_firstnum = lockseq;
2209 }
2210 if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
2211 kq->ksynq_lastnum = lockseq;
2212 }
2213 } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
2214 /* During prepost, when a thread is being cancelled, we can have two entries with the same sequence number */
2215 res = EBUSY;
2216 if (kwe->kwe_state == KWE_THREAD_PREPOST) {
2217 ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
2218 if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
2219 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2220 res = 0;
2221 }
2222 }
2223 } else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
2224 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2225 kq->ksynq_lastnum = lockseq;
2226 } else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2227 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2228 kq->ksynq_firstnum = lockseq;
2229 } else {
2230 ksyn_waitq_element_t q_kwe, r_kwe;
2231
2232 res = ESRCH;
2233 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2234 if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
2235 TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
2236 res = 0;
2237 break;
2238 }
2239 }
2240 }
2241
2242 if (res == 0) {
2243 kwe->kwe_kwqqueue = kwq;
2244 kq->ksynq_count++;
2245 kwq->kw_inqueue++;
2246 update_low_high(kwq, lockseq);
2247 }
2248 return res;
2249 }
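/*
 * Invariants maintained by ksyn_queue_insert(): outside of FIRSTFIT mode
 * the list stays sorted by wrap-aware sequence number, with
 * ksynq_firstnum/ksynq_lastnum tracking the head and tail sequences so
 * scans can bail out early. In FIRSTFIT mode arrival order is kept
 * instead, and firstnum/lastnum degrade to a running min/max summary
 * rather than mirroring the list ends.
 */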
2250
2251 void
2252 ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
2253 {
2254 if (kq->ksynq_count == 0) {
2255 panic("removing item from empty queue");
2256 }
2257
2258 if (kwe->kwe_kwqqueue != kwq) {
2259 panic("removing item from wrong queue");
2260 }
2261
2262 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
2263 kwe->kwe_list.tqe_next = NULL;
2264 kwe->kwe_list.tqe_prev = NULL;
2265 kwe->kwe_kwqqueue = NULL;
2266
2267 if (--kq->ksynq_count > 0) {
2268 ksyn_waitq_element_t tmp;
2269 tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
2270 kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2271 tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
2272 kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2273 } else {
2274 kq->ksynq_firstnum = 0;
2275 kq->ksynq_lastnum = 0;
2276 }
2277
2278 if (--kwq->kw_inqueue > 0) {
2279 uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
2280 if (kwq->kw_lowseq == curseq) {
2281 kwq->kw_lowseq = find_nextlowseq(kwq);
2282 }
2283 if (kwq->kw_highseq == curseq) {
2284 kwq->kw_highseq = find_nexthighseq(kwq);
2285 }
2286 } else {
2287 kwq->kw_lowseq = 0;
2288 kwq->kw_highseq = 0;
2289 }
2290 }
2291
2292 ksyn_waitq_element_t
2293 ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
2294 {
2295 ksyn_waitq_element_t kwe;
2296
2297 // XXX: should stop searching when higher sequence number is seen
2298 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2299 if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
2300 return kwe;
2301 }
2302 }
2303 return NULL;
2304 }
2305
2306 /* find the thread at the target sequence (or a broadcast/prepost at or above) */
2307 ksyn_waitq_element_t
2308 ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
2309 {
2310 ksyn_waitq_element_t result = NULL;
2311 ksyn_waitq_element_t kwe;
2312 uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
2313
2314 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2315 if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
2316 result = kwe;
2317
2318 // KWE_THREAD_INWAIT must be strictly equal
2319 if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
2320 result = NULL;
2321 }
2322 break;
2323 }
2324 }
2325 return result;
2326 }
2327
2328 /* look for a thread at signalseq, or any eligible waiter at or below uptoseq */
2329 ksyn_waitq_element_t
2330 ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
2331 {
2332 ksyn_waitq_element_t result = NULL;
2333 ksyn_waitq_element_t q_kwe, r_kwe;
2334
2335 // XXX
2336 /* handles the case where wrapped sequence numbers sit at the tail of the queue */
2337 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2338 if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
2339 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2340 return result;
2341 }
2342 }
2343 if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
2344 /* match any prepost at our same uptoseq or any broadcast above */
2345 if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
2346 continue;
2347 }
2348 return q_kwe;
2349 } else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
2350 /*
2351 * Match any (non-cancelled) thread at or below our upto sequence -
2352 * but prefer an exact match to our signal sequence (if present) to
2353 * keep exact matches happening.
2354 */
2355 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2356 return result;
2357 }
2358 if (q_kwe->kwe_kwqqueue == kwq) {
2359 if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
2360 /* if equal or higher than our signal sequence, return this one */
2361 if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
2362 return q_kwe;
2363 }
2364
2365 /* otherwise, just remember this eligible thread and move on */
2366 if (result == NULL) {
2367 result = q_kwe;
2368 }
2369 }
2370 }
2371 } else {
2372 panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
2373 }
2374 }
2375 return result;
2376 }
2377
2378 void
2379 ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
2380 {
2381 ksyn_waitq_element_t kwe;
2382 uint32_t tseq = upto & PTHRW_COUNT_MASK;
2383 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2384
2385 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
2386 if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
2387 break;
2388 }
2389 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2390 /*
2391 * This scenario typically arises when the cvar has been
2392 * reinitialized while new waiters are already waiting. We
2393 * wake them as spurious wakeups so the cvar state gets
2394 * reset correctly.
2395 */
2396
2397 /* skip canceled ones */
2398 /* wake the rest */
2399 /* set the M bit to tell the waking CV to return the increment value */
2400 (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
2401 } else {
2402 ksyn_queue_remove_item(kwq, kq, kwe);
2403 pthread_kern->zfree(kwe_zone, kwe);
2404 kwq->kw_fakecount--;
2405 }
2406 }
2407 }
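/*
 * Real waiters (KWE_THREAD_INWAIT) found above are woken rather than
 * freed: their kwe storage is embedded in the waiting uthread, not
 * allocated from kwe_zone, and the PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT
 * value they are signalled with makes the condvar wait path report a
 * spurious wakeup. Only the fake elements (preposts and broadcasts),
 * which do come from kwe_zone, are removed and freed, with kw_fakecount
 * adjusted to match.
 */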
2408
2409 /*************************************************************************/
2410
2411 void
2412 update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
2413 {
2414 if (kwq->kw_inqueue == 1) {
2415 kwq->kw_lowseq = lockseq;
2416 kwq->kw_highseq = lockseq;
2417 } else {
2418 if (is_seqlower(lockseq, kwq->kw_lowseq)) {
2419 kwq->kw_lowseq = lockseq;
2420 }
2421 if (is_seqhigher(lockseq, kwq->kw_highseq)) {
2422 kwq->kw_highseq = lockseq;
2423 }
2424 }
2425 }
2426
2427 uint32_t
2428 find_nextlowseq(ksyn_wait_queue_t kwq)
2429 {
2430 uint32_t lowest = 0;
2431 int first = 1;
2432 int i;
2433
2434 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2435 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2436 uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
2437 if (first || is_seqlower(current, lowest)) {
2438 lowest = current;
2439 first = 0;
2440 }
2441 }
2442 }
2443
2444 return lowest;
2445 }
2446
2447 uint32_t
2448 find_nexthighseq(ksyn_wait_queue_t kwq)
2449 {
2450 uint32_t highest = 0;
2451 int first = 1;
2452 int i;
2453
2454 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2455 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2456 uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
2457 if (first || is_seqhigher(current, highest)) {
2458 highest = current;
2459 first = 0;
2460 }
2461 }
2462 }
2463
2464 return highest;
2465 }
2466
2467 int
2468 find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
2469 {
2470 int i;
2471 uint32_t count = 0;
2472
2473 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2474 count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
2475 if (count >= nwaiters) {
2476 break;
2477 }
2478 }
2479
2480 if (countp != NULL) {
2481 *countp = count;
2482 }
2483
2484 if (count == 0) {
2485 return 0;
2486 } else if (count >= nwaiters) {
2487 return 1;
2488 } else {
2489 return 0;
2490 }
2491 }
2492
2493
2494 uint32_t
2495 ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
2496 {
2497 uint32_t i = 0;
2498 ksyn_waitq_element_t kwe, newkwe;
2499
2500 if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
2501 return 0;
2502 }
2503 if (upto == kq->ksynq_firstnum) {
2504 return 1;
2505 }
2506 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2507 uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
2508 if (is_seqhigher(curval, upto)) {
2509 break;
2510 }
2511 ++i;
2512 if (upto == curval) {
2513 break;
2514 }
2515 }
2516 return i;
2517 }
2518
2519 /* handles the cond broadcast of cvar and returns number of woken threads and bits for syscall return */
2520 void
2521 ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
2522 {
2523 ksyn_waitq_element_t kwe, newkwe;
2524 uint32_t updatebits = 0;
2525 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
2526
2527 struct ksyn_queue kfreeq;
2528 ksyn_queue_init(&kfreeq);
2529
2530 retry:
2531 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2532 if (is_seqhigher(kwe->kwe_lockseq, upto)) {
2533 // outside our range
2534 break;
2535 }
2536
2537 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2538 // Wake only non-canceled threads waiting on this CV.
2539 if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
2540 (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
2541 updatebits += PTHRW_INC;
2542 }
2543 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
2544 kwe->kwe_state == KWE_THREAD_PREPOST) {
2545 ksyn_queue_remove_item(ckwq, kq, kwe);
2546 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2547 ckwq->kw_fakecount--;
2548 } else {
2549 panic("ksyn_handle_cvbroad: unknown kwe state\n");
2550 }
2551 }
2552
2553 /* Need to enter a broadcast in the queue (if not already at L == S) */
2554
2555 if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
2556 newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
2557 if (newkwe == NULL) {
2558 ksyn_wqunlock(ckwq);
2559 newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
2560 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2561 ksyn_wqlock(ckwq);
2562 goto retry;
2563 } else {
2564 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2565 ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
2566 }
2567 }
2568
2569 // free any remaining elements collected above
2570 while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
2571 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2572 pthread_kern->zfree(kwe_zone, kwe);
2573 }
2574
2575 if (updatep != NULL) {
2576 *updatep = updatebits;
2577 }
2578 }
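/*
 * The retry label above implements the same drop-lock-to-allocate dance
 * as ksyn_wqfind(): if a broadcast element is needed and kfreeq is empty,
 * the kwq lock is dropped around zalloc() and the whole scan is redone,
 * since waiters may have come or gone while the lock was not held. The
 * fresh element is parked on kfreeq first so the second pass finds it
 * without allocating again.
 */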
2579
2580 void
2581 ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
2582 {
2583 if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
2584 if (ckwq->kw_inqueue != 0) {
2585 /* FREE THE QUEUE */
2586 ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0);
2587 #if __TESTPANICS__
2588 if (ckwq->kw_inqueue != 0)
2589 panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
2590 #endif /* __TESTPANICS__ */
2591 }
2592 ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
2593 ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
2594 *updatebits |= PTH_RWS_CV_CBIT;
2595 } else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
2596 // only fake entries are present in the queue
2597 *updatebits |= PTH_RWS_CV_PBIT;
2598 }
2599 }
2600
2601 void
2602 psynch_zoneinit(void)
2603 {
2604 kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
2605 kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
2606 }
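/*
 * Zone sizing: each zone is created with a ceiling of 8192 elements
 * (8192 * sizeof(element) bytes) and 4096-byte allocation chunks.
 * kwq_zone backs one ksyn_wait_queue per user synchronizer address known
 * to the kernel; kwe_zone backs only the fake queue elements (preposts
 * and broadcasts), since real waiters use the kwe embedded in their
 * uthread.
 */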
2607
2608 void *
2609 _pthread_get_thread_kwq(thread_t thread)
2610 {
2611 assert(thread);
2612 struct uthread * uthread = pthread_kern->get_bsdthread_info(thread);
2613 assert(uthread);
2614 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uthread);
2615 assert(kwe);
2616 ksyn_wait_queue_t kwq = kwe->kwe_kwqqueue;
2617 return kwq;
2618 }
2619
2620 /* This function is used by stackshot to determine why a thread is blocked, and report
2621 * who owns the object that the thread is blocked on. It should *only* be called if the
2622 * `block_hint' field in the relevant thread's struct is populated with something related
2623 * to pthread sync objects.
2624 */
2625 void
2626 _pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo * waitinfo)
2627 {
2628 ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread);
2629 switch (waitinfo->wait_type) {
2630 case kThreadWaitPThreadMutex:
2631 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX);
2632 waitinfo->owner = kwq->kw_owner;
2633 waitinfo->context = kwq->kw_addr;
2634 break;
2635 /* Owner of rwlock not stored in kernel space due to races. Punt
2636 * and hope that the userspace address is helpful enough. */
2637 case kThreadWaitPThreadRWLockRead:
2638 case kThreadWaitPThreadRWLockWrite:
2639 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK);
2640 waitinfo->owner = 0;
2641 waitinfo->context = kwq->kw_addr;
2642 break;
2643 /* Condvars don't have owners, so just give the userspace address. */
2644 case kThreadWaitPThreadCondVar:
2645 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);
2646 waitinfo->owner = 0;
2647 waitinfo->context = kwq->kw_addr;
2648 break;
2649 case kThreadWaitNone:
2650 default:
2651 waitinfo->owner = 0;
2652 waitinfo->context = 0;
2653 break;
2654 }
2655 }