/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 * pthread_support.c
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/time.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
//#include <sys/pthread_internal.h>
#include <sys/vm.h>
#include <sys/user.h>

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/thread_call.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/processor.h>
//#include <kern/mach_param.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_map.h>
#include <mach/vm_region.h>

#include <libkern/OSAtomic.h>

#include <pexpert/pexpert.h>
#include <sys/pthread_shims.h>

#include "kern_internal.h"
#include "synch_internal.h"
#include "kern_trace.h"

typedef struct uthread *uthread_t;

//#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
#define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)

#define ECVCERORR	256
#define ECVPERORR	512

lck_mtx_t *pthread_list_mlock;

#define PTH_HASHSIZE 100

static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
static unsigned long pthhash;

static LIST_HEAD(, ksyn_wait_queue) pth_free_list;

static zone_t kwq_zone;	/* zone for allocation of ksyn_queue */
static zone_t kwe_zone;	/* zone for allocation of ksyn_waitq_element */

#define SEQFIT 0
#define FIRSTFIT 1

struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
	uint32_t ksynq_count;		/* number of entries in queue */
	uint32_t ksynq_firstnum;	/* lowest seq in queue */
	uint32_t ksynq_lastnum;		/* highest seq in queue */
};
typedef struct ksyn_queue *ksyn_queue_t;

enum {
	KSYN_QUEUE_READ = 0,
	KSYN_QUEUE_WRITER,
	KSYN_QUEUE_MAX,
};

struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;
	LIST_ENTRY(ksyn_wait_queue) kw_list;
	user_addr_t kw_addr;
	uint64_t kw_owner;
	uint64_t kw_object;		/* object backing in shared mode */
	uint64_t kw_offset;		/* offset inside the object in shared mode */
	int kw_pflags;			/* flags under listlock protection */
	struct timeval kw_ts;		/* timeval need for upkeep before free */
	int kw_iocount;			/* inuse reference */
	int kw_dropcount;		/* current users unlocking... */

	int kw_type;			/* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue;		/* num of waiters held */
	uint32_t kw_fakecount;		/* number of error/prepost fakes */
	uint32_t kw_highseq;		/* highest seq in the queue */
	uint32_t kw_lowseq;		/* lowest seq in the queue */
	uint32_t kw_lword;		/* L value from userland */
	uint32_t kw_uword;		/* U word value from userland */
	uint32_t kw_sword;		/* S word value from userland */
	uint32_t kw_lastunlockseq;	/* the last seq that unlocked */
	/* for CV to be used as the seq kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword;	/* the last seq that unlocked */
	/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword;	/* the last seq that unlocked; with num of waiters */
	uint32_t kw_overlapwatch;	/* chance for overlaps */
	uint32_t kw_pre_rwwc;		/* prepost count */
	uint32_t kw_pre_lockseq;	/* prepost target seq */
	uint32_t kw_pre_sseq;		/* prepost target sword, in cvar used for mutexowned */
	uint32_t kw_pre_intrcount;	/* prepost of missed wakeup due to intrs */
	uint32_t kw_pre_intrseq;	/* prepost of missed wakeup limit seq */
	uint32_t kw_pre_intrretbits;	/* return bits value for missed wakeup threads */
	uint32_t kw_pre_intrtype;	/* type of failed wakeups */

	int kw_kflags;
	int kw_qos_override;	/* QoS of max waiter during contention period */
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];	/* queues to hold threads */
	lck_mtx_t kw_lock;	/* mutex lock protecting this structure */
};
typedef struct ksyn_wait_queue *ksyn_wait_queue_t;

#define TID_ZERO (uint64_t)0

/* bits needed in handling the rwlock unlock */
#define PTH_RW_TYPE_READ	0x01
#define PTH_RW_TYPE_WRITE	0x04
#define PTH_RW_TYPE_MASK	0xff
#define PTH_RW_TYPE_SHIFT	8

#define PTH_RWSHFT_TYPE_READ	0x0100
#define PTH_RWSHFT_TYPE_WRITE	0x0400
#define PTH_RWSHFT_TYPE_MASK	0xff00

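/*
 * Illustrative only (not part of the original logic): the two groups of
 * definitions above encode the same waiter class at two bit positions,
 * related by PTH_RW_TYPE_SHIFT. A sketch of the correspondence:
 */
#if 0
static int
rw_type_to_shifted(int type)
{
	/* PTH_RW_TYPE_READ  (0x01) << 8 == PTH_RWSHFT_TYPE_READ  (0x0100) */
	/* PTH_RW_TYPE_WRITE (0x04) << 8 == PTH_RWSHFT_TYPE_WRITE (0x0400) */
	return (type & PTH_RW_TYPE_MASK) << PTH_RW_TYPE_SHIFT;
}
#endif
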
/*
 * Mutex pshared attributes
 */
#define PTHREAD_PROCESS_SHARED		_PTHREAD_MTX_OPT_PSHARED
#define PTHREAD_PROCESS_PRIVATE		0x20
#define PTHREAD_PSHARED_FLAGS_MASK	0x30

/*
 * Mutex policy attributes
 */
#define _PTHREAD_MUTEX_POLICY_NONE		0
#define _PTHREAD_MUTEX_POLICY_FAIRSHARE		0x040	/* 1 */
#define _PTHREAD_MUTEX_POLICY_FIRSTFIT		0x080	/* 2 */
#define _PTHREAD_MUTEX_POLICY_REALTIME		0x0c0	/* 3 */
#define _PTHREAD_MUTEX_POLICY_ADAPTIVE		0x100	/* 4 */
#define _PTHREAD_MUTEX_POLICY_PRIPROTECT	0x140	/* 5 */
#define _PTHREAD_MUTEX_POLICY_PRIINHERIT	0x180	/* 6 */
#define PTHREAD_POLICY_FLAGS_MASK	0x1c0

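/*
 * Illustrative only: the policy occupies its own field inside the flags
 * word, so callers test it by masking first. This is the exact check used
 * by _psynch_mutexwait() and _psynch_mutexdrop_internal() below:
 */
#if 0
static int
mutex_is_firstfit(uint32_t flags)
{
	return (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
}
#endif
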
/* pflags */
#define KSYN_WQ_INHASH	2
#define KSYN_WQ_SHARED	4
#define KSYN_WQ_WAITING	8	/* threads waiting for this wq to be available */
#define KSYN_WQ_FLIST	0x10	/* in free list to be freed after a short delay */

/* kflags */
#define KSYN_KWF_INITCLEARED	1	/* the init status found and preposts cleared */
#define KSYN_KWF_ZEROEDOUT	2	/* the lword, etc are inited to 0 */
#define KSYN_KWF_QOS_APPLIED	4	/* QoS override applied to owner */

#define KSYN_CLEANUP_DEADLINE 10
static int psynch_cleanupset;
thread_call_t psynch_thcall;

#define KSYN_WQTYPE_INWAIT	0x1000
#define KSYN_WQTYPE_INDROP	0x2000
#define KSYN_WQTYPE_MTX		0x01
#define KSYN_WQTYPE_CVAR	0x02
#define KSYN_WQTYPE_RWLOCK	0x04
#define KSYN_WQTYPE_SEMA	0x08
#define KSYN_WQTYPE_MASK	0xff

#define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)

#define KW_UNLOCK_PREPOST		0x01
#define KW_UNLOCK_PREPOST_READLOCK	0x08
#define KW_UNLOCK_PREPOST_WRLOCK	0x20

static void
CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_lockseq = 0;
	kwq->kw_pre_sseq = PTHRW_RWS_INIT;
	kwq->kw_pre_rwwc = 0;
}

static void
CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_intrcount = 0;
	kwq->kw_pre_intrseq = 0;
	kwq->kw_pre_intrretbits = 0;
	kwq->kw_pre_intrtype = 0;
}

static void
CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
{
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
		if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
			panic("CV: entries in queue during reinit %d:%d\n", kwq->kw_inqueue, kwq->kw_fakecount);
		}
	}
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
		kwq->kw_nextseqword = PTHRW_RWS_INIT;
		kwq->kw_overlapwatch = 0;
	}
	CLEAR_PREPOST_BITS(kwq);
	kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
	kwq->kw_lastseqword = PTHRW_RWS_INIT;
	CLEAR_INTR_PREPOST_BITS(kwq);
	kwq->kw_lword = 0;
	kwq->kw_uword = 0;
	kwq->kw_sword = PTHRW_RWS_INIT;
}

static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype, ksyn_wait_queue_t *wq);
static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);

static int _wait_result_to_errno(wait_result_t result);

static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t);
static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
static void ksyn_freeallkwe(ksyn_queue_t kq);

static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t);
static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost);
static void ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t, ksyn_waitq_element_t);
static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t);

static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);

static void ksyn_queue_init(ksyn_queue_t kq);
static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);

static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);

static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);

static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);

static void psynch_cvcontinue(void *, wait_result_t);
static void psynch_mtxcontinue(void *, wait_result_t);

static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);

static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
{
	int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);

	// assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);

	if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
		/* the values of L, U and S are cleared out due to L==S in previous transition */
		kwq->kw_lword = mgen;
		kwq->kw_uword = ugen;
		kwq->kw_sword = rw_wc;
		kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
	} else {
		if (is_seqhigher(mgen, kwq->kw_lword)) {
			kwq->kw_lword = mgen;
		}
		if (is_seqhigher(ugen, kwq->kw_uword)) {
			kwq->kw_uword = ugen;
		}
		if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
			kwq->kw_sword = rw_wc;
		}
	}
	if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
		kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
	}
}

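/*
 * The is_seqhigher()/is_seqlower() helpers used above come from
 * synch_internal.h and compare generation words modulo 2^32, so "higher"
 * means "ahead in the window" rather than numerically greater. A minimal,
 * self-contained sketch of that style of comparison (an assumption for
 * illustration; the real helpers also account for the low status bits):
 */
#if 0
static int
seq_is_higher_sketch(uint32_t a, uint32_t b)
{
	/* signed difference places a half-range window around b */
	return (int32_t)(a - b) > 0;
}
/* e.g. seq_is_higher_sketch(0x00000100, 0xffffff00) holds across the wrap */
#endif
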
static void
pthread_list_lock(void)
{
	lck_mtx_lock(pthread_list_mlock);
}

static void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}

static void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_lock(&kwq->kw_lock);
}

static void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_unlock(&kwq->kw_lock);
}

/* routine to drop the mutex unlocks; used both for the mutexunlock system call and for drops during cond wait */
static uint32_t
_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags)
{
	kern_return_t ret;
	uint32_t returnbits = 0;
	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t nextgen = (ugen + PTHRW_INC);

	ksyn_wqlock(kwq);
	kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
	uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);

redrive:
	if (firstfit) {
		if (kwq->kw_inqueue == 0) {
			// not set or the new lock sequence is higher
			if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) {
				kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK);
			}
			kwq->kw_pre_rwwc = 1;
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			// indicate prepost content in kernel
			returnbits = mgen | PTH_RWL_PBIT;
		} else {
			// signal first waiter
			ret = ksyn_mtxsignal(kwq, NULL, updatebits);
			if (ret == KERN_NOT_WAITING) {
				goto redrive;
			}
		}
	} else {
		int prepost = 0;
		if (kwq->kw_inqueue == 0) {
			// No waiters in the queue.
			prepost = 1;
		} else {
			uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK);
			if (low_writer == nextgen) {
				/* next seq to be granted found */
				/* since the grant could be cv, make sure mutex wait is set in case the thread was interrupted out */
				ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT);
				if (ret == KERN_NOT_WAITING) {
					/* interrupt post */
					kwq->kw_pre_intrcount = 1;
					kwq->kw_pre_intrseq = nextgen;
					kwq->kw_pre_intrretbits = updatebits;
					kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
				}

			} else if (is_seqhigher(low_writer, nextgen)) {
				prepost = 1;
			} else {
				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one in queue\n");
				ksyn_waitq_element_t kwe;
				kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set in case the thread was interrupted out */
					ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT);
					if (ret == KERN_NOT_WAITING) {
						goto redrive;
					}
				} else {
					prepost = 1;
				}
			}
		}
		if (prepost) {
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			if (++kwq->kw_pre_rwwc > 1) {
				__FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
			} else {
				kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
			}
		}
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
	return returnbits;
}

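/*
 * Illustrative only: userspace can tell from the word returned by
 * _psynch_mutexdrop_internal() whether the kernel retained the unlock as a
 * prepost. Within that function, PTH_RWL_PBIT is set only on the firstfit
 * empty-queue path above.
 */
#if 0
static int
drop_was_preposted(uint32_t returnbits)
{
	return (returnbits & PTH_RWL_PBIT) != 0;
}
#endif
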
static int
_ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
{
	int res = (lgenval & PTHRW_RWL_INIT) != 0;
	if (res) {
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
			/* first to notice the reset of the lock, clear preposts */
			CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
		}
	}
	return res;
}

static int
_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq,
		uint32_t type,
		uint32_t lockseq,
		uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_intrcount != 0 &&
	    kwq->kw_pre_intrtype == type &&
	    (kwq->kw_pre_intrseq == 0 || is_seqlower_eq(lockseq, kwq->kw_pre_intrseq))) {
		kwq->kw_pre_intrcount--;
		*retval = kwq->kw_pre_intrretbits;
		if (kwq->kw_pre_intrcount == 0) {
			CLEAR_INTR_PREPOST_BITS(kwq);
		}
		res = 1;
	}
	return res;
}

static int
_ksyn_handle_overlap(ksyn_wait_queue_t kwq,
		uint32_t lgenval,
		uint32_t rw_wc,
		uint32_t *retval)
{
	int res = 0;

	// check for overlap and no pending W bit (indicates writers)
	if (kwq->kw_overlapwatch != 0 &&
	    (rw_wc & PTHRW_RWS_SAVEMASK) == 0 &&
	    (lgenval & PTH_RWL_WBIT) == 0) {
		/* overlap is set, so no need to check for valid state for overlap */

		if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
			/* increase the next expected seq by one */
			kwq->kw_nextseqword += PTHRW_INC;
			/* set count by one & bits from the nextseq and add M bit */
			*retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
			res = 1;
		}
	}
	return res;
}

static int
_ksyn_handle_prepost(ksyn_wait_queue_t kwq,
		uint32_t type,
		uint32_t lockseq,
		uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) {
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			uint32_t preseq = kwq->kw_pre_lockseq;
			uint32_t prerw_wc = kwq->kw_pre_sseq;
			CLEAR_PREPOST_BITS(kwq);
			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0) {
				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
			}

			int error, block;
			uint32_t updatebits;
			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type|KW_UNLOCK_PREPOST), &block, lockseq);
			if (error != 0) {
				panic("kwq_handle_unlock failed %d\n", error);
			}

			if (block == 0) {
				*retval = updatebits;
				res = 1;
			}
		}
	}
	return res;
}

/* Helpers for QoS override management. Only applies to mutexes */
static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
		int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread());

		kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override);

		if (prepost && kwq->kw_inqueue == 0) {
			// if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an
			// override, because the receiving owner may not re-enter the kernel to signal someone else if it is
			// the last one to unlock. If other waiters end up entering the kernel, they will boost the owner
			tid = 0;
		}

		if (tid != 0) {
			if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) {
				// hint continues to be accurate, and a boost was already applied
				pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, FALSE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			} else {
				// either hint did not match previous owner, or hint was accurate but mutex was not contended enough for a boost previously
				boolean_t boostsucceeded;

				boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);

				if (boostsucceeded) {
					kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
				}

				if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) {
					// the hint did not match the previous owner, so drop overrides
					PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
					pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
				}
			}
		} else {
			// new hint tells us that we don't know the owner, so drop any existing overrides
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

			if (wasboosted && (kwq->kw_owner != 0)) {
				// the hint did not match the previous owner, so drop overrides
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			}
		}
	}
}

static void ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		if (kwq->kw_inqueue > 1) {
			boolean_t boostsucceeded;

			// More than one waiter, so resource will still be contended after handing off ownership
			boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);

			if (boostsucceeded) {
				kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
			}
		} else {
			// kw_inqueue == 1 to get to this point, which means there will be no contention after this point
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
		}

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			}
		}
	}
}

static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		// assume nobody else in queue if this routine was called
		kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
		kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			}
		}
	}
}

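/*
 * Illustrative only: a compressed view of how the three QoS helpers above
 * pair up over the life of a contended mutex. Every applied boost is
 * eventually transferred to the woken waiter or dropped.
 */
#if 0
static void
qos_override_lifecycle_sketch(ksyn_wait_queue_t kwq, uint64_t owner_hint)
{
	/* contention begins: boost whichever thread the hint names */
	ksyn_mtx_update_owner_qos_override(kwq, owner_hint, FALSE);

	/* handoff: ksyn_mtxsignal() moves the boost to the woken waiter
	 * via ksyn_mtx_transfer_qos_override() */

	/* final unlock with an empty queue: remove whatever was applied */
	ksyn_mtx_drop_qos_override(kwq);
}
#endif
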
/*
 * psynch_mutexwait: This system call is used for contended psynch mutexes to block.
 */
int
_psynch_mutexwait(__unused proc_t p,
		user_addr_t mutex,
		uint32_t mgen,
		uint32_t ugen,
		uint64_t tid,
		uint32_t flags,
		uint32_t *retval)
{
	ksyn_wait_queue_t kwq;
	int error = 0;
	int ins_flags;

	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t updatebits = 0;

	uint32_t lockseq = (mgen & PTHRW_COUNT_MASK);

	if (firstfit == 0) {
		ins_flags = SEQFIT;
	} else {
		/* first fit */
		ins_flags = FIRSTFIT;
	}

	error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	// mutexwait passes in an owner hint at the time userspace contended for the mutex; however, the
	// owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond
	// to a stale snapshot after the lock has subsequently been unlocked by another thread.
	if (tid == 0) {
		// contender came in before owner could write TID
		tid = 0;
	} else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) {
		// owner is stale: someone has come in and unlocked since this contender read the TID, so
		// assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else if (tid == PTHREAD_MTX_TID_SWITCHING) {
		// userspace didn't know the owner because it was being unlocked, but that unlocker hasn't
		// reached the kernel yet. So assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else {
		// hint is being passed in for a specific thread, and we have no reason not to trust
		// it (like the kernel unlock sequence being higher)
	}

	if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) {
		ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
		kwq->kw_owner = thread_tid(current_thread());

		ksyn_wqunlock(kwq);
		goto out;
	}

	if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)))) {
		/* got preposted lock */
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			CLEAR_PREPOST_BITS(kwq);
			if (kwq->kw_inqueue == 0) {
				updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			} else {
				updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			}
			updatebits &= ~PTH_RWL_MTX_WAIT;

			if (updatebits == 0) {
				__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT\n");
			}

			ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
			kwq->kw_owner = thread_tid(current_thread());

			ksyn_wqunlock(kwq);
			*retval = updatebits;
			goto out;
		} else {
			__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
			kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
			ksyn_wqunlock(kwq);
			error = EINVAL;
			goto out;
		}
	}

	ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
	kwq->kw_owner = tid;

	error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue);
	// ksyn_wait drops wait queue lock
out:
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
	return error;
}

void
psynch_mtxcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t kwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(kwq);
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		ksyn_wqunlock(kwq);
	} else {
		uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
		pthread_kern->uthread_set_returnval(uth, updatebits);

		if (updatebits == 0)
			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT\n");
	}
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
 */
int
_psynch_mutexdrop(__unused proc_t p,
		user_addr_t mutex,
		uint32_t mgen,
		uint32_t ugen,
		uint64_t tid __unused,
		uint32_t flags,
		uint32_t *retval)
{
	int res;
	ksyn_wait_queue_t kwq;

	res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
	if (res == 0) {
		uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
		/* drops the kwq reference */
		if (retval) {
			*retval = updateval;
		}
	}

	return res;
}

static kern_return_t
ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval)
{
	kern_return_t ret;

	if (!kwe) {
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_mtxsignal: panic signaling empty queue");
		}
	}

	ksyn_mtx_transfer_qos_override(kwq, kwe);
	kwq->kw_owner = kwe->kwe_tid;

	ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval);

	// if waking the new owner failed, remove any overrides
	if (ret != KERN_SUCCESS) {
		ksyn_mtx_drop_qos_override(kwq);
		kwq->kw_owner = 0;
	}

	return ret;
}


static void
ksyn_prepost(ksyn_wait_queue_t kwq,
		ksyn_waitq_element_t kwe,
		uint32_t state,
		uint32_t lockseq)
{
	bzero(kwe, sizeof(*kwe));
	kwe->kwe_state = state;
	kwe->kwe_lockseq = lockseq;
	kwe->kwe_count = 1;

	(void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT);
	kwq->kw_fakecount++;
}

static void
ksyn_cvsignal(ksyn_wait_queue_t ckwq,
		thread_t th,
		uint32_t uptoseq,
		uint32_t signalseq,
		uint32_t *updatebits,
		int *broadcast,
		ksyn_waitq_element_t *nkwep)
{
	ksyn_waitq_element_t kwe = NULL;
	ksyn_waitq_element_t nkwe = NULL;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];

	uptoseq &= PTHRW_COUNT_MASK;

	// Find the specified thread to wake.
	if (th != THREAD_NULL) {
		uthread_t uth = pthread_kern->get_bsdthread_info(th);
		kwe = pthread_kern->uthread_get_uukwe(uth);
		if (kwe->kwe_kwqqueue != ckwq ||
		    is_seqhigher(kwe->kwe_lockseq, uptoseq)) {
			// Unless it's no longer waiting on this CV...
			kwe = NULL;
			// ...in which case we post a broadcast instead.
			*broadcast = 1;
			return;
		}
	}

	// If no thread was specified, find any thread to wake (with the right
	// sequence number).
	while (th == THREAD_NULL) {
		if (kwe == NULL) {
			kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
		}
		if (kwe == NULL && nkwe == NULL) {
			// No eligible entries; need to allocate a new
			// entry to prepost. Loop to rescan after
			// reacquiring the lock after allocation in
			// case anything new shows up.
			ksyn_wqunlock(ckwq);
			nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
			ksyn_wqlock(ckwq);
		} else {
			break;
		}
	}

	if (kwe != NULL) {
		// If we found a thread to wake...
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			if (is_seqlower(kwe->kwe_lockseq, signalseq)) {
				/*
				 * A valid thread in our range, but lower than our signal.
				 * Matching it may leave our match with nobody to wake it if/when
				 * it arrives (the signal originally meant for this thread might
				 * not successfully wake it).
				 *
				 * Convert to broadcast - may cause some spurious wakeups
				 * (allowed by spec), but avoids starvation (better choice).
				 */
				*broadcast = 1;
			} else {
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
				*updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			// Merge with existing prepost at same uptoseq.
			kwe->kwe_count += 1;
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// Existing broadcasts subsume this signal.
		} else {
			panic("unknown kwe state\n");
		}
		if (nkwe) {
			/*
			 * If we allocated a new kwe above but then found a different kwe to
			 * use then we need to deallocate the spare one.
			 */
			pthread_kern->zfree(kwe_zone, nkwe);
			nkwe = NULL;
		}
	} else if (nkwe != NULL) {
		// ... otherwise, insert the newly allocated prepost.
		ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq);
		nkwe = NULL;
	} else {
		panic("failed to allocate kwe\n");
	}

	*nkwep = nkwe;
}

static int
__psynch_cvsignal(user_addr_t cv,
		uint32_t cgen,
		uint32_t cugen,
		uint32_t csgen,
		uint32_t flags,
		int broadcast,
		mach_port_name_t threadport,
		uint32_t *retval)
{
	int error = 0;
	thread_t th = THREAD_NULL;
	ksyn_wait_queue_t kwq;

	uint32_t uptoseq = cgen & PTHRW_COUNT_MASK;
	uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;

	// validate sane L, U, and S values
	if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) {
		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
		return EINVAL;
	}

	if (threadport != 0) {
		th = port_name_to_thread((mach_port_name_t)threadport);
		if (th == THREAD_NULL) {
			return ESRCH;
		}
	}

	error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq);
	if (error == 0) {
		uint32_t updatebits = 0;
		ksyn_waitq_element_t nkwe = NULL;

		ksyn_wqlock(kwq);

		// update L, U and S...
		UPDATE_CVKWQ(kwq, cgen, cugen, csgen);

		if (!broadcast) {
			// No need to signal if the CV is already balanced.
			if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
				ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe);
			}
		}

		if (broadcast) {
			ksyn_handle_cvbroad(kwq, uptoseq, &updatebits);
		}

		kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
		// set C or P bits and free if needed
		ksyn_cvupdate_fixup(kwq, &updatebits);
		*retval = updatebits;

		ksyn_wqunlock(kwq);

		if (nkwe != NULL) {
			pthread_kern->zfree(kwe_zone, nkwe);
		}

		ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
	}

	if (th != NULL) {
		thread_deallocate(th);
	}

	return error;
}

/*
 * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
 */
int
_psynch_cvbroad(__unused proc_t p,
		user_addr_t cv,
		uint64_t cvlsgen,
		uint64_t cvudgen,
		uint32_t flags,
		__unused user_addr_t mutex,
		__unused uint64_t mugen,
		__unused uint64_t tid,
		uint32_t *retval)
{
	uint32_t diffgen = cvudgen & 0xffffffff;
	uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
	if (count > pthread_kern->get_task_threadmax()) {
		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
		return EBUSY;
	}

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t cugen = (cvudgen >> 32) & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval);
}

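/*
 * Illustrative only: cvlsgen packs the S word above the L word so that a
 * single 64-bit argument carries both; the unpacking above is the inverse
 * of this sketch.
 */
#if 0
static uint64_t
pack_cvlsgen(uint32_t csgen, uint32_t cgen)
{
	return ((uint64_t)csgen << 32) | cgen;	/* S high, L low */
}
#endif
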
/*
 * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
 */
int
_psynch_cvsignal(__unused proc_t p,
		user_addr_t cv,
		uint64_t cvlsgen,
		uint32_t cvugen,
		int threadport,
		__unused user_addr_t mutex,
		__unused uint64_t mugen,
		__unused uint64_t tid,
		uint32_t flags,
		uint32_t *retval)
{
	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval);
}

/*
 * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
 */
int
_psynch_cvwait(__unused proc_t p,
		user_addr_t cv,
		uint64_t cvlsgen,
		uint32_t cvugen,
		user_addr_t mutex,
		uint64_t mugen,
		uint32_t flags,
		int64_t sec,
		uint32_t nsec,
		uint32_t *retval)
{
	int error = 0;
	uint32_t updatebits = 0;
	ksyn_wait_queue_t ckwq = NULL;
	ksyn_waitq_element_t kwe, nkwe = NULL;

	/* for conformance reasons */
	pthread_kern->__pthread_testcancel(0);

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t ugen = (mugen >> 32) & 0xffffffff;
	uint32_t mgen = mugen & 0xffffffff;

	uint32_t lockseq = (cgen & PTHRW_COUNT_MASK);

	/*
	 * In cvwait U word can be out of range as cv could be used only for
	 * timeouts. However S word needs to be within bounds and validated at
	 * user level as well.
	 */
	if (is_seqhigher_eq(csgen, lockseq) != 0) {
		__FAILEDUSERTEST__("psynch_cvwait: invalid sequence numbers\n");
		return EINVAL;
	}

	error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
	if (error != 0) {
		return error;
	}

	if (mutex != 0) {
		error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL);
		if (error != 0) {
			goto out;
		}
	}

	ksyn_wqlock(ckwq);

	// update L, U and S...
	UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);

	/* Look for the sequence for prepost (or conflicting thread) */
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
	if (kwe != NULL) {
		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
				/* we can safely consume a reference, so do so */
				if (--kwe->kwe_count == 0) {
					ksyn_queue_remove_item(ckwq, kq, kwe);
					ckwq->kw_fakecount--;
					nkwe = kwe;
				}
			} else {
				/*
				 * consuming a prepost higher than our lock sequence is valid, but
				 * can leave the higher thread without a match. Convert the entry
				 * to a broadcast to compensate for this.
				 */
				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
#if __TESTPANICS__
				if (updatebits != 0)
					panic("psynch_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits);
#endif /* __TESTPANICS__ */
			}
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// XXX
			// Nothing to do.
		} else if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
			error = EBUSY;
		} else {
			panic("psynch_cvwait: unexpected wait queue element type\n");
		}

		if (error == 0) {
			updatebits = PTHRW_INC;
			ckwq->kw_sword += PTHRW_INC;

			/* set C or P bits and free if needed */
			ksyn_cvupdate_fixup(ckwq, &updatebits);
			*retval = updatebits;
		}
	} else {
		uint64_t abstime = 0;

		if (sec != 0 || (nsec & 0x3fffffff) != 0) {
			struct timespec ts;
			ts.tv_sec = (__darwin_time_t)sec;
			ts.tv_nsec = (nsec & 0x3fffffff);
			nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
			clock_absolutetime_interval_to_deadline(abstime, &abstime);
		}

		error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue);
		// ksyn_wait drops wait queue lock
	}

	ksyn_wqunlock(ckwq);

	if (nkwe != NULL) {
		pthread_kern->zfree(kwe_zone, nkwe);
	}
out:
	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	return error;
}

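/*
 * Illustrative only: the timeout handling in _psynch_cvwait() above,
 * extracted into a standalone helper. A zero result means "wait forever";
 * the same two clock calls are used in the function body.
 */
#if 0
static uint64_t
cvwait_deadline_sketch(int64_t sec, uint32_t nsec)
{
	uint64_t abstime = 0;
	if (sec != 0 || (nsec & 0x3fffffff) != 0) {
		nanoseconds_to_absolutetime((uint64_t)sec * NSEC_PER_SEC +
				(nsec & 0x3fffffff), &abstime);
		clock_absolutetime_interval_to_deadline(abstime, &abstime);
	}
	return abstime;
}
#endif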

void
psynch_cvcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t ckwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(ckwq);
		/* just in case it got woken up as we were granting */
		pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);

		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
			/*
			 * The condition var was granted; reset the error so
			 * that the thread returns back.
			 */
			error = 0;
			/* no need to set any bits just return as cvsig/broad covers this */
		} else {
			ckwq->kw_sword += PTHRW_INC;

			/* set C and P bits, in the local error */
			if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
				error |= ECVCERORR;
				if (ckwq->kw_inqueue != 0) {
					ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1);
				}
				ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
				ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
			} else {
				/* is everything in the queue a fake entry? */
				if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
					error |= ECVPERORR;
				}
			}
		}
		ksyn_wqunlock(ckwq);
	} else {
		int val = 0;
		// PTH_RWL_MTX_WAIT is removed
		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
			val = PTHRW_INC | PTH_RWS_CV_CBIT;
		}
		pthread_kern->uthread_set_returnval(uth, val);
	}

	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_cvclrprepost: This system call clears pending prepost if present.
 */
int
_psynch_cvclrprepost(__unused proc_t p,
		user_addr_t cv,
		uint32_t cvgen,
		uint32_t cvugen,
		uint32_t cvsgen,
		__unused uint32_t prepocnt,
		uint32_t preposeq,
		uint32_t flags,
		int *retval)
{
	int error = 0;
	int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
	int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP;
	ksyn_wait_queue_t kwq = NULL;

	*retval = 0;

	error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	if (mutex) {
		int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
		if (firstfit && kwq->kw_pre_rwwc != 0) {
			if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) {
				// clear prepost
				kwq->kw_pre_rwwc = 0;
				kwq->kw_pre_lockseq = 0;
			}
		}
	} else {
		ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, wqtype);
	return error;
}

/* ***************** pthread_rwlock ************************ */

static int
__psynch_rw_lock(int type,
		user_addr_t rwlock,
		uint32_t lgenval,
		uint32_t ugenval,
		uint32_t rw_wc,
		int flags,
		uint32_t *retval)
{
	int prepost_type, kqi;

	if (type == PTH_RW_TYPE_READ) {
		prepost_type = KW_UNLOCK_PREPOST_READLOCK;
		kqi = KSYN_QUEUE_READ;
	} else {
		prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
		kqi = KSYN_QUEUE_WRITER;
	}

	uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;

	int error;
	ksyn_wait_queue_t kwq;
	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
	if (error == 0) {
		ksyn_wqlock(kwq);
		_ksyn_check_init(kwq, lgenval);
		if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) ||
		    // handle overlap first as they are not counted against pre_rwwc
		    (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) ||
		    _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) {
			ksyn_wqunlock(kwq);
		} else {
			error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL);
			// ksyn_wait drops wait queue lock
			if (error == 0) {
				uthread_t uth = current_uthread();
				ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
				*retval = kwe->kwe_psynchretval;
			}
		}
		ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
	}
	return error;
}

/*
 * psynch_rw_rdlock: This system call is used for psynch rwlock readers to block.
 */
int
_psynch_rw_rdlock(__unused proc_t p,
		user_addr_t rwlock,
		uint32_t lgenval,
		uint32_t ugenval,
		uint32_t rw_wc,
		int flags,
		uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval);
}

/*
 * psynch_rw_longrdlock: This system call is used for psynch rwlock long readers to block.
 */
int
_psynch_rw_longrdlock(__unused proc_t p,
		__unused user_addr_t rwlock,
		__unused uint32_t lgenval,
		__unused uint32_t ugenval,
		__unused uint32_t rw_wc,
		__unused int flags,
		__unused uint32_t *retval)
{
	return ESRCH;
}


/*
 * psynch_rw_wrlock: This system call is used for psynch rwlock writers to block.
 */
int
_psynch_rw_wrlock(__unused proc_t p,
		user_addr_t rwlock,
		uint32_t lgenval,
		uint32_t ugenval,
		uint32_t rw_wc,
		int flags,
		uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval);
}

/*
 * psynch_rw_yieldwrlock: This system call is used for psynch rwlock yielding writers to block.
 */
int
_psynch_rw_yieldwrlock(__unused proc_t p,
		__unused user_addr_t rwlock,
		__unused uint32_t lgenval,
		__unused uint32_t ugenval,
		__unused uint32_t rw_wc,
		__unused int flags,
		__unused uint32_t *retval)
{
	return ESRCH;
}

/*
 * psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate
 * reader/writer variety lock.
 */
int
_psynch_rw_unlock(__unused proc_t p,
		user_addr_t rwlock,
		uint32_t lgenval,
		uint32_t ugenval,
		uint32_t rw_wc,
		int flags,
		uint32_t *retval)
{
	int error = 0;
	ksyn_wait_queue_t kwq;
	uint32_t updatebits = 0;
	int diff;
	uint32_t count = 0;
	uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
	int clearedkflags = 0;

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);
	int isinit = _ksyn_check_init(kwq, lgenval);

	/* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq) != 0)) {
		error = 0;
		goto out;
	}

	/* If L-U != num of waiters, then it needs to be preposted or is spurious */
	diff = find_diff(lgenval, ugenval);

	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		if ((count == 0) || (count < (uint32_t)diff))
			goto prepost;
	}

	/* no prepost and all threads are in place, reset the bit */
	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)) {
		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
		clearedkflags = 1;
	}

	/* can handle unlock now */

	CLEAR_PREPOST_BITS(kwq);

	error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n", error);
#endif /* __TESTPANICS__ */
out:
	if (error == 0) {
		/* update bits?? */
		*retval = updatebits;
	}

	// <rdar://problem/22244050> If any of the wakeups failed because they already
	// returned to userspace because of a signal then we need to ensure that the
	// reset state is not cleared when that thread returns. Otherwise,
	// _pthread_rwlock_lock will clear the interrupted state before it is read.
	if (clearedkflags != 0 && kwq->kw_pre_intrcount > 0) {
		kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));

	return error;

prepost:
	/* update if the new seq is higher than prev prepost, or first set */
	if (is_rws_setseq(kwq->kw_pre_sseq) ||
	    is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) {
		kwq->kw_pre_rwwc = (diff - count);
		kwq->kw_pre_lockseq = curgen;
		kwq->kw_pre_sseq = rw_wc;
		updatebits = lgenval;	/* let this not do unlock handling */
	}
	error = 0;
	goto out;
}


/* ************************************************************************** */
void
pth_global_hashinit(void)
{
	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
}

void
_pth_proc_hashinit(proc_t p)
{
	void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash);
	if (ptr == NULL) {
		panic("pth_proc_hashinit: hash init returned 0\n");
	}

	pthread_kern->proc_set_pthhash(p, ptr);
}


static int
ksyn_wq_hash_lookup(user_addr_t uaddr,
		proc_t p,
		int flags,
		ksyn_wait_queue_t *out_kwq,
		struct pthhashhead **out_hashptr,
		uint64_t *out_object,
		uint64_t *out_offset)
{
	int res = 0;
	ksyn_wait_queue_t kwq;
	uint64_t object = 0, offset = 0;
	struct pthhashhead *hashptr;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		hashptr = pth_glob_hashtbl;
		res = ksyn_findobj(uaddr, &object, &offset);
		if (res == 0) {
			LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) {
				if (kwq->kw_object == object && kwq->kw_offset == offset) {
					break;
				}
			}
		} else {
			kwq = NULL;
		}
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
		LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) {
			if (kwq->kw_addr == uaddr) {
				break;
			}
		}
	}
	*out_kwq = kwq;
	*out_object = object;
	*out_offset = offset;
	*out_hashptr = hashptr;
	return res;
}

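/*
 * Illustrative only: process-private synchronizers hash on the user
 * address, while process-shared ones hash on the backing VM object so that
 * different mappings of the same memory find the same kwq. The bucket
 * selection used by ksyn_wq_hash_lookup() above reduces to:
 */
#if 0
static struct pthhashhead *
kwq_bucket_sketch(struct pthhashhead *hashptr, int flags, user_addr_t uaddr,
		uint64_t object)
{
	uint64_t key = ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED)
			? object : uaddr;
	return &hashptr[key & pthhash];
}
#endif
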
void
_pth_proc_hashdelete(proc_t p)
{
	struct pthhashhead *hashptr;
	ksyn_wait_queue_t kwq;
	unsigned long hashsize = pthhash + 1;
	unsigned long i;

	hashptr = pthread_kern->proc_get_pthhash(p);
	pthread_kern->proc_set_pthhash(p, NULL);
	if (hashptr == NULL) {
		return;
	}

	pthread_list_lock();
	for (i = 0; i < hashsize; i++) {
		while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
			if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
			}
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
				LIST_REMOVE(kwq, kw_list);
			}
			pthread_list_unlock();
			/* release fake entries if present for cvars */
			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
			lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
			pthread_kern->zfree(kwq_zone, kwq);
			pthread_list_lock();
		}
	}
	pthread_list_unlock();
	FREE(hashptr, M_PROC);
}

/* no lock held for this as the waitqueue is getting freed */
void
ksyn_freeallkwe(ksyn_queue_t kq)
{
	ksyn_waitq_element_t kwe;
	while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
		if (kwe->kwe_state != KWE_THREAD_INWAIT) {
			pthread_kern->zfree(kwe_zone, kwe);
		}
	}
}

/* find kernel waitqueue, if not present create one. Grants a reference */
int
ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp)
{
	int res = 0;
	ksyn_wait_queue_t kwq = NULL;
	ksyn_wait_queue_t nkwq = NULL;
	struct pthhashhead *hashptr;
	proc_t p = current_proc();

	uint64_t object = 0, offset = 0;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		res = ksyn_findobj(uaddr, &object, &offset);
		hashptr = pth_glob_hashtbl;
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
	}

	while (res == 0) {
		pthread_list_lock();
		res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset);
		if (res != 0) {
			pthread_list_unlock();
			break;
		}
		if (kwq == NULL && nkwq == NULL) {
			// Drop the lock to allocate a new kwq and retry.
			pthread_list_unlock();

			nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone);
			bzero(nkwq, sizeof(struct ksyn_wait_queue));
			int i;
			for (i = 0; i < KSYN_QUEUE_MAX; i++) {
				ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
			}
			lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
			continue;
		} else if (kwq == NULL && nkwq != NULL) {
			// Still not found, add the new kwq to the hash.
			kwq = nkwq;
			nkwq = NULL; // Don't free.
			if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
				kwq->kw_pflags |= KSYN_WQ_SHARED;
				LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash);
			} else {
				LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash);
			}
			kwq->kw_pflags |= KSYN_WQ_INHASH;
		} else if (kwq != NULL) {
			// Found an existing kwq, use it.
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				LIST_REMOVE(kwq, kw_list);
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
			}
			if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
				if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) {
					if (kwq->kw_iocount == 0) {
						kwq->kw_type = 0; // mark for reinitialization
					} else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) {
						/* if all users are unlockers then wait for it to finish */
						kwq->kw_pflags |= KSYN_WQ_WAITING;
						// Drop the lock and wait for the kwq to be free.
						(void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
						continue;
					} else {
						__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
						res = EINVAL;
					}
				} else {
					__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
					res = EINVAL;
				}
			}
		}
		if (res == 0) {
			if (kwq->kw_type == 0) {
				kwq->kw_addr = uaddr;
				kwq->kw_object = object;
				kwq->kw_offset = offset;
				kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
				CLEAR_REINIT_BITS(kwq);
				kwq->kw_lword = mgen;
				kwq->kw_uword = ugen;
				kwq->kw_sword = sgen;
				kwq->kw_owner = 0;
				kwq->kw_kflags = 0;
				kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
			}
			kwq->kw_iocount++;
			if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
				kwq->kw_dropcount++;
			}
		}
		pthread_list_unlock();
		break;
	}
	if (kwqp != NULL) {
		*kwqp = kwq;
	}
	if (nkwq) {
		lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
		pthread_kern->zfree(kwq_zone, nkwq);
	}
	return res;
}

/* Reference from find is dropped here. Starts the free process if needed */
void
ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
{
	uint64_t deadline;
	ksyn_wait_queue_t free_elem = NULL;

	pthread_list_lock();
	if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
		kwq->kw_dropcount--;
	}
	if (--kwq->kw_iocount == 0) {
		if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
1717 /* someone is waiting on the waitqueue; wake them up */
1718 kwq->kw_pflags &= ~KSYN_WQ_WAITING;
1719 wakeup(&kwq->kw_pflags);
1720 }
1721
1722 if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) {
1723 if (qfreenow == 0) {
1724 microuptime(&kwq->kw_ts);
1725 LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
1726 kwq->kw_pflags |= KSYN_WQ_FLIST;
1727 if (psynch_cleanupset == 0) {
1728 struct timeval t;
1729 microuptime(&t);
1730 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1731 deadline = tvtoabstime(&t);
1732 thread_call_enter_delayed(psynch_thcall, deadline);
1733 psynch_cleanupset = 1;
1734 }
1735 } else {
1736 kwq->kw_pflags &= ~KSYN_WQ_INHASH;
1737 LIST_REMOVE(kwq, kw_hash);
1738 free_elem = kwq;
1739 }
1740 }
1741 }
1742 pthread_list_unlock();
1743 if (free_elem != NULL) {
1744 lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp);
1745 pthread_kern->zfree(kwq_zone, free_elem);
1746 }
1747 }
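
/*
 * The kw_pflags wakeup above pairs with the msleep() in ksyn_wqfind():
 * PDROP makes msleep release pthread_list_mlock atomically with the sleep
 * and return without retaking it, which is why the finder loops and
 * relocks. A minimal sketch of the handshake (standard BSD msleep
 * semantics; illustrative, not compiled):
 */
#if 0
/* waiter side: holds pthread_list_mlock on entry, not on return */
kwq->kw_pflags |= KSYN_WQ_WAITING;
(void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "example", 0);
/* ...loop back, retake pthread_list_mlock, revalidate... */

/* waker side: holds pthread_list_mlock */
if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
	kwq->kw_pflags &= ~KSYN_WQ_WAITING;
	wakeup(&kwq->kw_pflags);
}
#endif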
1748
1749 /* responsible for freeing the waitqueues */
1750 void
1751 psynch_wq_cleanup(__unused void *param, __unused void * param1)
1752 {
1753 ksyn_wait_queue_t kwq;
1754 struct timeval t;
1755 int reschedule = 0;
1756 uint64_t deadline = 0;
1757 LIST_HEAD(, ksyn_wait_queue) freelist;
1758 LIST_INIT(&freelist);
1759
1760 pthread_list_lock();
1761
1762 microuptime(&t);
1763
1764 LIST_FOREACH(kwq, &pth_free_list, kw_list) {
1765 if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) {
1766 // still in use
1767 continue;
1768 }
1769 __darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec;
1770 if (diff < 0)
1771 diff *= -1;
1772 if (diff >= KSYN_CLEANUP_DEADLINE) {
1773 kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
1774 LIST_REMOVE(kwq, kw_hash);
1775 LIST_REMOVE(kwq, kw_list);
1776 LIST_INSERT_HEAD(&freelist, kwq, kw_list);
1777 } else {
1778 reschedule = 1;
1779 }
1780
1781 }
1782 if (reschedule != 0) {
1783 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1784 deadline = tvtoabstime(&t);
1785 thread_call_enter_delayed(psynch_thcall, deadline);
1786 psynch_cleanupset = 1;
1787 } else {
1788 psynch_cleanupset = 0;
1789 }
1790 pthread_list_unlock();
1791
1792 while ((kwq = LIST_FIRST(&freelist)) != NULL) {
1793 LIST_REMOVE(kwq, kw_list);
1794 lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
1795 pthread_kern->zfree(kwq_zone, kwq);
1796 }
1797 }
1798
1799 static int
1800 _wait_result_to_errno(wait_result_t result)
1801 {
1802 int res = 0;
1803 switch (result) {
1804 case THREAD_TIMED_OUT:
1805 res = ETIMEDOUT;
1806 break;
1807 case THREAD_INTERRUPTED:
1808 res = EINTR;
1809 break;
1810 }
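/* any other result (THREAD_AWAKENED, THREAD_RESTART) maps to 0: a normal wakeup */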
1811 return res;
1812 }
1813
1814 int
1815 ksyn_wait(ksyn_wait_queue_t kwq,
1816 int kqi,
1817 uint32_t lockseq,
1818 int fit,
1819 uint64_t abstime,
1820 thread_continue_t continuation)
1821 {
1822 int res;
1823
1824 thread_t th = current_thread();
1825 uthread_t uth = pthread_kern->get_bsdthread_info(th);
1826 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
1827 bzero(kwe, sizeof(*kwe));
1828 kwe->kwe_count = 1;
1829 kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
1830 kwe->kwe_state = KWE_THREAD_INWAIT;
1831 kwe->kwe_uth = uth;
1832 kwe->kwe_tid = thread_tid(th);
1833
1834 res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
1835 if (res != 0) {
1836 //panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
1837 ksyn_wqunlock(kwq);
1838 return res;
1839 }
1840
1841 assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
1842 ksyn_wqunlock(kwq);
1843
1844 kern_return_t ret;
1845 if (continuation == THREAD_CONTINUE_NULL) {
1846 ret = thread_block(NULL);
1847 } else {
1848 ret = thread_block_parameter(continuation, kwq);
1849
1850 // If thread_block_parameter returns (e.g. the wait was interrupted),
1851 // call the continuation manually to clean up.
1852 continuation(kwq, ret);
1853
1854 // NOT REACHED
1855 panic("ksyn_wait continuation returned");
1856 }
1857
1858 res = _wait_result_to_errno(ret);
1859 if (res != 0) {
1860 ksyn_wqlock(kwq);
1861 if (kwe->kwe_kwqqueue) {
1862 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1863 }
1864 ksyn_wqunlock(kwq);
1865 }
1866 return res;
1867 }
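
/*
 * Continuation sketch: xnu's thread_continue_t is void (*)(void *,
 * wait_result_t), and ksyn_wait() passes kwq as the parameter. On a real
 * wakeup the kernel stack is discarded and execution resumes in the
 * continuation, so it must redo the post-block bookkeeping itself. A
 * hypothetical continuation shaped like the callers of ksyn_wait():
 */
#if 0
static void
example_psynch_continue(void *parameter, wait_result_t result)
{
	ksyn_wait_queue_t kwq = parameter;
	int res = _wait_result_to_errno(result);
	if (res != 0) {
		ksyn_wqlock(kwq);
		/* remove our kwe from its queue, as ksyn_wait() does inline */
		ksyn_wqunlock(kwq);
	}
	/* ...drop the kwq reference and return to userspace... */
}
#endif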
1868
1869 kern_return_t
1870 ksyn_signal(ksyn_wait_queue_t kwq,
1871 int kqi,
1872 ksyn_waitq_element_t kwe,
1873 uint32_t updateval)
1874 {
1875 kern_return_t ret;
1876
1877 // If no wait element was specified, wake the first.
1878 if (!kwe) {
1879 kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
1880 if (!kwe) {
1881 panic("ksyn_signal: panic signaling empty queue");
1882 }
1883 }
1884
1885 if (kwe->kwe_state != KWE_THREAD_INWAIT) {
1886 panic("ksyn_signal: panic signaling non-waiting element");
1887 }
1888
1889 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1890 kwe->kwe_psynchretval = updateval;
1891
1892 ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
1893 if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
1894 panic("ksyn_signal: panic waking up thread %x\n", ret);
1895 }
1896 return ret;
1897 }
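
/*
 * Note the handoff protocol: ksyn_wait() blocks on the address of its own
 * kwe_psynchretval, so storing updateval before the wakeup lets the woken
 * thread read its grant bits without retaking the kwq lock.
 * KERN_NOT_WAITING (the chosen waiter timed out or was interrupted first)
 * is deliberately tolerated; callers count those as failed wakeups and
 * prepost the grant instead.
 */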
1898
1899 int
1900 ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
1901 {
1902 kern_return_t ret;
1903 vm_page_info_basic_data_t info;
1904 mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
1905 ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
1906 if (ret != KERN_SUCCESS) {
1907 return EINVAL;
1908 }
1909
1910 if (objectp != NULL) {
1911 *objectp = (uint64_t)info.object_id;
1912 }
1913 if (offsetp != NULL) {
1914 *offsetp = (uint64_t)info.offset;
1915 }
1916
1917 return(0);
1918 }
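
/*
 * For PTHREAD_PROCESS_SHARED synchronizers the (object_id, offset) pair
 * returned here is the canonical name used in place of the user address,
 * so aliased mappings of the same page resolve to the same global hash
 * entry. An illustrative check (hypothetical helper; it compares two
 * mappings within the current map, which is all vm_map_page_info() here
 * inspects):
 */
#if 0
static int
example_same_shared_page(user_addr_t a, user_addr_t b)
{
	uint64_t obj_a, off_a, obj_b, off_b;
	if (ksyn_findobj(a, &obj_a, &off_a) != 0 ||
			ksyn_findobj(b, &obj_b, &off_b) != 0) {
		return 0;
	}
	return (obj_a == obj_b) && (off_a == off_b);
}
#endif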
1919
1920
1921 /* find the lowest of the pending read (kw_fr) and write (kw_fwr) sequence numbers */
1922 int
1923 kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[])
1924 {
1925 uint32_t kw_fr, kw_fwr, low;
1926 int type = 0, lowtype, typenum[2] = { 0 };
1927 uint32_t numbers[2] = { 0 };
1928 int count = 0, i;
1929
1930
1931 if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
1932 type |= PTH_RWSHFT_TYPE_READ;
1933 /* read entries are present */
1934 if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
1935 kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
1936 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
1937 kw_fr = premgen;
1938 } else
1939 kw_fr = premgen;
1940
1941 lowest[KSYN_QUEUE_READ] = kw_fr;
1942 numbers[count]= kw_fr;
1943 typenum[count] = PTH_RW_TYPE_READ;
1944 count++;
1945 } else
1946 lowest[KSYN_QUEUE_READ] = 0;
1947
1948 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
1949 type |= PTH_RWSHFT_TYPE_WRITE;
1950 /* write entries are present */
1951 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
1952 kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
1953 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
1954 kw_fwr = premgen;
1955 } else
1956 kw_fwr = premgen;
1957
1958 lowest[KSYN_QUEUE_WRITER] = kw_fwr;
1959 numbers[count]= kw_fwr;
1960 typenum[count] = PTH_RW_TYPE_WRITE;
1961 count++;
1962 } else
1963 lowest[KSYN_QUEUE_WRITER] = 0;
1964
1965 #if __TESTPANICS__
1966 if (count == 0)
1967 panic("nothing in the queue???\n");
1968 #endif /* __TESTPANICS__ */
1969
1970 low = numbers[0];
1971 lowtype = typenum[0];
1972 if (count > 1) {
1973 for (i = 1; i< count; i++) {
1974 if (is_seqlower(numbers[i] , low) != 0) {
1975 low = numbers[i];
1976 lowtype = typenum[i];
1977 }
1978 }
1979 }
1980 type |= lowtype;
1981
1982 if (typep != 0)
1983 *typep = type;
1984 return(0);
1985 }
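
/*
 * The sequence words compared above are modular counters (the bits below
 * PTHRW_COUNT_SHIFT carry flags and are masked off), so is_seqlower()/
 * is_seqhigher() order values by distance around the 32-bit wrap rather
 * than by raw magnitude. Assuming the usual half-range rule, a worked
 * example: with premgen = 0xfffffc00 and kw_fr = 0x00000400, premgen
 * compares "lower" (kw_fr is only 0x800 ahead across the wrap), so the
 * prepost correctly becomes the lowest pending sequence even though it is
 * numerically larger.
 */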
1986
1987 /* wake up readers up to the writer limit */
1988 int
1989 ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp)
1990 {
1991 ksyn_queue_t kq;
1992 int failedwakeup = 0;
1993 int numwoken = 0;
1994 kern_return_t kret = KERN_SUCCESS;
1995 uint32_t lbits = 0;
1996
1997 lbits = updatebits;
1998
1999 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2000 while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
2001 kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
2002 if (kret == KERN_NOT_WAITING) {
2003 failedwakeup++;
2004 }
2005 numwoken++;
2006 }
2007
2008 if (wokenp != NULL)
2009 *wokenp = numwoken;
2010 return(failedwakeup);
2011 }
2012
2013
2014 /* handles the unlock grants for the next set of waiters on rw_unlock() or on arrival of all preposted waiters */
2015 int
2016 kwq_handle_unlock(ksyn_wait_queue_t kwq,
2017 __unused uint32_t mgen,
2018 uint32_t rw_wc,
2019 uint32_t *updatep,
2020 int flags,
2021 int *blockp,
2022 uint32_t premgen)
2023 {
2024 uint32_t low_writer, limitrdnum;
2025 int rwtype, error=0;
2026 int allreaders, failed;
2027 uint32_t updatebits = 0, numneeded = 0;
2028 int prepost = flags & KW_UNLOCK_PREPOST;
2029 thread_t preth = THREAD_NULL;
2030 ksyn_waitq_element_t kwe;
2031 uthread_t uth;
2032 thread_t th;
2033 int woken = 0;
2034 int block = 1;
2035 uint32_t lowest[KSYN_QUEUE_MAX]; /* no need for upgrade as it is handled separately */
2036 kern_return_t kret = KERN_SUCCESS;
2037 ksyn_queue_t kq;
2038 int curthreturns = 0;
2039
2040 if (prepost != 0) {
2041 preth = current_thread();
2042 }
2043
2044 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2045 kwq->kw_lastseqword = rw_wc;
2046 kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
2047 kwq->kw_overlapwatch = 0;
2048
2049 error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
2050 #if __TESTPANICS__
2051 if (error != 0)
2052 panic("rwunlock: cannot fails to slot next round of threads");
2053 #endif /* __TESTPANICS__ */
2054
2055 low_writer = lowest[KSYN_QUEUE_WRITER];
2056
2057 allreaders = 0;
2058 updatebits = 0;
2059
2060 switch (rwtype & PTH_RW_TYPE_MASK) {
2061 case PTH_RW_TYPE_READ: {
2062 // XXX
2063 /* what about the preflight which is LREAD or READ ?? */
2064 if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
2065 if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
2066 updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
2067 }
2068 }
2069 limitrdnum = 0;
2070 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2071 limitrdnum = low_writer;
2072 } else {
2073 allreaders = 1;
2074 }
2075
2076 numneeded = 0;
2077
2078 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2079 limitrdnum = low_writer;
2080 numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
2081 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
2082 curthreturns = 1;
2083 numneeded += 1;
2084 }
2085 } else {
2086 // no writers at all;
2087 // only readers are waiting
2088 kwq->kw_overlapwatch = 1;
2089 numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
2090 if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
2091 curthreturns = 1;
2092 numneeded += 1;
2093 }
2094 }
2095
2096 updatebits += (numneeded << PTHRW_COUNT_SHIFT);
2097
2098 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2099
2100 if (curthreturns != 0) {
2101 block = 0;
2102 uth = current_uthread();
2103 kwe = pthread_kern->uthread_get_uukwe(uth);
2104 kwe->kwe_psynchretval = updatebits;
2105 }
2106
2107
2108 failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken);
2109 if (failed != 0) {
2110 kwq->kw_pre_intrcount = failed; /* actually a count */
2111 kwq->kw_pre_intrseq = limitrdnum;
2112 kwq->kw_pre_intrretbits = updatebits;
2113 kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
2114 }
2115
2116 error = 0;
2117
2118 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
2119 panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
2120 }
2121 break;
2122
2123 case PTH_RW_TYPE_WRITE: {
2124
2125 /* only one thread is going to be granted */
2126 updatebits |= (PTHRW_INC);
2127 updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT;
2128
2129 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
2130 block = 0;
2131 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
2132 updatebits |= PTH_RWL_WBIT;
2133 }
2134 th = preth;
2135 uth = pthread_kern->get_bsdthread_info(th);
2136 kwe = pthread_kern->uthread_get_uukwe(uth);
2137 kwe->kwe_psynchretval = updatebits;
2138 } else {
2139 /* we are not granting the write lock to the preposting thread */
2140 /* if other writers are present, or the preposting write thread remains queued, the W bit must be set */
2141 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 ||
2142 (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
2143 updatebits |= PTH_RWL_WBIT;
2144 }
2145 /* setup next in the queue */
2146 kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
2147 if (kret == KERN_NOT_WAITING) {
2148 kwq->kw_pre_intrcount = 1; /* actually a count */
2149 kwq->kw_pre_intrseq = low_writer;
2150 kwq->kw_pre_intrretbits = updatebits;
2151 kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
2152 }
2153 error = 0;
2154 }
2155 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2156 if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
2157 panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits);
2158 }
2159 break;
2160
2161 default:
2162 panic("rwunlock: invalid type for lock grants");
2163
2164 }
2165
2166 if (updatep != NULL)
2167 *updatep = updatebits;
2168 if (blockp != NULL)
2169 *blockp = block;
2170 return(error);
2171 }
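
/*
 * The updatebits word built above packs a grant count and flag bits into
 * one 32-bit value: the count sits above PTHRW_COUNT_SHIFT and the low
 * byte carries the K/E/W-style lock-state bits (assuming, as the code
 * above implies, that the flag bits all sit below PTHRW_COUNT_SHIFT). A
 * worked example: granting three readers while a writer stays queued
 * yields
 *
 *	updatebits = (3 << PTHRW_COUNT_SHIFT) | PTH_RWL_WBIT | PTH_RWL_KBIT;
 *
 * which userspace folds into its sequence word to account for all three
 * grants at once.
 */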
2172
2173 /************* Indiv queue support routines ************************/
2174 void
2175 ksyn_queue_init(ksyn_queue_t kq)
2176 {
2177 TAILQ_INIT(&kq->ksynq_kwelist);
2178 kq->ksynq_count = 0;
2179 kq->ksynq_firstnum = 0;
2180 kq->ksynq_lastnum = 0;
2181 }
2182
2183 int
2184 ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit)
2185 {
2186 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2187 uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
2188 int res = 0;
2189
2190 if (kwe->kwe_kwqqueue != NULL) {
2191 panic("adding enqueued item to another queue");
2192 }
2193
2194 if (kq->ksynq_count == 0) {
2195 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2196 kq->ksynq_firstnum = lockseq;
2197 kq->ksynq_lastnum = lockseq;
2198 } else if (fit == FIRSTFIT) {
2199 /* TBD: if retry bit is set for mutex, add it to the head */
2200 /* firstfit, arriving order */
2201 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2202 if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2203 kq->ksynq_firstnum = lockseq;
2204 }
2205 if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
2206 kq->ksynq_lastnum = lockseq;
2207 }
2208 } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
2209 /* During prepost, when a thread is being cancelled, we could have two entries with the same seq */
2210 res = EBUSY;
2211 if (kwe->kwe_state == KWE_THREAD_PREPOST) {
2212 ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
2213 if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
2214 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2215 res = 0;
2216 }
2217 }
2218 } else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
2219 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2220 kq->ksynq_lastnum = lockseq;
2221 } else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2222 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2223 kq->ksynq_firstnum = lockseq;
2224 } else {
2225 ksyn_waitq_element_t q_kwe, r_kwe;
2226
2227 res = ESRCH;
2228 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2229 if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
2230 TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
2231 res = 0;
2232 break;
2233 }
2234 }
2235 }
2236
2237 if (res == 0) {
2238 kwe->kwe_kwqqueue = kwq;
2239 kq->ksynq_count++;
2240 kwq->kw_inqueue++;
2241 update_low_high(kwq, lockseq);
2242 }
2243 return res;
2244 }
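
/*
 * Except under FIRSTFIT (arrival order), the queue is kept sorted by
 * lockseq, with ksynq_firstnum/ksynq_lastnum caching the ends. Worked
 * example: inserting seqs 0x300, 0x100, 0x200 in that order places 0x300
 * in the empty queue, 0x100 at the head (it is seqlower than firstnum),
 * and 0x200 via the TAILQ_FOREACH_SAFE scan, leaving the list
 * 0x100 -> 0x200 -> 0x300 with firstnum = 0x100 and lastnum = 0x300.
 */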
2245
2246 void
2247 ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
2248 {
2249 if (kq->ksynq_count == 0) {
2250 panic("removing item from empty queue");
2251 }
2252
2253 if (kwe->kwe_kwqqueue != kwq) {
2254 panic("removing item from wrong queue");
2255 }
2256
2257 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
2258 kwe->kwe_list.tqe_next = NULL;
2259 kwe->kwe_list.tqe_prev = NULL;
2260 kwe->kwe_kwqqueue = NULL;
2261
2262 if (--kq->ksynq_count > 0) {
2263 ksyn_waitq_element_t tmp;
2264 tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
2265 kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2266 tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
2267 kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2268 } else {
2269 kq->ksynq_firstnum = 0;
2270 kq->ksynq_lastnum = 0;
2271 }
2272
2273 if (--kwq->kw_inqueue > 0) {
2274 uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
2275 if (kwq->kw_lowseq == curseq) {
2276 kwq->kw_lowseq = find_nextlowseq(kwq);
2277 }
2278 if (kwq->kw_highseq == curseq) {
2279 kwq->kw_highseq = find_nexthighseq(kwq);
2280 }
2281 } else {
2282 kwq->kw_lowseq = 0;
2283 kwq->kw_highseq = 0;
2284 }
2285 }
2286
2287 ksyn_waitq_element_t
2288 ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
2289 {
2290 ksyn_waitq_element_t kwe;
2291
2292 // XXX: should stop searching when higher sequence number is seen
2293 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2294 if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
2295 return kwe;
2296 }
2297 }
2298 return NULL;
2299 }
2300
2301 /* find the thread at the target sequence (or a broadcast/prepost at or above) */
2302 ksyn_waitq_element_t
2303 ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
2304 {
2305 ksyn_waitq_element_t result = NULL;
2306 ksyn_waitq_element_t kwe;
2307 uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
2308
2309 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2310 if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
2311 result = kwe;
2312
2313 // KWE_THREAD_INWAIT must be strictly equal
2314 if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
2315 result = NULL;
2316 }
2317 break;
2318 }
2319 }
2320 return result;
2321 }
2322
2323 /* look for a waiter at signalseq, or the first eligible entry at or below uptoseq */
2324 ksyn_waitq_element_t
2325 ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
2326 {
2327 ksyn_waitq_element_t result = NULL;
2328 ksyn_waitq_element_t q_kwe, r_kwe;
2329
2330 // XXX
2331 /* handles the case where wrapped entries exist at the tail of the queue */
2332 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2333 if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
2334 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2335 return result;
2336 }
2337 }
2338 if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
2339 /* match any prepost at our same uptoseq or any broadcast above */
2340 if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
2341 continue;
2342 }
2343 return q_kwe;
2344 } else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
2345 /*
2346 * Match any (non-cancelled) thread at or below our upto sequence -
2347 * but prefer an exact match to our signal sequence (if present) to
2348 * keep exact matches happening.
2349 */
2350 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2351 return result;
2352 }
2353 if (q_kwe->kwe_kwqqueue == kwq) {
2354 if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
2355 /* if equal or higher than our signal sequence, return this one */
2356 if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
2357 return q_kwe;
2358 }
2359
2360 /* otherwise, just remember this eligible thread and move on */
2361 if (result == NULL) {
2362 result = q_kwe;
2363 }
2364 }
2365 }
2366 } else {
2367 panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
2368 }
2369 }
2370 return result;
2371 }
2372
2373 void
2374 ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
2375 {
2376 ksyn_waitq_element_t kwe;
2377 uint32_t tseq = upto & PTHRW_COUNT_MASK;
2378 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2379
2380 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
2381 if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
2382 break;
2383 }
2384 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2385 /*
2386 * This scenario is typically seen when the cvar has been
2387 * reinitialized while waiters are still queued. We wake
2388 * them as spurious wakeups so the cvar state gets
2389 * reset correctly.
2390 */
2391
2392 /* skip canceled ones */
2393 /* wake the rest */
2394 /* set the M bit to tell the waking CV to return the Inc val */
2395 (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
2396 } else {
2397 ksyn_queue_remove_item(kwq, kq, kwe);
2398 pthread_kern->zfree(kwe_zone, kwe);
2399 kwq->kw_fakecount--;
2400 }
2401 }
2402 }
2403
2404 /*************************************************************************/
2405
2406 void
2407 update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
2408 {
2409 if (kwq->kw_inqueue == 1) {
2410 kwq->kw_lowseq = lockseq;
2411 kwq->kw_highseq = lockseq;
2412 } else {
2413 if (is_seqlower(lockseq, kwq->kw_lowseq)) {
2414 kwq->kw_lowseq = lockseq;
2415 }
2416 if (is_seqhigher(lockseq, kwq->kw_highseq)) {
2417 kwq->kw_highseq = lockseq;
2418 }
2419 }
2420 }
2421
2422 uint32_t
2423 find_nextlowseq(ksyn_wait_queue_t kwq)
2424 {
2425 uint32_t lowest = 0;
2426 int first = 1;
2427 int i;
2428
2429 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2430 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2431 uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
2432 if (first || is_seqlower(current, lowest)) {
2433 lowest = current;
2434 first = 0;
2435 }
2436 }
2437 }
2438
2439 return lowest;
2440 }
2441
2442 uint32_t
2443 find_nexthighseq(ksyn_wait_queue_t kwq)
2444 {
2445 uint32_t highest = 0;
2446 int first = 1;
2447 int i;
2448
2449 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2450 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2451 uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
2452 if (first || is_seqhigher(current, highest)) {
2453 highest = current;
2454 first = 0;
2455 }
2456 }
2457 }
2458
2459 return highest;
2460 }
2461
2462 int
2463 find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
2464 {
2465 int i;
2466 uint32_t count = 0;
2467
2468 for (i = 0; i< KSYN_QUEUE_MAX; i++) {
2469 count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
2470 if (count >= nwaiters) {
2471 break;
2472 }
2473 }
2474
2475 if (countp != NULL) {
2476 *countp = count;
2477 }
2478
2479 if (count == 0) {
2480 return 0;
2481 } else if (count >= nwaiters) {
2482 return 1;
2483 } else {
2484 return 0;
2485 }
2486 }
2487
2488
2489 uint32_t
2490 ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
2491 {
2492 uint32_t i = 0;
2493 ksyn_waitq_element_t kwe, newkwe;
2494
2495 if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
2496 return 0;
2497 }
2498 if (upto == kq->ksynq_firstnum) {
2499 return 1;
2500 }
2501 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2502 uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
2503 if (is_seqhigher(curval, upto)) {
2504 break;
2505 }
2506 ++i;
2507 if (upto == curval) {
2508 break;
2509 }
2510 }
2511 return i;
2512 }
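
/*
 * Worked example: for a queue holding seqs 0x100, 0x200, 0x300,
 * ksyn_queue_count_tolowest(kq, 0x200) returns 2 (0x100 and 0x200 are at
 * or below upto; the scan stops before 0x300), while an upto of 0x080
 * returns 0 via the ksynq_firstnum fast path.
 */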
2513
2514 /* handles the cond broadcast of the cvar; reports the woken-thread count and the syscall-return bits via *updatep */
2515 void
2516 ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
2517 {
2518 ksyn_waitq_element_t kwe, newkwe;
2519 uint32_t updatebits = 0;
2520 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
2521
2522 struct ksyn_queue kfreeq;
2523 ksyn_queue_init(&kfreeq);
2524
2525 retry:
2526 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2527 if (is_seqhigher(kwe->kwe_lockseq, upto)) {
2528 // outside our range
2529 break;
2530 }
2531
2532 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2533 // Wake only non-canceled threads waiting on this CV.
2534 if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
2535 (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
2536 updatebits += PTHRW_INC;
2537 }
2538 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
2539 kwe->kwe_state == KWE_THREAD_PREPOST) {
2540 ksyn_queue_remove_item(ckwq, kq, kwe);
2541 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2542 ckwq->kw_fakecount--;
2543 } else {
2544 panic("unknown kwe state\n");
2545 }
2546 }
2547
2548 /* Need to enter a broadcast in the queue (if not already at L == S) */
2549
2550 if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
2551 newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
2552 if (newkwe == NULL) {
2553 ksyn_wqunlock(ckwq);
2554 newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
2555 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2556 ksyn_wqlock(ckwq);
2557 goto retry;
2558 } else {
2559 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2560 ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
2561 }
2562 }
2563
2564 // free up any remaining things stumbled across above
2565 while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
2566 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2567 pthread_kern->zfree(kwe_zone, kwe);
2568 }
2569
2570 if (updatep != NULL) {
2571 *updatep = updatebits;
2572 }
2573 }
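
/*
 * The zalloc in the broadcast path above follows the standard
 * unlock/allocate/relock/retry shape: zone allocation can block, so it
 * must not happen with the kwq lock held, and after relocking the scan
 * restarts from the top because waiters may have arrived or left in the
 * window. ksyn_wqfind() does the same dance with kwq_zone. A generic
 * sketch (hypothetical names, illustrative only):
 */
#if 0
for (;;) {
	lock(q);
	if (!needs_element(q) || spare != NULL) {
		break;			/* proceed under the lock */
	}
	unlock(q);
	spare = allocate();		/* may block */
	/* loop: revalidate the queue state under the lock before use */
}
#endif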
2574
2575 void
2576 ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
2577 {
2578 if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
2579 if (ckwq->kw_inqueue != 0) {
2580 /* FREE THE QUEUE */
2581 ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0);
2582 #if __TESTPANICS__
2583 if (ckwq->kw_inqueue != 0)
2584 panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
2585 #endif /* __TESTPANICS__ */
2586 }
2587 ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
2588 ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
2589 *updatebits |= PTH_RWS_CV_CBIT;
2590 } else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
2591 // only fake entries are present in the queue
2592 *updatebits |= PTH_RWS_CV_PBIT;
2593 }
2594 }
2595
2596 void
2597 psynch_zoneinit(void)
2598 {
2599 kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
2600 kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
2601 }
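
/*
 * zinit() is the xnu zone-allocator constructor; its arguments are
 * (element size, maximum zone size in bytes, allocation chunk size, name),
 * so each zone above is capped at 8192 elements and grown in 4 KB chunks.
 */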