/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 * pthread_support.c
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/time.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
//#include <sys/pthread_internal.h>
#include <sys/vm.h>
#include <sys/user.h>

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/thread_call.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/processor.h>
#include <kern/block_hint.h>
//#include <kern/mach_param.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_map.h>
#include <mach/vm_region.h>

#include <libkern/OSAtomic.h>

#include <pexpert/pexpert.h>
#include <sys/pthread_shims.h>

#include "kern_internal.h"
#include "synch_internal.h"
#include "kern_trace.h"

typedef struct uthread *uthread_t;

//#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
#define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)

#define ECVCERORR	256
#define ECVPERORR	512

lck_mtx_t *pthread_list_mlock;

#define PTH_HASHSIZE 100

static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
static unsigned long pthhash;

static LIST_HEAD(, ksyn_wait_queue) pth_free_list;

static zone_t kwq_zone; /* zone for allocation of ksyn_queue */
static zone_t kwe_zone; /* zone for allocation of ksyn_waitq_element */

#define SEQFIT 0
#define FIRSTFIT 1

struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
	uint32_t ksynq_count;		/* number of entries in queue */
	uint32_t ksynq_firstnum;	/* lowest seq in queue */
	uint32_t ksynq_lastnum;		/* highest seq in queue */
};
typedef struct ksyn_queue *ksyn_queue_t;

enum {
	KSYN_QUEUE_READ = 0,
	KSYN_QUEUE_WRITER,
	KSYN_QUEUE_MAX,
};

struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;
	LIST_ENTRY(ksyn_wait_queue) kw_list;
	user_addr_t kw_addr;
	uint64_t kw_owner;
	uint64_t kw_object;		/* object backing in shared mode */
	uint64_t kw_offset;		/* offset inside the object in shared mode */
	int kw_pflags;			/* flags under listlock protection */
	struct timeval kw_ts;		/* timeval needed for upkeep before free */
	int kw_iocount;			/* inuse reference */
	int kw_dropcount;		/* current users unlocking... */

	int kw_type;			/* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue;		/* num of waiters held */
	uint32_t kw_fakecount;		/* number of error/prepost fakes */
	uint32_t kw_highseq;		/* highest seq in the queue */
	uint32_t kw_lowseq;		/* lowest seq in the queue */
	uint32_t kw_lword;		/* L word value from userland */
	uint32_t kw_uword;		/* U word value from userland */
	uint32_t kw_sword;		/* S word value from userland */
	uint32_t kw_lastunlockseq;	/* the last seq that unlocked */
	/* for CV to be used as the seq kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword;	/* the last seq that unlocked */
	/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword;	/* the last seq that unlocked; with num of waiters */
	uint32_t kw_overlapwatch;	/* chance for overlaps */
	uint32_t kw_pre_rwwc;		/* prepost count */
	uint32_t kw_pre_lockseq;	/* prepost target seq */
	uint32_t kw_pre_sseq;		/* prepost target sword, in cvar used for mutexowned */
	uint32_t kw_pre_intrcount;	/* prepost of missed wakeup due to intrs */
	uint32_t kw_pre_intrseq;	/* prepost of missed wakeup limit seq */
	uint32_t kw_pre_intrretbits;	/* return bits value for missed wakeup threads */
	uint32_t kw_pre_intrtype;	/* type of failed wakeups */

	int kw_kflags;
	int kw_qos_override;		/* QoS of max waiter during contention period */
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];	/* queues to hold threads */
	lck_mtx_t kw_lock;		/* mutex lock protecting this structure */
};
typedef struct ksyn_wait_queue *ksyn_wait_queue_t;

#define TID_ZERO (uint64_t)0

/* bits needed in handling the rwlock unlock */
#define PTH_RW_TYPE_READ	0x01
#define PTH_RW_TYPE_WRITE	0x04
#define PTH_RW_TYPE_MASK	0xff
#define PTH_RW_TYPE_SHIFT	8

#define PTH_RWSHFT_TYPE_READ	0x0100
#define PTH_RWSHFT_TYPE_WRITE	0x0400
#define PTH_RWSHFT_TYPE_MASK	0xff00

/*
 * Mutex pshared attributes
 */
#define PTHREAD_PROCESS_SHARED		_PTHREAD_MTX_OPT_PSHARED
#define PTHREAD_PROCESS_PRIVATE		0x20
#define PTHREAD_PSHARED_FLAGS_MASK	0x30

/*
 * Mutex policy attributes
 */
#define _PTHREAD_MUTEX_POLICY_NONE		0
#define _PTHREAD_MUTEX_POLICY_FAIRSHARE		0x040	/* 1 */
#define _PTHREAD_MUTEX_POLICY_FIRSTFIT		0x080	/* 2 */
#define _PTHREAD_MUTEX_POLICY_REALTIME		0x0c0	/* 3 */
#define _PTHREAD_MUTEX_POLICY_ADAPTIVE		0x100	/* 4 */
#define _PTHREAD_MUTEX_POLICY_PRIPROTECT	0x140	/* 5 */
#define _PTHREAD_MUTEX_POLICY_PRIINHERIT	0x180	/* 6 */
#define PTHREAD_POLICY_FLAGS_MASK		0x1c0

/* pflags */
#define KSYN_WQ_INHASH	2
#define KSYN_WQ_SHARED	4
#define KSYN_WQ_WAITING	8	/* threads waiting for this wq to be available */
#define KSYN_WQ_FLIST	0x10	/* in free list to be freed after a short delay */

/* kflags */
#define KSYN_KWF_INITCLEARED	1	/* the init status found and preposts cleared */
#define KSYN_KWF_ZEROEDOUT	2	/* the lword, etc are inited to 0 */
#define KSYN_KWF_QOS_APPLIED	4	/* QoS override applied to owner */

#define KSYN_CLEANUP_DEADLINE 10
static int psynch_cleanupset;
thread_call_t psynch_thcall;

#define KSYN_WQTYPE_INWAIT	0x1000
#define KSYN_WQTYPE_INDROP	0x2000
#define KSYN_WQTYPE_MTX		0x01
#define KSYN_WQTYPE_CVAR	0x02
#define KSYN_WQTYPE_RWLOCK	0x04
#define KSYN_WQTYPE_SEMA	0x08
#define KSYN_WQTYPE_MASK	0xff

#define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)

#define KW_UNLOCK_PREPOST		0x01
#define KW_UNLOCK_PREPOST_READLOCK	0x08
#define KW_UNLOCK_PREPOST_WRLOCK	0x20

static void
CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_lockseq = 0;
	kwq->kw_pre_sseq = PTHRW_RWS_INIT;
	kwq->kw_pre_rwwc = 0;
}

static void
CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_intrcount = 0;
	kwq->kw_pre_intrseq = 0;
	kwq->kw_pre_intrretbits = 0;
	kwq->kw_pre_intrtype = 0;
}

static void
CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
{
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
		if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
			panic("CV: entries in queue during reinit %d:%d\n", kwq->kw_inqueue, kwq->kw_fakecount);
		}
	}
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
		kwq->kw_nextseqword = PTHRW_RWS_INIT;
		kwq->kw_overlapwatch = 0;
	}
	CLEAR_PREPOST_BITS(kwq);
	kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
	kwq->kw_lastseqword = PTHRW_RWS_INIT;
	CLEAR_INTR_PREPOST_BITS(kwq);
	kwq->kw_lword = 0;
	kwq->kw_uword = 0;
	kwq->kw_sword = PTHRW_RWS_INIT;
}

static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype, ksyn_wait_queue_t *wq);
static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);

static int _wait_result_to_errno(wait_result_t result);

static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t, block_hint_t);
static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
static void ksyn_freeallkwe(ksyn_queue_t kq);

static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t);
static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost);
static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t);

static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);

static void ksyn_queue_init(ksyn_queue_t kq);
static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);

static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);

static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);

static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);

static void psynch_cvcontinue(void *, wait_result_t);
static void psynch_mtxcontinue(void *, wait_result_t);

static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);

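/*
 * Refresh the kernel's cached view of a condvar's L (lock), U (unlock) and
 * S (signal) generation words from the values the caller just passed in.
 * Comparisons go through is_seqhigher()/is_seqlower() so that generation
 * counter wraparound is handled; S is only taken when the caller indicates
 * it is initialized (PTH_RWS_CV_CBIT set in rw_wc).
 */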
static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
{
	int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);

	// assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);

	if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
		/* the values of L, U and S were cleared out due to L==S in a previous transition */
		kwq->kw_lword = mgen;
		kwq->kw_uword = ugen;
		kwq->kw_sword = rw_wc;
		kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
	} else {
		if (is_seqhigher(mgen, kwq->kw_lword)) {
			kwq->kw_lword = mgen;
		}
		if (is_seqhigher(ugen, kwq->kw_uword)) {
			kwq->kw_uword = ugen;
		}
		if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
			kwq->kw_sword = rw_wc;
		}
	}
	if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
		kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
	}
}

static void
pthread_list_lock(void)
{
	lck_mtx_lock(pthread_list_mlock);
}

static void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}

static void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_lock(&kwq->kw_lock);
}

static void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_unlock(&kwq->kw_lock);
}

/* routine to drop the mutex unlocks; used both for the mutexdrop system call and the drop during cond wait */
static uint32_t
_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags)
{
	kern_return_t ret;
	uint32_t returnbits = 0;
	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t nextgen = (ugen + PTHRW_INC);

	ksyn_wqlock(kwq);
	kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
	uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);

redrive:
	if (firstfit) {
		if (kwq->kw_inqueue == 0) {
			// not set, or the new lock sequence is higher
			if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) {
				kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK);
			}
			kwq->kw_pre_rwwc = 1;
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			// indicate prepost content in kernel
			returnbits = mgen | PTH_RWL_PBIT;
		} else {
			// signal first waiter
			ret = ksyn_mtxsignal(kwq, NULL, updatebits);
			if (ret == KERN_NOT_WAITING) {
				goto redrive;
			}
		}
	} else {
		int prepost = 0;
		if (kwq->kw_inqueue == 0) {
			// No waiters in the queue.
			prepost = 1;
		} else {
			uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK);
			if (low_writer == nextgen) {
				/* next seq to be granted found */
				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
				ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT);
				if (ret == KERN_NOT_WAITING) {
					/* interrupt post */
					kwq->kw_pre_intrcount = 1;
					kwq->kw_pre_intrseq = nextgen;
					kwq->kw_pre_intrretbits = updatebits;
					kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
				}
			} else if (is_seqhigher(low_writer, nextgen)) {
				prepost = 1;
			} else {
				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one in queue\n");
				ksyn_waitq_element_t kwe;
				kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
					ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT);
					if (ret == KERN_NOT_WAITING) {
						goto redrive;
					}
				} else {
					prepost = 1;
				}
			}
		}
		if (prepost) {
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			if (++kwq->kw_pre_rwwc > 1) {
				__FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
			} else {
				kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
			}
		}
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
	return returnbits;
}

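/*
 * Returns nonzero if userspace has reinitialized the lock (PTHRW_RWL_INIT
 * set in lgenval). The first caller to notice the reinit also clears any
 * stale prepost state carried over from the lock's previous lifetime.
 */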
static int
_ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
{
	int res = (lgenval & PTHRW_RWL_INIT) != 0;
	if (res) {
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
			/* first to notice the reset of the lock, clear preposts */
			CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
		}
	}
	return res;
}

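/*
 * When a wakeup was posted but the target thread had already been
 * interrupted out of its wait, the wakeup is recorded as a "missed" intr
 * prepost. Consume one such prepost for a matching waiter of the given
 * type and hand it the saved return bits. Returns 1 if the waiter was
 * satisfied here and need not block.
 */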
static int
_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq,
			    uint32_t type,
			    uint32_t lockseq,
			    uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_intrcount != 0 &&
	    kwq->kw_pre_intrtype == type &&
	    (kwq->kw_pre_intrseq == 0 || is_seqlower_eq(lockseq, kwq->kw_pre_intrseq))) {
		kwq->kw_pre_intrcount--;
		*retval = kwq->kw_pre_intrretbits;
		if (kwq->kw_pre_intrcount == 0) {
			CLEAR_INTR_PREPOST_BITS(kwq);
		}
		res = 1;
	}
	return res;
}

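/*
 * Read-lock fast path: when overlapping reads are being watched and no
 * writer is pending (no W bit and no saved S bits), grant the read
 * immediately by advancing the next expected sequence word instead of
 * queueing the thread. Returns 1 when the grant was made.
 */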
static int
_ksyn_handle_overlap(ksyn_wait_queue_t kwq,
		     uint32_t lgenval,
		     uint32_t rw_wc,
		     uint32_t *retval)
{
	int res = 0;

	// check for overlap and no pending W bit (indicates writers)
	if (kwq->kw_overlapwatch != 0 &&
	    (rw_wc & PTHRW_RWS_SAVEMASK) == 0 &&
	    (lgenval & PTH_RWL_WBIT) == 0) {
		/* overlap is set, so no need to check for valid state for overlap */

		if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
			/* increase the next expected seq by one */
			kwq->kw_nextseqword += PTHRW_INC;
			/* set count by one & bits from the nextseq and add M bit */
			*retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
			res = 1;
		}
	}
	return res;
}

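/*
 * Consume one pending prepost for a waiter whose lockseq falls at or below
 * the preposted sequence. When the prepost count drains to zero, run the
 * normal unlock handling (kwq_handle_unlock) to compute the waiter's
 * return bits. Returns 1 if the waiter does not need to block.
 */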
static int
_ksyn_handle_prepost(ksyn_wait_queue_t kwq,
		     uint32_t type,
		     uint32_t lockseq,
		     uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) {
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			uint32_t preseq = kwq->kw_pre_lockseq;
			uint32_t prerw_wc = kwq->kw_pre_sseq;
			CLEAR_PREPOST_BITS(kwq);
			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0) {
				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
			}

			int error, block;
			uint32_t updatebits;
			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type|KW_UNLOCK_PREPOST), &block, lockseq);
			if (error != 0) {
				panic("kwq_handle_unlock failed %d\n", error);
			}

			if (block == 0) {
				*retval = updatebits;
				res = 1;
			}
		}
	}
	return res;
}

/* Helpers for QoS override management. Only applies to mutexes */
static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
		int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread());

		kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override);

		if (prepost && kwq->kw_inqueue == 0) {
			// if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an
			// override, because the receiving owner may not re-enter the kernel to signal someone else if it is
			// the last one to unlock. If other waiters end up entering the kernel, they will boost the owner
			tid = 0;
		}

		if (tid != 0) {
			if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) {
				// hint continues to be accurate, and a boost was already applied
				pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, FALSE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			} else {
				// either the hint did not match the previous owner, or the hint was accurate but the mutex was not contended enough for a boost previously
				boolean_t boostsucceeded;

				boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);

				if (boostsucceeded) {
					kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
				}

				if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) {
					// the hint did not match the previous owner, so drop overrides
					PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
					pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
				}
			}
		} else {
			// new hint tells us that we don't know the owner, so drop any existing overrides
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

			if (wasboosted && (kwq->kw_owner != 0)) {
				// the hint did not match the previous owner, so drop overrides
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			}
		}
	}
}

static boolean_t
ksyn_mtx_transfer_qos_override_begin(ksyn_wait_queue_t kwq,
				     ksyn_waitq_element_t kwe, uint64_t *kw_owner)
{
	boolean_t needs_commit = FALSE;
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		if (kwq->kw_inqueue > 1) {
			boolean_t boostsucceeded;

			// More than one waiter, so resource will still be contended after handing off ownership
			boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);

			if (boostsucceeded) {
				kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
			}
		} else {
			// kw_inqueue == 1 to get to this point, which means there will be no contention after this point
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
		}

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				*kw_owner = kwq->kw_owner;
				needs_commit = TRUE;
			} else {
				*kw_owner = 0;
				needs_commit = TRUE;
			}
		}
	}
	return needs_commit;
}

static void
ksyn_mtx_transfer_qos_override_commit(ksyn_wait_queue_t kwq, uint64_t kw_owner)
{
	struct uthread *uthread = kw_owner ? NULL : current_uthread();

	pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(
			current_task(), uthread, kw_owner, kwq->kw_addr,
			THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
}

static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		// assume nobody else is in the queue if this routine was called
		kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
		kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
			}
		}
	}
}

/*
 * psynch_mutexwait: This system call is used for contended psynch mutexes to block.
 */
int
_psynch_mutexwait(__unused proc_t p,
		  user_addr_t mutex,
		  uint32_t mgen,
		  uint32_t ugen,
		  uint64_t tid,
		  uint32_t flags,
		  uint32_t *retval)
{
	ksyn_wait_queue_t kwq;
	int error = 0;
	int ins_flags;

	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t updatebits = 0;

	uint32_t lockseq = (mgen & PTHRW_COUNT_MASK);

	if (firstfit == 0) {
		ins_flags = SEQFIT;
	} else {
		/* first fit */
		ins_flags = FIRSTFIT;
	}

	error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	// mutexwait passes in an owner hint at the time userspace contended for the mutex; however, the
	// owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond
	// to a stale snapshot after the lock has subsequently been unlocked by another thread.
	if (tid == 0) {
		// contender came in before owner could write TID
		tid = 0;
	} else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) {
		// owner is stale, someone has come in and unlocked since this contender read the TID, so
		// assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else if (tid == PTHREAD_MTX_TID_SWITCHING) {
		// userspace didn't know the owner because it was being unlocked, but that unlocker hasn't
		// reached the kernel yet. So assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else {
		// hint is being passed in for a specific thread, and we have no reason not to trust
		// it (like the kernel unlock sequence being higher)
	}

	if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) {
		ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
		kwq->kw_owner = thread_tid(current_thread());

		ksyn_wqunlock(kwq);
		goto out;
	}

	if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)))) {
		/* got preposted lock */
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			CLEAR_PREPOST_BITS(kwq);
			if (kwq->kw_inqueue == 0) {
				updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			} else {
				updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			}
			updatebits &= ~PTH_RWL_MTX_WAIT;

			if (updatebits == 0) {
				__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT\n");
			}

			ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
			kwq->kw_owner = thread_tid(current_thread());

			ksyn_wqunlock(kwq);
			*retval = updatebits;
			goto out;
		} else {
			__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
			kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
			ksyn_wqunlock(kwq);
			error = EINVAL;
			goto out;
		}
	}

	ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
	kwq->kw_owner = tid;

	error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue, kThreadWaitPThreadMutex);
	// ksyn_wait drops wait queue lock
out:
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
	return error;
}

void
psynch_mtxcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t kwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(kwq);
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		ksyn_wqunlock(kwq);
	} else {
		uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
		pthread_kern->uthread_set_returnval(uth, updatebits);

		if (updatebits == 0)
			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT\n");
	}
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
 */
int
_psynch_mutexdrop(__unused proc_t p,
		  user_addr_t mutex,
		  uint32_t mgen,
		  uint32_t ugen,
		  uint64_t tid __unused,
		  uint32_t flags,
		  uint32_t *retval)
{
	int res;
	ksyn_wait_queue_t kwq;

	res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
	if (res == 0) {
		uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
		/* drops the kwq reference */
		if (retval) {
			*retval = updateval;
		}
	}

	return res;
}

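/*
 * Wake one mutex waiter (the first writer-queue entry when kwe is NULL),
 * transferring ownership and the QoS override bookkeeping to it. Returns
 * the result of ksyn_signal(); callers redrive on KERN_NOT_WAITING.
 */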
static kern_return_t
ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval)
{
	kern_return_t ret;
	boolean_t needs_commit;
	uint64_t kw_owner;

	if (!kwe) {
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_mtxsignal: signaling an empty queue");
		}
	}

	needs_commit = ksyn_mtx_transfer_qos_override_begin(kwq, kwe, &kw_owner);
	kwq->kw_owner = kwe->kwe_tid;

	ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval);

	// if waking the new owner failed, remove any overrides
	if (ret != KERN_SUCCESS) {
		ksyn_mtx_drop_qos_override(kwq);
		kwq->kw_owner = 0;
	} else if (needs_commit) {
		ksyn_mtx_transfer_qos_override_commit(kwq, kw_owner);
	}
	return ret;
}

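/*
 * Initialize a fake (non-thread) wait queue element and insert it to record
 * a prepost; kw_fakecount tracks how many such entries sit in the queue.
 */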
static void
ksyn_prepost(ksyn_wait_queue_t kwq,
	     ksyn_waitq_element_t kwe,
	     uint32_t state,
	     uint32_t lockseq)
{
	bzero(kwe, sizeof(*kwe));
	kwe->kwe_state = state;
	kwe->kwe_lockseq = lockseq;
	kwe->kwe_count = 1;

	(void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT);
	kwq->kw_fakecount++;
}

static void
ksyn_cvsignal(ksyn_wait_queue_t ckwq,
	      thread_t th,
	      uint32_t uptoseq,
	      uint32_t signalseq,
	      uint32_t *updatebits,
	      int *broadcast,
	      ksyn_waitq_element_t *nkwep)
{
	ksyn_waitq_element_t kwe = NULL;
	ksyn_waitq_element_t nkwe = NULL;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];

	uptoseq &= PTHRW_COUNT_MASK;

	// Find the specified thread to wake.
	if (th != THREAD_NULL) {
		uthread_t uth = pthread_kern->get_bsdthread_info(th);
		kwe = pthread_kern->uthread_get_uukwe(uth);
		if (kwe->kwe_kwqqueue != ckwq ||
		    is_seqhigher(kwe->kwe_lockseq, uptoseq)) {
			// Unless it's no longer waiting on this CV...
			kwe = NULL;
			// ...in which case we post a broadcast instead.
			*broadcast = 1;
			return;
		}
	}

	// If no thread was specified, find any thread to wake (with the right
	// sequence number).
	while (th == THREAD_NULL) {
		if (kwe == NULL) {
			kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
		}
		if (kwe == NULL && nkwe == NULL) {
			// No eligible entries; need to allocate a new
			// entry to prepost. Loop to rescan after
			// reacquiring the lock after allocation in
			// case anything new shows up.
			ksyn_wqunlock(ckwq);
			nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
			ksyn_wqlock(ckwq);
		} else {
			break;
		}
	}

	if (kwe != NULL) {
		// If we found a thread to wake...
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			if (is_seqlower(kwe->kwe_lockseq, signalseq)) {
				/*
				 * A valid thread in our range, but lower than our signal.
				 * Matching it may leave our match with nobody to wake it if/when
				 * it arrives (the signal originally meant for this thread might
				 * not successfully wake it).
				 *
				 * Convert to broadcast - may cause some spurious wakeups
				 * (allowed by spec), but avoids starvation (better choice).
				 */
				*broadcast = 1;
			} else {
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
				*updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			// Merge with existing prepost at same uptoseq.
			kwe->kwe_count += 1;
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// Existing broadcasts subsume this signal.
		} else {
			panic("unknown kwe state\n");
		}
		if (nkwe) {
			/*
			 * If we allocated a new kwe above but then found a different kwe to
			 * use then we need to deallocate the spare one.
			 */
			pthread_kern->zfree(kwe_zone, nkwe);
			nkwe = NULL;
		}
	} else if (nkwe != NULL) {
		// ...otherwise, insert the newly allocated prepost.
		ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq);
		nkwe = NULL;
	} else {
		panic("failed to allocate kwe\n");
	}

	*nkwep = nkwe;
}

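/*
 * Common implementation behind cvsignal and cvbroad. Validates the L/U/S
 * snapshot from userspace, then either wakes the specific thread named by
 * threadport, wakes one eligible waiter, or falls back to a broadcast.
 */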
static int
__psynch_cvsignal(user_addr_t cv,
		  uint32_t cgen,
		  uint32_t cugen,
		  uint32_t csgen,
		  uint32_t flags,
		  int broadcast,
		  mach_port_name_t threadport,
		  uint32_t *retval)
{
	int error = 0;
	thread_t th = THREAD_NULL;
	ksyn_wait_queue_t kwq;

	uint32_t uptoseq = cgen & PTHRW_COUNT_MASK;
	uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;

	// validate sane L, U, and S values
	if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) {
		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
		return EINVAL;
	}

	if (threadport != 0) {
		th = port_name_to_thread((mach_port_name_t)threadport);
		if (th == THREAD_NULL) {
			return ESRCH;
		}
	}

	error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq);
	if (error == 0) {
		uint32_t updatebits = 0;
		ksyn_waitq_element_t nkwe = NULL;

		ksyn_wqlock(kwq);

		// update L, U and S...
		UPDATE_CVKWQ(kwq, cgen, cugen, csgen);

		if (!broadcast) {
			// No need to signal if the CV is already balanced.
			if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
				ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe);
			}
		}

		if (broadcast) {
			ksyn_handle_cvbroad(kwq, uptoseq, &updatebits);
		}

		kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
		// set C or P bits and free if needed
		ksyn_cvupdate_fixup(kwq, &updatebits);
		*retval = updatebits;

		ksyn_wqunlock(kwq);

		if (nkwe != NULL) {
			pthread_kern->zfree(kwe_zone, nkwe);
		}

		ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
	}

	if (th != NULL) {
		thread_deallocate(th);
	}

	return error;
}

/*
 * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
 */
int
_psynch_cvbroad(__unused proc_t p,
		user_addr_t cv,
		uint64_t cvlsgen,
		uint64_t cvudgen,
		uint32_t flags,
		__unused user_addr_t mutex,
		__unused uint64_t mugen,
		__unused uint64_t tid,
		uint32_t *retval)
{
	uint32_t diffgen = cvudgen & 0xffffffff;
	uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
	if (count > pthread_kern->get_task_threadmax()) {
		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
		return EBUSY;
	}

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t cugen = (cvudgen >> 32) & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval);
}

/*
 * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
 */
int
_psynch_cvsignal(__unused proc_t p,
		 user_addr_t cv,
		 uint64_t cvlsgen,
		 uint32_t cvugen,
		 int threadport,
		 __unused user_addr_t mutex,
		 __unused uint64_t mugen,
		 __unused uint64_t tid,
		 uint32_t flags,
		 uint32_t *retval)
{
	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval);
}

/*
 * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
 */
int
_psynch_cvwait(__unused proc_t p,
	       user_addr_t cv,
	       uint64_t cvlsgen,
	       uint32_t cvugen,
	       user_addr_t mutex,
	       uint64_t mugen,
	       uint32_t flags,
	       int64_t sec,
	       uint32_t nsec,
	       uint32_t *retval)
{
	int error = 0;
	uint32_t updatebits = 0;
	ksyn_wait_queue_t ckwq = NULL;
	ksyn_waitq_element_t kwe, nkwe = NULL;

	/* for conformance reasons */
	pthread_kern->__pthread_testcancel(0);

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t ugen = (mugen >> 32) & 0xffffffff;
	uint32_t mgen = mugen & 0xffffffff;

	uint32_t lockseq = (cgen & PTHRW_COUNT_MASK);

	/*
	 * In cvwait the U word can be out of range, as the cv could be used only
	 * for timeouts. However the S word needs to be within bounds and
	 * validated at user level as well.
	 */
	if (is_seqhigher_eq(csgen, lockseq) != 0) {
		__FAILEDUSERTEST__("psynch_cvwait: invalid sequence numbers\n");
		return EINVAL;
	}

	error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
	if (error != 0) {
		return error;
	}

	if (mutex != 0) {
		error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL);
		if (error != 0) {
			goto out;
		}
	}

	ksyn_wqlock(ckwq);

	// update L, U and S...
	UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);

	/* Look for the sequence for prepost (or a conflicting thread) */
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
	if (kwe != NULL) {
		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
				/* we can safely consume a reference, so do so */
				if (--kwe->kwe_count == 0) {
					ksyn_queue_remove_item(ckwq, kq, kwe);
					ckwq->kw_fakecount--;
					nkwe = kwe;
				}
			} else {
				/*
				 * consuming a prepost higher than our lock sequence is valid, but
				 * can leave the higher thread without a match. Convert the entry
				 * to a broadcast to compensate for this.
				 */
				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
#if __TESTPANICS__
				if (updatebits != 0)
					panic("psynch_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits);
#endif /* __TESTPANICS__ */
			}
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// XXX
			// Nothing to do.
		} else if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
			error = EBUSY;
		} else {
			panic("psynch_cvwait: unexpected wait queue element type\n");
		}

		if (error == 0) {
			updatebits = PTHRW_INC;
			ckwq->kw_sword += PTHRW_INC;

			/* set C or P bits and free if needed */
			ksyn_cvupdate_fixup(ckwq, &updatebits);
			*retval = updatebits;
		}
	} else {
		uint64_t abstime = 0;

		if (sec != 0 || (nsec & 0x3fffffff) != 0) {
			struct timespec ts;
			ts.tv_sec = (__darwin_time_t)sec;
			ts.tv_nsec = (nsec & 0x3fffffff);
			nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
			clock_absolutetime_interval_to_deadline(abstime, &abstime);
		}

		error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue, kThreadWaitPThreadCondVar);
		// ksyn_wait drops wait queue lock
	}

	ksyn_wqunlock(ckwq);

	if (nkwe != NULL) {
		pthread_kern->zfree(kwe_zone, nkwe);
	}
out:
	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	return error;
}

void
psynch_cvcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t ckwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(ckwq);
		/* just in case it got woken up as we were granting */
		pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);

		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
			/* the condition var got granted;
			 * reset the error so that the thread returns back.
			 */
			error = 0;
			/* no need to set any bits, just return, as cvsig/broad covers this */
		} else {
			ckwq->kw_sword += PTHRW_INC;

			/* set C and P bits, in the local error */
			if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
				error |= ECVCERORR;
				if (ckwq->kw_inqueue != 0) {
					ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1);
				}
				ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
				ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
			} else {
				/* is everything in the queue a fake entry? */
				if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
					error |= ECVPERORR;
				}
			}
		}
		ksyn_wqunlock(ckwq);
	} else {
		int val = 0;
		// PTH_RWL_MTX_WAIT is removed
		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
			val = PTHRW_INC | PTH_RWS_CV_CBIT;
		}
		pthread_kern->uthread_set_returnval(uth, val);
	}

	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_cvclrprepost: This system call clears pending preposts if present.
 */
int
_psynch_cvclrprepost(__unused proc_t p,
		     user_addr_t cv,
		     uint32_t cvgen,
		     uint32_t cvugen,
		     uint32_t cvsgen,
		     __unused uint32_t prepocnt,
		     uint32_t preposeq,
		     uint32_t flags,
		     int *retval)
{
	int error = 0;
	int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
	int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP;
	ksyn_wait_queue_t kwq = NULL;

	*retval = 0;

	error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	if (mutex) {
		int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
		if (firstfit && kwq->kw_pre_rwwc != 0) {
			if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) {
				// clear prepost
				kwq->kw_pre_rwwc = 0;
				kwq->kw_pre_lockseq = 0;
			}
		}
	} else {
		ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, wqtype);
	return error;
}

/* ***************** pthread_rwlock ************************ */

static int
__psynch_rw_lock(int type,
		 user_addr_t rwlock,
		 uint32_t lgenval,
		 uint32_t ugenval,
		 uint32_t rw_wc,
		 int flags,
		 uint32_t *retval)
{
	int prepost_type, kqi;

	if (type == PTH_RW_TYPE_READ) {
		prepost_type = KW_UNLOCK_PREPOST_READLOCK;
		kqi = KSYN_QUEUE_READ;
	} else {
		prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
		kqi = KSYN_QUEUE_WRITER;
	}

	uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;

	int error;
	ksyn_wait_queue_t kwq;
	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
	if (error == 0) {
		ksyn_wqlock(kwq);
		_ksyn_check_init(kwq, lgenval);
		if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) ||
		    // handle overlap first as they are not counted against pre_rwwc
		    (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) ||
		    _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) {
			ksyn_wqunlock(kwq);
		} else {
			block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
				kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
			error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL, block_hint);
			// ksyn_wait drops wait queue lock
			if (error == 0) {
				uthread_t uth = current_uthread();
				ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
				*retval = kwe->kwe_psynchretval;
			}
		}
		ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
	}
	return error;
}

/*
 * psynch_rw_rdlock: This system call is used for psynch rwlock readers to block.
 */
int
_psynch_rw_rdlock(__unused proc_t p,
		  user_addr_t rwlock,
		  uint32_t lgenval,
		  uint32_t ugenval,
		  uint32_t rw_wc,
		  int flags,
		  uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval);
}

/*
 * psynch_rw_longrdlock: This system call is used for psynch rwlock long readers to block.
 */
int
_psynch_rw_longrdlock(__unused proc_t p,
		      __unused user_addr_t rwlock,
		      __unused uint32_t lgenval,
		      __unused uint32_t ugenval,
		      __unused uint32_t rw_wc,
		      __unused int flags,
		      __unused uint32_t *retval)
{
	return ESRCH;
}

/*
 * psynch_rw_wrlock: This system call is used for psynch rwlock writers to block.
 */
int
_psynch_rw_wrlock(__unused proc_t p,
		  user_addr_t rwlock,
		  uint32_t lgenval,
		  uint32_t ugenval,
		  uint32_t rw_wc,
		  int flags,
		  uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval);
}

/*
 * psynch_rw_yieldwrlock: This system call is used for psynch rwlock yielding writers to block.
 */
int
_psynch_rw_yieldwrlock(__unused proc_t p,
		       __unused user_addr_t rwlock,
		       __unused uint32_t lgenval,
		       __unused uint32_t ugenval,
		       __unused uint32_t rw_wc,
		       __unused int flags,
		       __unused uint32_t *retval)
{
	return ESRCH;
}

/*
 * psynch_rw_unlock: This system call is used for unlock state postings. This will grant the appropriate
 * reader/writer variety lock.
 */
int
_psynch_rw_unlock(__unused proc_t p,
		  user_addr_t rwlock,
		  uint32_t lgenval,
		  uint32_t ugenval,
		  uint32_t rw_wc,
		  int flags,
		  uint32_t *retval)
{
	int error = 0;
	ksyn_wait_queue_t kwq;
	uint32_t updatebits = 0;
	int diff;
	uint32_t count = 0;
	uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
	int clearedkflags = 0;

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);
	int isinit = _ksyn_check_init(kwq, lgenval);

	/* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq) != 0)) {
		error = 0;
		goto out;
	}

	/* If L-U != num of waiters, then it needs to be preposted or is spurious */
	diff = find_diff(lgenval, ugenval);

	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		if ((count == 0) || (count < (uint32_t)diff))
			goto prepost;
	}

	/* no prepost and all threads are in place, reset the bit */
	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)) {
		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
		clearedkflags = 1;
	}

	/* can handle unlock now */

	CLEAR_PREPOST_BITS(kwq);

	error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n", error);
#endif /* __TESTPANICS__ */
out:
	if (error == 0) {
		/* update bits?? */
		*retval = updatebits;
	}

	// <rdar://problem/22244050> If any of the wakeups failed because they already
	// returned to userspace because of a signal then we need to ensure that the
	// reset state is not cleared when that thread returns. Otherwise,
	// _pthread_rwlock_lock will clear the interrupted state before it is read.
	if (clearedkflags != 0 && kwq->kw_pre_intrcount > 0) {
		kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));

	return error;

prepost:
	/* update if the new seq is higher than prev prepost, or first set */
	if (is_rws_setseq(kwq->kw_pre_sseq) ||
	    is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) {
		kwq->kw_pre_rwwc = (diff - count);
		kwq->kw_pre_lockseq = curgen;
		kwq->kw_pre_sseq = rw_wc;
		updatebits = lgenval;	/* let this not do unlock handling */
	}
	error = 0;
	goto out;
}

/* ************************************************************************** */
void
pth_global_hashinit(void)
{
	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
}

void
_pth_proc_hashinit(proc_t p)
{
	void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash);
	if (ptr == NULL) {
		panic("pth_proc_hashinit: hash init returned 0\n");
	}

	pthread_kern->proc_set_pthhash(p, ptr);
}

static int
ksyn_wq_hash_lookup(user_addr_t uaddr,
		    proc_t p,
		    int flags,
		    ksyn_wait_queue_t *out_kwq,
		    struct pthhashhead **out_hashptr,
		    uint64_t *out_object,
		    uint64_t *out_offset)
{
	int res = 0;
	ksyn_wait_queue_t kwq;
	uint64_t object = 0, offset = 0;
	struct pthhashhead *hashptr;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		hashptr = pth_glob_hashtbl;
		res = ksyn_findobj(uaddr, &object, &offset);
		if (res == 0) {
			LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) {
				if (kwq->kw_object == object && kwq->kw_offset == offset) {
					break;
				}
			}
		} else {
			kwq = NULL;
		}
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
		LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) {
			if (kwq->kw_addr == uaddr) {
				break;
			}
		}
	}
	*out_kwq = kwq;
	*out_object = object;
	*out_offset = offset;
	*out_hashptr = hashptr;
	return res;
}

void
_pth_proc_hashdelete(proc_t p)
{
	struct pthhashhead *hashptr;
	ksyn_wait_queue_t kwq;
	unsigned long hashsize = pthhash + 1;
	unsigned long i;

	hashptr = pthread_kern->proc_get_pthhash(p);
	pthread_kern->proc_set_pthhash(p, NULL);
	if (hashptr == NULL) {
		return;
	}

	pthread_list_lock();
	for (i = 0; i < hashsize; i++) {
		while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
			if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
			}
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
				LIST_REMOVE(kwq, kw_list);
			}
			pthread_list_unlock();
			/* release fake entries if present for cvars */
			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
			lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
			pthread_kern->zfree(kwq_zone, kwq);
			pthread_list_lock();
		}
	}
	pthread_list_unlock();
	FREE(hashptr, M_PROC);
}

/* no lock held for this as the waitqueue is getting freed */
void
ksyn_freeallkwe(ksyn_queue_t kq)
{
	ksyn_waitq_element_t kwe;
	while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
		if (kwe->kwe_state != KWE_THREAD_INWAIT) {
			pthread_kern->zfree(kwe_zone, kwe);
		}
	}
}

1619 /* find kernel waitqueue, if not present create one. Grants a reference */
1620 int
1621 ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp)
1622 {
1623 int res = 0;
1624 ksyn_wait_queue_t kwq = NULL;
1625 ksyn_wait_queue_t nkwq = NULL;
1626 struct pthhashhead *hashptr;
1627 proc_t p = current_proc();
1628
1629 uint64_t object = 0, offset = 0;
1630 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
1631 res = ksyn_findobj(uaddr, &object, &offset);
1632 hashptr = pth_glob_hashtbl;
1633 } else {
1634 hashptr = pthread_kern->proc_get_pthhash(p);
1635 }
1636
1637 while (res == 0) {
1638 pthread_list_lock();
1639 res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset);
1640 if (res != 0) {
1641 pthread_list_unlock();
1642 break;
1643 }
1644 if (kwq == NULL && nkwq == NULL) {
1645 // Drop the lock to allocate a new kwq and retry.
1646 pthread_list_unlock();
1647
1648 nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone);
1649 bzero(nkwq, sizeof(struct ksyn_wait_queue));
1650 int i;
1651 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
1652 ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
1653 }
1654 lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
1655 continue;
1656 } else if (kwq == NULL && nkwq != NULL) {
1657 // Still not found, add the new kwq to the hash.
1658 kwq = nkwq;
1659 nkwq = NULL; // Don't free.
1660 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
1661 kwq->kw_pflags |= KSYN_WQ_SHARED;
1662 LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash);
1663 } else {
1664 LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash);
1665 }
1666 kwq->kw_pflags |= KSYN_WQ_INHASH;
1667 } else if (kwq != NULL) {
1668 // Found an existing kwq, use it.
1669 if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
1670 LIST_REMOVE(kwq, kw_list);
1671 kwq->kw_pflags &= ~KSYN_WQ_FLIST;
1672 }
1673 if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
1674 if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) {
1675 if (kwq->kw_iocount == 0) {
1676 kwq->kw_type = 0; // mark for reinitialization
1677 } else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) {
1678 /* if all users are unlockers then wait for it to finish */
1679 kwq->kw_pflags |= KSYN_WQ_WAITING;
1680 // Drop the lock and wait for the kwq to be free.
1681 (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
1682 continue;
1683 } else {
1684 __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
1685 res = EINVAL;
1686 }
1687 } else {
1688 __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
1689 res = EINVAL;
1690 }
1691 }
1692 }
1693 if (res == 0) {
1694 if (kwq->kw_type == 0) {
1695 kwq->kw_addr = uaddr;
1696 kwq->kw_object = object;
1697 kwq->kw_offset = offset;
1698 kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
1699 CLEAR_REINIT_BITS(kwq);
1700 kwq->kw_lword = mgen;
1701 kwq->kw_uword = ugen;
1702 kwq->kw_sword = sgen;
1703 kwq->kw_owner = 0;
1704 kwq->kw_kflags = 0;
1705 kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
1706 }
1707 kwq->kw_iocount++;
1708 if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
1709 kwq->kw_dropcount++;
1710 }
1711 }
1712 pthread_list_unlock();
1713 break;
1714 }
1715 if (kwqp != NULL) {
1716 *kwqp = kwq;
1717 }
1718 if (nkwq) {
1719 lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
1720 pthread_kern->zfree(kwq_zone, nkwq);
1721 }
1722 return res;
1723 }
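/*
 * The loop above uses a common allocate-and-retry dance: the hash lock
 * cannot be held across a potentially blocking zalloc(), so the lookup
 * drops the lock, allocates, and retries, discarding the preallocated
 * kwq if another thread won the race. A minimal user-space sketch of
 * the same pattern, assuming illustrative names (entry, bucket, etc.)
 * that are not part of this file:
 */
#if 0 /* illustrative sketch, not compiled with this file */
#include <pthread.h>
#include <stdlib.h>
#include <sys/queue.h>

struct entry {
	LIST_ENTRY(entry) link;
	unsigned long key;
};
static LIST_HEAD(, entry) bucket = LIST_HEAD_INITIALIZER(bucket);
static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

static struct entry *
find_or_create(unsigned long key)
{
	struct entry *e, *ne = NULL;
	for (;;) {
		pthread_mutex_lock(&bucket_lock);
		LIST_FOREACH(e, &bucket, link) {
			if (e->key == key)
				break;
		}
		if (e == NULL && ne == NULL) {
			/* drop the lock to allocate, then retry the lookup */
			pthread_mutex_unlock(&bucket_lock);
			ne = calloc(1, sizeof(*ne));
			if (ne == NULL)
				return NULL;
			continue;
		}
		if (e == NULL) {
			/* still absent: publish the preallocated entry */
			e = ne;
			ne = NULL;
			e->key = key;
			LIST_INSERT_HEAD(&bucket, e, link);
		}
		pthread_mutex_unlock(&bucket_lock);
		break;
	}
	if (ne != NULL)
		free(ne);	/* lost the race; discard the extra */
	return e;
}
#endif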
1724
1725 /* Reference from find is dropped here. Starts the free process if needed */
1726 void
1727 ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
1728 {
1729 uint64_t deadline;
1730 ksyn_wait_queue_t free_elem = NULL;
1731
1732 pthread_list_lock();
1733 if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
1734 kwq->kw_dropcount--;
1735 }
1736 if (--kwq->kw_iocount == 0) {
1737 if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
1738 /* someone is waiting for the waitqueue, wake them up */
1739 kwq->kw_pflags &= ~KSYN_WQ_WAITING;
1740 wakeup(&kwq->kw_pflags);
1741 }
1742
1743 if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) {
1744 if (qfreenow == 0) {
1745 microuptime(&kwq->kw_ts);
1746 LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
1747 kwq->kw_pflags |= KSYN_WQ_FLIST;
1748 if (psynch_cleanupset == 0) {
1749 struct timeval t;
1750 microuptime(&t);
1751 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1752 deadline = tvtoabstime(&t);
1753 thread_call_enter_delayed(psynch_thcall, deadline);
1754 psynch_cleanupset = 1;
1755 }
1756 } else {
1757 kwq->kw_pflags &= ~KSYN_WQ_INHASH;
1758 LIST_REMOVE(kwq, kw_hash);
1759 free_elem = kwq;
1760 }
1761 }
1762 }
1763 pthread_list_unlock();
1764 if (free_elem != NULL) {
1765 lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp);
1766 pthread_kern->zfree(kwq_zone, free_elem);
1767 }
1768 }
1769
1770 /* responsible for freeing the waitqueues */
1771 void
1772 psynch_wq_cleanup(__unused void *param, __unused void * param1)
1773 {
1774 ksyn_wait_queue_t kwq;
1775 struct timeval t;
1776 int reschedule = 0;
1777 uint64_t deadline = 0;
1778 LIST_HEAD(, ksyn_wait_queue) freelist;
1779 LIST_INIT(&freelist);
1780
1781 pthread_list_lock();
1782
1783 microuptime(&t);
1784
1785 LIST_FOREACH(kwq, &pth_free_list, kw_list) {
1786 if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) {
1787 // still in use
1788 continue;
1789 }
1790 __darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec;
1791 if (diff < 0)
1792 diff *= -1;
1793 if (diff >= KSYN_CLEANUP_DEADLINE) {
1794 kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
1795 LIST_REMOVE(kwq, kw_hash);
1796 LIST_REMOVE(kwq, kw_list);
1797 LIST_INSERT_HEAD(&freelist, kwq, kw_list);
1798 } else {
1799 reschedule = 1;
1800 }
1801
1802 }
1803 if (reschedule != 0) {
1804 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1805 deadline = tvtoabstime(&t);
1806 thread_call_enter_delayed(psynch_thcall, deadline);
1807 psynch_cleanupset = 1;
1808 } else {
1809 psynch_cleanupset = 0;
1810 }
1811 pthread_list_unlock();
1812
1813 while ((kwq = LIST_FIRST(&freelist)) != NULL) {
1814 LIST_REMOVE(kwq, kw_list);
1815 lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
1816 pthread_kern->zfree(kwq_zone, kwq);
1817 }
1818 }
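/*
 * ksyn_wqrelease() parks idle waitqueues on pth_free_list with a
 * microuptime() stamp and schedules a delayed thread call; the cleanup
 * pass above then frees entries that have sat idle for at least
 * KSYN_CLEANUP_DEADLINE seconds and reschedules itself while any
 * remain. A compact user-space sketch of this deferred-reclamation
 * idea, with illustrative names (idle_item, IDLE_DEADLINE):
 */
#if 0 /* illustrative sketch, not compiled with this file */
#include <sys/queue.h>
#include <stdlib.h>
#include <time.h>

#define IDLE_DEADLINE 10 /* seconds; stands in for KSYN_CLEANUP_DEADLINE */

struct idle_item {
	LIST_ENTRY(idle_item) link;
	time_t parked_at;
};
static LIST_HEAD(, idle_item) idle_list = LIST_HEAD_INITIALIZER(idle_list);

/* called periodically; returns nonzero if a rescan should be scheduled */
static int
reap_idle(void)
{
	struct idle_item *it, *tmp;
	time_t now = time(NULL);
	int reschedule = 0;

	LIST_FOREACH_SAFE(it, &idle_list, link, tmp) {
		if (now - it->parked_at >= IDLE_DEADLINE) {
			LIST_REMOVE(it, link);
			free(it);
		} else {
			reschedule = 1;	/* some entries are not ripe yet */
		}
	}
	return reschedule;
}
#endif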
1819
1820 static int
1821 _wait_result_to_errno(wait_result_t result)
1822 {
1823 int res = 0;
1824 switch (result) {
1825 case THREAD_TIMED_OUT:
1826 res = ETIMEDOUT;
1827 break;
1828 case THREAD_INTERRUPTED:
1829 res = EINTR;
1830 break;
1831 }
1832 return res;
1833 }
1834
1835 int
1836 ksyn_wait(ksyn_wait_queue_t kwq,
1837 int kqi,
1838 uint32_t lockseq,
1839 int fit,
1840 uint64_t abstime,
1841 thread_continue_t continuation,
1842 block_hint_t block_hint)
1843 {
1844 int res;
1845
1846 thread_t th = current_thread();
1847 uthread_t uth = pthread_kern->get_bsdthread_info(th);
1848 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
1849 bzero(kwe, sizeof(*kwe));
1850 kwe->kwe_count = 1;
1851 kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
1852 kwe->kwe_state = KWE_THREAD_INWAIT;
1853 kwe->kwe_uth = uth;
1854 kwe->kwe_tid = thread_tid(th);
1855
1856 res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
1857 if (res != 0) {
1858 //panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
1859 ksyn_wqunlock(kwq);
1860 return res;
1861 }
1862
1863 thread_set_pending_block_hint(th, block_hint);
1864 assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
1865 ksyn_wqunlock(kwq);
1866
1867 kern_return_t ret;
1868 if (continuation == THREAD_CONTINUE_NULL) {
1869 ret = thread_block(NULL);
1870 } else {
1871 ret = thread_block_parameter(continuation, kwq);
1872
1873 // If thread_block_parameter returns (interrupted) call the
1874 // continuation manually to clean up.
1875 continuation(kwq, ret);
1876
1877 // NOT REACHED
1878 panic("ksyn_wait continuation returned");
1879 }
1880
1881 res = _wait_result_to_errno(ret);
1882 if (res != 0) {
1883 ksyn_wqlock(kwq);
1884 if (kwe->kwe_kwqqueue) {
1885 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1886 }
1887 ksyn_wqunlock(kwq);
1888 }
1889 return res;
1890 }
1891
1892 kern_return_t
1893 ksyn_signal(ksyn_wait_queue_t kwq,
1894 int kqi,
1895 ksyn_waitq_element_t kwe,
1896 uint32_t updateval)
1897 {
1898 kern_return_t ret;
1899
1900 // If no wait element was specified, wake the first.
1901 if (!kwe) {
1902 kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
1903 if (!kwe) {
1904 panic("ksyn_signal: signaling empty queue");
1905 }
1906 }
1907
1908 if (kwe->kwe_state != KWE_THREAD_INWAIT) {
1909 panic("ksyn_signal: signaling non-waiting element");
1910 }
1911
1912 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1913 kwe->kwe_psynchretval = updateval;
1914
1915 ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
1916 if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
1917 panic("ksyn_signal: failed to wake up thread: %x\n", ret);
1918 }
1919 return ret;
1920 }
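/*
 * ksyn_wait() parks the thread on &kwe->kwe_psynchretval, and
 * ksyn_signal() deposits updateval there before issuing the wakeup, so
 * the waiter finds its return bits already in place when it resumes. A
 * user-space analog of that deposit-then-wake handshake, with all
 * names illustrative:
 */
#if 0 /* illustrative sketch, not compiled with this file */
#include <pthread.h>
#include <stdint.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t c = PTHREAD_COND_INITIALIZER;
static uint32_t retval;
static int posted;

static uint32_t
wait_for_grant(void)
{
	uint32_t v;
	pthread_mutex_lock(&m);
	while (!posted)
		pthread_cond_wait(&c, &m);
	v = retval;		/* value was deposited before the wake */
	posted = 0;
	pthread_mutex_unlock(&m);
	return v;
}

static void
grant(uint32_t updateval)
{
	pthread_mutex_lock(&m);
	retval = updateval;	/* deposit first ... */
	posted = 1;
	pthread_cond_signal(&c); /* ... then wake one waiter */
	pthread_mutex_unlock(&m);
}
#endif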
1921
1922 int
1923 ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
1924 {
1925 kern_return_t ret;
1926 vm_page_info_basic_data_t info;
1927 mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
1928 ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
1929 if (ret != KERN_SUCCESS) {
1930 return EINVAL;
1931 }
1932
1933 if (objectp != NULL) {
1934 *objectp = (uint64_t)info.object_id;
1935 }
1936 if (offsetp != NULL) {
1937 *offsetp = (uint64_t)info.offset;
1938 }
1939
1940 return(0);
1941 }
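/*
 * For process-shared synchronizers, the identity of the object is the
 * backing (VM object id, offset) pair returned above rather than the
 * per-process virtual address, since different processes may map the
 * same page at different addresses. That is why ksyn_wqfind() indexes
 * the global table by object and the per-process table by uaddr. A
 * small sketch of that keying decision (function and parameter names
 * illustrative):
 */
#if 0 /* illustrative sketch, not compiled with this file */
#include <stdint.h>

static unsigned
kwq_bucket(int shared, uint64_t object, uint64_t uaddr, unsigned hash_mask)
{
	/* shared objects hash on VM object identity; private on address */
	return (unsigned)((shared ? object : uaddr) & hash_mask);
}
#endif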
1942
1943
1944 /* find the lowest of the pending read (kw_fr) and write (kw_fwr) sequences */
1945 int
1946 kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[])
1947 {
1948 uint32_t kw_fr, kw_fwr, low;
1949 int type = 0, lowtype, typenum[2] = { 0 };
1950 uint32_t numbers[2] = { 0 };
1951 int count = 0, i;
1952
1953
1954 if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
1955 type |= PTH_RWSHFT_TYPE_READ;
1956 /* read entries are present */
1957 if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
1958 kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
1959 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
1960 kw_fr = premgen;
1961 } else
1962 kw_fr = premgen;
1963
1964 lowest[KSYN_QUEUE_READ] = kw_fr;
1965 numbers[count] = kw_fr;
1966 typenum[count] = PTH_RW_TYPE_READ;
1967 count++;
1968 } else
1969 lowest[KSYN_QUEUE_READ] = 0;
1970
1971 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
1972 type |= PTH_RWSHFT_TYPE_WRITE;
1973 /* write entries are present */
1974 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
1975 kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
1976 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
1977 kw_fwr = premgen;
1978 } else
1979 kw_fwr = premgen;
1980
1981 lowest[KSYN_QUEUE_WRITER] = kw_fwr;
1982 numbers[count] = kw_fwr;
1983 typenum[count] = PTH_RW_TYPE_WRITE;
1984 count++;
1985 } else
1986 lowest[KSYN_QUEUE_WRITER] = 0;
1987
1988 #if __TESTPANICS__
1989 if (count == 0)
1990 panic("nothing in the queue???\n");
1991 #endif /* __TESTPANICS__ */
1992
1993 low = numbers[0];
1994 lowtype = typenum[0];
1995 if (count > 1) {
1996 for (i = 1; i < count; i++) {
1997 if (is_seqlower(numbers[i], low) != 0) {
1998 low = numbers[i];
1999 lowtype = typenum[i];
2000 }
2001 }
2002 }
2003 type |= lowtype;
2004
2005 if (typep != NULL)
2006 *typep = type;
2007 return(0);
2008 }
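/*
 * The "lowest" computations above rely on is_seqlower()/is_seqhigher()
 * being wraparound-aware: the sequence words are 32-bit counters that
 * wrap, so ordering is defined by which value is "behind" the other on
 * the circle, not by plain integer comparison. A self-contained sketch
 * of that style of comparison (serial-number arithmetic; this mirrors
 * the intent, not necessarily the exact implementation in
 * synch_internal.h):
 */
#if 0 /* illustrative sketch, not compiled with this file */
#include <assert.h>
#include <stdint.h>

/* x is "lower" than y if the forward distance from x to y is < 2^31 */
static int
seq_lower(uint32_t x, uint32_t y)
{
	return x != y && (uint32_t)(y - x) < 0x80000000u;
}

int
main(void)
{
	assert(seq_lower(1, 2));			/* plain case */
	assert(seq_lower(0xfffffff0u, 0x10));		/* across the wrap */
	assert(!seq_lower(0x10, 0xfffffff0u));
	return 0;
}
#endif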
2009
2010 /* wake up readers up to the writer limit */
2011 int
2012 ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp)
2013 {
2014 ksyn_queue_t kq;
2015 int failedwakeup = 0;
2016 int numwoken = 0;
2017 kern_return_t kret = KERN_SUCCESS;
2018 uint32_t lbits = 0;
2019
2020 lbits = updatebits;
2021
2022 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2023 while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
2024 kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
2025 if (kret == KERN_NOT_WAITING) {
2026 failedwakeup++;
2027 }
2028 numwoken++;
2029 }
2030
2031 if (wokenp != NULL)
2032 *wokenp = numwoken;
2033 return(failedwakeup);
2034 }
2035
2036
2037 /* Handles the unlock grants for the next set of waiters on rw_unlock() or on arrival of all preposted waiters */
2038 int
2039 kwq_handle_unlock(ksyn_wait_queue_t kwq,
2040 __unused uint32_t mgen,
2041 uint32_t rw_wc,
2042 uint32_t *updatep,
2043 int flags,
2044 int *blockp,
2045 uint32_t premgen)
2046 {
2047 uint32_t low_writer, limitrdnum;
2048 int rwtype, error=0;
2049 int allreaders, failed;
2050 uint32_t updatebits = 0, numneeded = 0;
2051 int prepost = flags & KW_UNLOCK_PREPOST;
2052 thread_t preth = THREAD_NULL;
2053 ksyn_waitq_element_t kwe;
2054 uthread_t uth;
2055 thread_t th;
2056 int woken = 0;
2057 int block = 1;
2058 uint32_t lowest[KSYN_QUEUE_MAX]; /* no need for upgrade as it is handled separately */
2059 kern_return_t kret = KERN_SUCCESS;
2060 ksyn_queue_t kq;
2061 int curthreturns = 0;
2062
2063 if (prepost != 0) {
2064 preth = current_thread();
2065 }
2066
2067 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2068 kwq->kw_lastseqword = rw_wc;
2069 kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
2070 kwq->kw_overlapwatch = 0;
2071
2072 error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
2073 #if __TESTPANICS__
2074 if (error != 0)
2075 panic("rwunlock: failed to slot next round of threads");
2076 #endif /* __TESTPANICS__ */
2077
2078 low_writer = lowest[KSYN_QUEUE_WRITER];
2079
2080 allreaders = 0;
2081 updatebits = 0;
2082
2083 switch (rwtype & PTH_RW_TYPE_MASK) {
2084 case PTH_RW_TYPE_READ: {
2085 // XXX
2086 /* what about the preflight which is LREAD or READ ?? */
2087 if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
2088 if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
2089 updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
2090 }
2091 }
2092 limitrdnum = 0;
2093 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2094 limitrdnum = low_writer;
2095 } else {
2096 allreaders = 1;
2097 }
2098
2099 numneeded = 0;
2100
2101 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2102 limitrdnum = low_writer;
2103 numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
2104 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
2105 curthreturns = 1;
2106 numneeded += 1;
2107 }
2108 } else {
2109 // no writers at all
2110 // no other waiters only readers
2111 kwq->kw_overlapwatch = 1;
2112 numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
2113 if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
2114 curthreturns = 1;
2115 numneeded += 1;
2116 }
2117 }
2118
2119 updatebits += (numneeded << PTHRW_COUNT_SHIFT);
2120
2121 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2122
2123 if (curthreturns != 0) {
2124 block = 0;
2125 uth = current_uthread();
2126 kwe = pthread_kern->uthread_get_uukwe(uth);
2127 kwe->kwe_psynchretval = updatebits;
2128 }
2129
2130
2131 failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken);
2132 if (failed != 0) {
2133 kwq->kw_pre_intrcount = failed; /* actually a count */
2134 kwq->kw_pre_intrseq = limitrdnum;
2135 kwq->kw_pre_intrretbits = updatebits;
2136 kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
2137 }
2138
2139 error = 0;
2140
2141 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
2142 panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
2143 }
2144 break;
2145
2146 case PTH_RW_TYPE_WRITE: {
2147
2148 /* only one thread is going to be granted */
2149 updatebits |= (PTHRW_INC);
2150 updatebits |= PTH_RWL_KBIT | PTH_RWL_EBIT;
2151
2152 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
2153 block = 0;
2154 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
2155 updatebits |= PTH_RWL_WBIT;
2156 }
2157 th = preth;
2158 uth = pthread_kern->get_bsdthread_info(th);
2159 kwe = pthread_kern->uthread_get_uukwe(uth);
2160 kwe->kwe_psynchretval = updatebits;
2161 } else {
2162 /* we are not granting the write lock to the preposting thread */
2163 /* if there are writers present or a preposting write thread, the W bit must be set */
2164 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 ||
2165 (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
2166 updatebits |= PTH_RWL_WBIT;
2167 }
2168 /* setup next in the queue */
2169 kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
2170 if (kret == KERN_NOT_WAITING) {
2171 kwq->kw_pre_intrcount = 1; /* actually a count */
2172 kwq->kw_pre_intrseq = low_writer;
2173 kwq->kw_pre_intrretbits = updatebits;
2174 kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
2175 }
2176 error = 0;
2177 }
2178 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2179 if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
2180 panic("kwq_handle_unlock: writer lock granted but K/E bits not set %x\n", updatebits);
2181 }
2182 break;
2183
2184 default:
2185 panic("rwunlock: invalid type for lock grants");
2186
2187 }
2188
2189 if (updatep != NULL)
2190 *updatep = updatebits;
2191 if (blockp != NULL)
2192 *blockp = block;
2193 return(error);
2194 }
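/*
 * The updatebits word built above packs a grant count in the upper
 * bits (in steps of PTHRW_INC, i.e. 1 << PTHRW_COUNT_SHIFT) and state
 * flags such as the K/E/W bits in the low byte. A sketch of that
 * encoding with illustrative constant values -- the real constants
 * live in synch_internal.h and may differ:
 */
#if 0 /* illustrative sketch, not compiled with this file */
#include <stdint.h>
#include <stdio.h>

#define COUNT_SHIFT 8			/* stands in for PTHRW_COUNT_SHIFT */
#define COUNT_MASK  0xffffff00u		/* stands in for PTHRW_COUNT_MASK */
#define KBIT        0x01u		/* stands in for PTH_RWL_KBIT */
#define EBIT        0x02u		/* stands in for PTH_RWL_EBIT */

int
main(void)
{
	uint32_t numneeded = 3;
	uint32_t updatebits = (numneeded << COUNT_SHIFT) | KBIT | EBIT;

	printf("count=%u flags=%#x\n",
	    (updatebits & COUNT_MASK) >> COUNT_SHIFT,	/* -> 3 */
	    updatebits & ~COUNT_MASK);			/* -> 0x3 */
	return 0;
}
#endif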
2195
2196 /************* Indiv queue support routines ************************/
2197 void
2198 ksyn_queue_init(ksyn_queue_t kq)
2199 {
2200 TAILQ_INIT(&kq->ksynq_kwelist);
2201 kq->ksynq_count = 0;
2202 kq->ksynq_firstnum = 0;
2203 kq->ksynq_lastnum = 0;
2204 }
2205
2206 int
2207 ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit)
2208 {
2209 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2210 uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
2211 int res = 0;
2212
2213 if (kwe->kwe_kwqqueue != NULL) {
2214 panic("adding enqueued item to another queue");
2215 }
2216
2217 if (kq->ksynq_count == 0) {
2218 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2219 kq->ksynq_firstnum = lockseq;
2220 kq->ksynq_lastnum = lockseq;
2221 } else if (fit == FIRSTFIT) {
2222 /* TBD: if retry bit is set for mutex, add it to the head */
2223 /* firstfit, arriving order */
2224 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2225 if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2226 kq->ksynq_firstnum = lockseq;
2227 }
2228 if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
2229 kq->ksynq_lastnum = lockseq;
2230 }
2231 } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
2232 /* During prepost when a thread is getting cancelled, we could have two with the same seq */
2233 res = EBUSY;
2234 if (kwe->kwe_state == KWE_THREAD_PREPOST) {
2235 ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
2236 if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
2237 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2238 res = 0;
2239 }
2240 }
2241 } else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
2242 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2243 kq->ksynq_lastnum = lockseq;
2244 } else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2245 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2246 kq->ksynq_firstnum = lockseq;
2247 } else {
2248 ksyn_waitq_element_t q_kwe, r_kwe;
2249
2250 res = ESRCH;
2251 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2252 if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
2253 TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
2254 res = 0;
2255 break;
2256 }
2257 }
2258 }
2259
2260 if (res == 0) {
2261 kwe->kwe_kwqqueue = kwq;
2262 kq->ksynq_count++;
2263 kwq->kw_inqueue++;
2264 update_low_high(kwq, lockseq);
2265 }
2266 return res;
2267 }
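/*
 * The insert above keeps each queue ordered by (wrap-aware) sequence
 * number, using the cached first/last values as fast paths and falling
 * back to a linear scan with TAILQ_INSERT_BEFORE. A self-contained
 * sketch of that ordered-insert shape using plain integer keys instead
 * of wrapping sequences (names illustrative):
 */
#if 0 /* illustrative sketch, not compiled with this file */
#include <sys/queue.h>

struct node {
	TAILQ_ENTRY(node) link;
	unsigned key;
};
TAILQ_HEAD(nodehead, node);

static void
ordered_insert(struct nodehead *head, struct node *n)
{
	struct node *it, *tmp;

	TAILQ_FOREACH_SAFE(it, head, link, tmp) {
		if (it->key > n->key) {
			TAILQ_INSERT_BEFORE(it, n, link);
			return;
		}
	}
	TAILQ_INSERT_TAIL(head, n, link);	/* largest key so far */
}
#endif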
2268
2269 void
2270 ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
2271 {
2272 if (kq->ksynq_count == 0) {
2273 panic("removing item from empty queue");
2274 }
2275
2276 if (kwe->kwe_kwqqueue != kwq) {
2277 panic("removing item from wrong queue");
2278 }
2279
2280 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
2281 kwe->kwe_list.tqe_next = NULL;
2282 kwe->kwe_list.tqe_prev = NULL;
2283 kwe->kwe_kwqqueue = NULL;
2284
2285 if (--kq->ksynq_count > 0) {
2286 ksyn_waitq_element_t tmp;
2287 tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
2288 kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2289 tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
2290 kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2291 } else {
2292 kq->ksynq_firstnum = 0;
2293 kq->ksynq_lastnum = 0;
2294 }
2295
2296 if (--kwq->kw_inqueue > 0) {
2297 uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
2298 if (kwq->kw_lowseq == curseq) {
2299 kwq->kw_lowseq = find_nextlowseq(kwq);
2300 }
2301 if (kwq->kw_highseq == curseq) {
2302 kwq->kw_highseq = find_nexthighseq(kwq);
2303 }
2304 } else {
2305 kwq->kw_lowseq = 0;
2306 kwq->kw_highseq = 0;
2307 }
2308 }
2309
2310 ksyn_waitq_element_t
2311 ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
2312 {
2313 ksyn_waitq_element_t kwe;
2314
2315 // XXX: should stop searching when higher sequence number is seen
2316 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2317 if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
2318 return kwe;
2319 }
2320 }
2321 return NULL;
2322 }
2323
2324 /* find the thread at the target sequence (or a broadcast/prepost at or above) */
2325 ksyn_waitq_element_t
2326 ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
2327 {
2328 ksyn_waitq_element_t result = NULL;
2329 ksyn_waitq_element_t kwe;
2330 uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
2331
2332 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2333 if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
2334 result = kwe;
2335
2336 // KWE_THREAD_INWAIT must be strictly equal
2337 if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
2338 result = NULL;
2339 }
2340 break;
2341 }
2342 }
2343 return result;
2344 }
2345
2346 /* look for a thread at signalseq, preferring an exact match; otherwise any eligible waiter or prepost at or below uptoseq */
2347 ksyn_waitq_element_t
2348 ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
2349 {
2350 ksyn_waitq_element_t result = NULL;
2351 ksyn_waitq_element_t q_kwe, r_kwe;
2352
2353 // XXX
2354 /* case where wrap in the tail of the queue exists */
2355 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2356 if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
2357 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2358 return result;
2359 }
2360 }
2361 if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
2362 /* match any prepost at our same uptoseq or any broadcast above */
2363 if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
2364 continue;
2365 }
2366 return q_kwe;
2367 } else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
2368 /*
2369 * Match any (non-cancelled) thread at or below our upto sequence -
2370 * but prefer an exact match to our signal sequence (if present) to
2371 * keep exact matches happening.
2372 */
2373 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2374 return result;
2375 }
2376 if (q_kwe->kwe_kwqqueue == kwq) {
2377 if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
2378 /* if equal or higher than our signal sequence, return this one */
2379 if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
2380 return q_kwe;
2381 }
2382
2383 /* otherwise, just remember this eligible thread and move on */
2384 if (result == NULL) {
2385 result = q_kwe;
2386 }
2387 }
2388 }
2389 } else {
2390 panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
2391 }
2392 }
2393 return result;
2394 }
2395
2396 void
2397 ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
2398 {
2399 ksyn_waitq_element_t kwe;
2400 uint32_t tseq = upto & PTHRW_COUNT_MASK;
2401 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2402
2403 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
2404 if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
2405 break;
2406 }
2407 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2408 /*
2409 * This scenario is typically seen when the cvar is
2410 * reinitialized while new waiters are already waiting. We
2411 * return them as spurious wakeups so the cvar state gets
2412 * reset correctly.
2413 */
2414
2415 /* skip canceled ones */
2416 /* wake the rest */
2417 /* set M bit to indicate to the waking CV to return the Inc val */
2418 (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
2419 } else {
2420 ksyn_queue_remove_item(kwq, kq, kwe);
2421 pthread_kern->zfree(kwe_zone, kwe);
2422 kwq->kw_fakecount--;
2423 }
2424 }
2425 }
2426
2427 /*************************************************************************/
2428
2429 void
2430 update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
2431 {
2432 if (kwq->kw_inqueue == 1) {
2433 kwq->kw_lowseq = lockseq;
2434 kwq->kw_highseq = lockseq;
2435 } else {
2436 if (is_seqlower(lockseq, kwq->kw_lowseq)) {
2437 kwq->kw_lowseq = lockseq;
2438 }
2439 if (is_seqhigher(lockseq, kwq->kw_highseq)) {
2440 kwq->kw_highseq = lockseq;
2441 }
2442 }
2443 }
2444
2445 uint32_t
2446 find_nextlowseq(ksyn_wait_queue_t kwq)
2447 {
2448 uint32_t lowest = 0;
2449 int first = 1;
2450 int i;
2451
2452 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2453 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2454 uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
2455 if (first || is_seqlower(current, lowest)) {
2456 lowest = current;
2457 first = 0;
2458 }
2459 }
2460 }
2461
2462 return lowest;
2463 }
2464
2465 uint32_t
2466 find_nexthighseq(ksyn_wait_queue_t kwq)
2467 {
2468 uint32_t highest = 0;
2469 int first = 1;
2470 int i;
2471
2472 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2473 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2474 uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
2475 if (first || is_seqhigher(current, highest)) {
2476 highest = current;
2477 first = 0;
2478 }
2479 }
2480 }
2481
2482 return highest;
2483 }
2484
2485 int
2486 find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
2487 {
2488 int i;
2489 uint32_t count = 0;
2490
2491 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2492 count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
2493 if (count >= nwaiters) {
2494 break;
2495 }
2496 }
2497
2498 if (countp != NULL) {
2499 *countp = count;
2500 }
2501
2502 if (count == 0) {
2503 return 0;
2504 } else if (count >= nwaiters) {
2505 return 1;
2506 } else {
2507 return 0;
2508 }
2509 }
2510
2511
2512 uint32_t
2513 ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
2514 {
2515 uint32_t i = 0;
2516 ksyn_waitq_element_t kwe, newkwe;
2517
2518 if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
2519 return 0;
2520 }
2521 if (upto == kq->ksynq_firstnum) {
2522 return 1;
2523 }
2524 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2525 uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
2526 if (is_seqhigher(curval, upto)) {
2527 break;
2528 }
2529 ++i;
2530 if (upto == curval) {
2531 break;
2532 }
2533 }
2534 return i;
2535 }
2536
2537 /* handles the cond broadcast of the cvar; the woken-thread count and the bits for the syscall return are passed back via updatep */
2538 void
2539 ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
2540 {
2541 ksyn_waitq_element_t kwe, newkwe;
2542 uint32_t updatebits = 0;
2543 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
2544
2545 struct ksyn_queue kfreeq;
2546 ksyn_queue_init(&kfreeq);
2547
2548 retry:
2549 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2550 if (is_seqhigher(kwe->kwe_lockseq, upto)) {
2551 // outside our range
2552 break;
2553 }
2554
2555 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2556 // Wake only non-canceled threads waiting on this CV.
2557 if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
2558 (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
2559 updatebits += PTHRW_INC;
2560 }
2561 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
2562 kwe->kwe_state == KWE_THREAD_PREPOST) {
2563 ksyn_queue_remove_item(ckwq, kq, kwe);
2564 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2565 ckwq->kw_fakecount--;
2566 } else {
2567 panic("unknown kwe state\n");
2568 }
2569 }
2570
2571 /* Need to enter a broadcast in the queue (if not already at L == S) */
2572
2573 if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
2574 newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
2575 if (newkwe == NULL) {
2576 ksyn_wqunlock(ckwq);
2577 newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
2578 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2579 ksyn_wqlock(ckwq);
2580 goto retry;
2581 } else {
2582 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2583 ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
2584 }
2585 }
2586
2587 // free up any remaining things stumbled across above
2588 while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
2589 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2590 pthread_kern->zfree(kwe_zone, kwe);
2591 }
2592
2593 if (updatep != NULL) {
2594 *updatep = updatebits;
2595 }
2596 }
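/*
 * kfreeq above is a scratch queue: elements culled while the waitqueue
 * lock is held are parked there and only zfree()d once the lock is
 * dropped, and the same queue doubles as a one-element cache when the
 * broadcast entry must be allocated with the lock released. A
 * user-space sketch of the collect-then-free half of that pattern
 * (names illustrative):
 */
#if 0 /* illustrative sketch, not compiled with this file */
#include <pthread.h>
#include <stdlib.h>
#include <sys/queue.h>

struct item {
	TAILQ_ENTRY(item) link;
	int dead;
};
TAILQ_HEAD(itemhead, item);

static void
cull(struct itemhead *list, pthread_mutex_t *lock)
{
	struct itemhead scratch = TAILQ_HEAD_INITIALIZER(scratch);
	struct item *it, *tmp;

	pthread_mutex_lock(lock);
	TAILQ_FOREACH_SAFE(it, list, link, tmp) {
		if (it->dead) {
			TAILQ_REMOVE(list, it, link);
			TAILQ_INSERT_TAIL(&scratch, it, link);
		}
	}
	pthread_mutex_unlock(lock);

	/* free outside the lock */
	while ((it = TAILQ_FIRST(&scratch)) != NULL) {
		TAILQ_REMOVE(&scratch, it, link);
		free(it);
	}
}
#endif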
2597
2598 void
2599 ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
2600 {
2601 if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
2602 if (ckwq->kw_inqueue != 0) {
2603 /* FREE THE QUEUE */
2604 ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0);
2605 #if __TESTPANICS__
2606 if (ckwq->kw_inqueue != 0)
2607 panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
2608 #endif /* __TESTPANICS__ */
2609 }
2610 ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
2611 ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
2612 *updatebits |= PTH_RWS_CV_CBIT;
2613 } else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
2614 // only fake entries are present in the queue
2615 *updatebits |= PTH_RWS_CV_PBIT;
2616 }
2617 }
2618
2619 void
2620 psynch_zoneinit(void)
2621 {
2622 kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
2623 kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
2624 }
2625
2626 void *
2627 _pthread_get_thread_kwq(thread_t thread)
2628 {
2629 assert(thread);
2630 struct uthread * uthread = pthread_kern->get_bsdthread_info(thread);
2631 assert(uthread);
2632 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uthread);
2633 assert(kwe);
2634 ksyn_wait_queue_t kwq = kwe->kwe_kwqqueue;
2635 return kwq;
2636 }
2637
2638 /* This function is used by stackshot to determine why a thread is blocked, and report
2639 * who owns the object that the thread is blocked on. It should *only* be called if the
2640 * `block_hint' field in the relevant thread's struct is populated with something related
2641 * to pthread sync objects.
2642 */
2643 void
2644 _pthread_find_owner(thread_t thread, struct stackshot_thread_waitinfo * waitinfo)
2645 {
2646 ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread);
2647 switch (waitinfo->wait_type) {
2648 case kThreadWaitPThreadMutex:
2649 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX);
2650 waitinfo->owner = kwq->kw_owner;
2651 waitinfo->context = kwq->kw_addr;
2652 break;
2653 /* Owner of rwlock not stored in kernel space due to races. Punt
2654 * and hope that the userspace address is helpful enough. */
2655 case kThreadWaitPThreadRWLockRead:
2656 case kThreadWaitPThreadRWLockWrite:
2657 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK);
2658 waitinfo->owner = 0;
2659 waitinfo->context = kwq->kw_addr;
2660 break;
2661 /* Condvars don't have owners, so just give the userspace address. */
2662 case kThreadWaitPThreadCondVar:
2663 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);
2664 waitinfo->owner = 0;
2665 waitinfo->context = kwq->kw_addr;
2666 break;
2667 case kThreadWaitNone:
2668 default:
2669 waitinfo->owner = 0;
2670 waitinfo->context = 0;
2671 break;
2672 }
2673 }