1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
29 /*
30 * pthread_support.c
31 */
32
33 #include <sys/param.h>
34 #include <sys/queue.h>
35 #include <sys/resourcevar.h>
36 //#include <sys/proc_internal.h>
37 #include <sys/kauth.h>
38 #include <sys/systm.h>
39 #include <sys/timeb.h>
40 #include <sys/times.h>
41 #include <sys/time.h>
42 #include <sys/acct.h>
43 #include <sys/kernel.h>
44 #include <sys/wait.h>
45 #include <sys/signalvar.h>
46 #include <sys/syslog.h>
47 #include <sys/stat.h>
48 #include <sys/lock.h>
49 #include <sys/kdebug.h>
50 //#include <sys/sysproto.h>
51 //#include <sys/pthread_internal.h>
52 #include <sys/vm.h>
53 #include <sys/user.h>
54
55 #include <mach/mach_types.h>
56 #include <mach/vm_prot.h>
57 #include <mach/semaphore.h>
58 #include <mach/sync_policy.h>
59 #include <mach/task.h>
60 #include <kern/kern_types.h>
61 #include <kern/task.h>
62 #include <kern/clock.h>
63 #include <mach/kern_return.h>
64 #include <kern/thread.h>
65 #include <kern/sched_prim.h>
66 #include <kern/thread_call.h>
67 #include <kern/kalloc.h>
68 #include <kern/zalloc.h>
69 #include <kern/sched_prim.h>
70 #include <kern/processor.h>
71 #include <kern/wait_queue.h>
72 //#include <kern/mach_param.h>
73 #include <mach/mach_vm.h>
74 #include <mach/mach_param.h>
75 #include <mach/thread_policy.h>
76 #include <mach/message.h>
77 #include <mach/port.h>
78 //#include <vm/vm_protos.h>
79 #include <vm/vm_map.h>
80 #include <mach/vm_region.h>
81
82 #include <libkern/OSAtomic.h>
83
84 #include <pexpert/pexpert.h>
85 #include <sys/pthread_shims.h>
86
87 #include "kern_internal.h"
88 #include "synch_internal.h"
89 #include "kern_trace.h"
90
91 typedef struct uthread *uthread_t;
92
93 //#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
94 #define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)
95
96 #define ECVCERORR 256
97 #define ECVPERORR 512
98
99 lck_mtx_t *pthread_list_mlock;
100
101 #define PTH_HASHSIZE 100
102
103 static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
104 static unsigned long pthhash;
105
106 static LIST_HEAD(, ksyn_wait_queue) pth_free_list;
107
108 static zone_t kwq_zone; /* zone for allocation of ksyn_queue */
109 static zone_t kwe_zone; /* zone for allocation of ksyn_waitq_element */
110
111 #define SEQFIT 0
112 #define FIRSTFIT 1
113
114 struct ksyn_queue {
115 TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
116 uint32_t ksynq_count; /* number of entries in queue */
117 uint32_t ksynq_firstnum; /* lowest seq in queue */
118 uint32_t ksynq_lastnum; /* highest seq in queue */
119 };
120 typedef struct ksyn_queue *ksyn_queue_t;
121
122 enum {
123 KSYN_QUEUE_READ = 0,
124 KSYN_QUEUE_WRITER,
125 KSYN_QUEUE_MAX,
126 };
127
128 struct ksyn_wait_queue {
129 LIST_ENTRY(ksyn_wait_queue) kw_hash;
130 LIST_ENTRY(ksyn_wait_queue) kw_list;
131 user_addr_t kw_addr;
132 uint64_t kw_owner;
133 uint64_t kw_object; /* object backing in shared mode */
134 uint64_t kw_offset; /* offset inside the object in shared mode */
135 int kw_pflags; /* flags under listlock protection */
136 struct timeval kw_ts; /* timeval need for upkeep before free */
137 int kw_iocount; /* inuse reference */
138 int kw_dropcount; /* current users unlocking... */
139
140 int kw_type; /* queue type like mutex, cvar, etc */
141 uint32_t kw_inqueue; /* num of waiters held */
142 uint32_t kw_fakecount; /* number of error/prepost fakes */
143 uint32_t kw_highseq; /* highest seq in the queue */
144 uint32_t kw_lowseq; /* lowest seq in the queue */
145 uint32_t kw_lword; /* L value from userland */
146 	uint32_t kw_uword;	/* U word value from userland */
147 uint32_t kw_sword; /* S word value from userland */
148 uint32_t kw_lastunlockseq; /* the last seq that unlocked */
149 /* for CV to be used as the seq kernel has seen so far */
150 #define kw_cvkernelseq kw_lastunlockseq
151 uint32_t kw_lastseqword; /* the last seq that unlocked */
152 /* for mutex and cvar we need to track I bit values */
153 uint32_t kw_nextseqword; /* the last seq that unlocked; with num of waiters */
154 uint32_t kw_overlapwatch; /* chance for overlaps */
155 uint32_t kw_pre_rwwc; /* prepost count */
156 uint32_t kw_pre_lockseq; /* prepost target seq */
157 uint32_t kw_pre_sseq; /* prepost target sword, in cvar used for mutexowned */
158 uint32_t kw_pre_intrcount; /* prepost of missed wakeup due to intrs */
159 uint32_t kw_pre_intrseq; /* prepost of missed wakeup limit seq */
160 uint32_t kw_pre_intrretbits; /* return bits value for missed wakeup threads */
161 	uint32_t kw_pre_intrtype;	/* type of failed wakeups */
162
163 int kw_kflags;
164 int kw_qos_override; /* QoS of max waiter during contention period */
165 struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */
166 lck_mtx_t kw_lock; /* mutex lock protecting this structure */
167 };
168 typedef struct ksyn_wait_queue * ksyn_wait_queue_t;
169
170 #define TID_ZERO (uint64_t)0
171
172 /* bits needed in handling the rwlock unlock */
173 #define PTH_RW_TYPE_READ 0x01
174 #define PTH_RW_TYPE_WRITE 0x04
175 #define PTH_RW_TYPE_MASK 0xff
176 #define PTH_RW_TYPE_SHIFT 8
177
178 #define PTH_RWSHFT_TYPE_READ 0x0100
179 #define PTH_RWSHFT_TYPE_WRITE 0x0400
180 #define PTH_RWSHFT_TYPE_MASK 0xff00
181
182 /*
183 * Mutex pshared attributes
184 */
185 #define PTHREAD_PROCESS_SHARED _PTHREAD_MTX_OPT_PSHARED
186 #define PTHREAD_PROCESS_PRIVATE 0x20
187 #define PTHREAD_PSHARED_FLAGS_MASK 0x30
188
189 /*
190 * Mutex policy attributes
191 */
192 #define _PTHREAD_MUTEX_POLICY_NONE 0
193 #define _PTHREAD_MUTEX_POLICY_FAIRSHARE 0x040 /* 1 */
194 #define _PTHREAD_MUTEX_POLICY_FIRSTFIT 0x080 /* 2 */
195 #define _PTHREAD_MUTEX_POLICY_REALTIME 0x0c0 /* 3 */
196 #define _PTHREAD_MUTEX_POLICY_ADAPTIVE 0x100 /* 4 */
197 #define _PTHREAD_MUTEX_POLICY_PRIPROTECT 0x140 /* 5 */
198 #define _PTHREAD_MUTEX_POLICY_PRIINHERIT 0x180 /* 6 */
199 #define PTHREAD_POLICY_FLAGS_MASK 0x1c0
200
201 /* pflags */
202 #define KSYN_WQ_INHASH 2
203 #define KSYN_WQ_SHARED 4
204 #define KSYN_WQ_WAITING 8 /* threads waiting for this wq to be available */
205 #define KSYN_WQ_FLIST 		0x10	/* in free list to be freed after a short delay */
206
207 /* kflags */
208 #define KSYN_KWF_INITCLEARED 1 /* the init status found and preposts cleared */
209 #define KSYN_KWF_ZEROEDOUT	2	/* the lword, etc. are initialized to 0 */
210 #define KSYN_KWF_QOS_APPLIED 4 /* QoS override applied to owner */
211
212 #define KSYN_CLEANUP_DEADLINE 10	/* in seconds */
213 static int psynch_cleanupset;
214 thread_call_t psynch_thcall;
215
216 #define KSYN_WQTYPE_INWAIT 0x1000
217 #define KSYN_WQTYPE_INDROP 0x2000
218 #define KSYN_WQTYPE_MTX 0x01
219 #define KSYN_WQTYPE_CVAR 0x02
220 #define KSYN_WQTYPE_RWLOCK 0x04
221 #define KSYN_WQTYPE_SEMA 0x08
222 #define KSYN_WQTYPE_MASK 0xff
223
224 #define KSYN_WQTYPE_MUTEXDROP (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)
225
226 #define KW_UNLOCK_PREPOST 0x01
227 #define KW_UNLOCK_PREPOST_READLOCK 0x08
228 #define KW_UNLOCK_PREPOST_WRLOCK 0x20
229
230 static void
231 CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq)
232 {
233 kwq->kw_pre_lockseq = 0;
234 kwq->kw_pre_sseq = PTHRW_RWS_INIT;
235 kwq->kw_pre_rwwc = 0;
236 }
237
238 static void
239 CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq)
240 {
241 kwq->kw_pre_intrcount = 0;
242 kwq->kw_pre_intrseq = 0;
243 kwq->kw_pre_intrretbits = 0;
244 kwq->kw_pre_intrtype = 0;
245 }
246
247 static void
248 CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
249 {
250 if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
251 if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
252 panic("CV:entries in queue durinmg reinit %d:%d\n",kwq->kw_inqueue, kwq->kw_fakecount);
253 }
254 };
255 if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
256 kwq->kw_nextseqword = PTHRW_RWS_INIT;
257 kwq->kw_overlapwatch = 0;
258 };
259 CLEAR_PREPOST_BITS(kwq);
260 kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
261 kwq->kw_lastseqword = PTHRW_RWS_INIT;
262 CLEAR_INTR_PREPOST_BITS(kwq);
263 kwq->kw_lword = 0;
264 kwq->kw_uword = 0;
265 kwq->kw_sword = PTHRW_RWS_INIT;
266 }
267
268 static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
269 static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype , ksyn_wait_queue_t *wq);
270 static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
271 static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);
272
273 static int _wait_result_to_errno(wait_result_t result);
274
275 static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t);
276 static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
277 static void ksyn_freeallkwe(ksyn_queue_t kq);
278
279 static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t);
280 static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost);
281 static void ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t, ksyn_waitq_element_t);
282 static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t);
283
284 static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);
285
286 static void ksyn_queue_init(ksyn_queue_t kq);
287 static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
288 static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
289 static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);
290
291 static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
292 static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
293 static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
294 static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);
295
296 static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);
297
298 static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
299 static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
300 static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
301 static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);
302
303 static void psynch_cvcontinue(void *, wait_result_t);
304 static void psynch_mtxcontinue(void *, wait_result_t);
305
306 static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
307 static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
308 static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);
309
310 static void
311 UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
312 {
313 int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);
314
315 // assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);
316
317 if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
318 /* the values of L,U and S are cleared out due to L==S in previous transition */
319 kwq->kw_lword = mgen;
320 kwq->kw_uword = ugen;
321 kwq->kw_sword = rw_wc;
322 kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
323 } else {
324 if (is_seqhigher(mgen, kwq->kw_lword)) {
325 kwq->kw_lword = mgen;
326 }
327 if (is_seqhigher(ugen, kwq->kw_uword)) {
328 kwq->kw_uword = ugen;
329 }
330 if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
331 kwq->kw_sword = rw_wc;
332 }
333 }
334 if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
335 kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
336 }
337 }
338
339 static void
340 pthread_list_lock(void)
341 {
342 lck_mtx_lock(pthread_list_mlock);
343 }
344
345 static void
346 pthread_list_unlock(void)
347 {
348 lck_mtx_unlock(pthread_list_mlock);
349 }
350
351 static void
352 ksyn_wqlock(ksyn_wait_queue_t kwq)
353 {
354
355 lck_mtx_lock(&kwq->kw_lock);
356 }
357
358 static void
359 ksyn_wqunlock(ksyn_wait_queue_t kwq)
360 {
361 lck_mtx_unlock(&kwq->kw_lock);
362 }
363
364
365 /* routine to handle mutex unlock postings; used both for the mutexdrop system call and for the drop during cond wait */
366 static uint32_t
367 _psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags)
368 {
369 kern_return_t ret;
370 uint32_t returnbits = 0;
371 int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
372 uint32_t nextgen = (ugen + PTHRW_INC);
373
374 ksyn_wqlock(kwq);
375 kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
376 uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);
377
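/*
 * redrive: if the waiter we signalled was already interrupted out of its
 * wait (KERN_NOT_WAITING), loop back here to pick the next candidate or
 * fall back to preposting the unlock.
 */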
378 redrive:
379 if (firstfit) {
380 if (kwq->kw_inqueue == 0) {
381 // not set or the new lock sequence is higher
382 if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) {
383 kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK);
384 }
385 kwq->kw_pre_rwwc = 1;
386 ksyn_mtx_drop_qos_override(kwq);
387 kwq->kw_owner = 0;
388 // indicate prepost content in kernel
389 returnbits = mgen | PTH_RWL_PBIT;
390 } else {
391 // signal first waiter
392 ret = ksyn_mtxsignal(kwq, NULL, updatebits);
393 if (ret == KERN_NOT_WAITING) {
394 goto redrive;
395 }
396 }
397 } else {
398 int prepost = 0;
399 if (kwq->kw_inqueue == 0) {
400 // No waiters in the queue.
401 prepost = 1;
402 } else {
403 uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK);
404 if (low_writer == nextgen) {
405 /* next seq to be granted found */
406 				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
407 ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT);
408 if (ret == KERN_NOT_WAITING) {
409 /* interrupt post */
410 kwq->kw_pre_intrcount = 1;
411 kwq->kw_pre_intrseq = nextgen;
412 kwq->kw_pre_intrretbits = updatebits;
413 kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
414 }
415
416 } else if (is_seqhigher(low_writer, nextgen)) {
417 prepost = 1;
418 } else {
419 			//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one in queue\n");
420 ksyn_waitq_element_t kwe;
421 kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
422 if (kwe != NULL) {
423 /* next seq to be granted found */
424 				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
425 ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT);
426 if (ret == KERN_NOT_WAITING) {
427 goto redrive;
428 }
429 } else {
430 prepost = 1;
431 }
432 }
433 }
434 if (prepost) {
435 ksyn_mtx_drop_qos_override(kwq);
436 kwq->kw_owner = 0;
437 if (++kwq->kw_pre_rwwc > 1) {
438 __FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
439 } else {
440 kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
441 }
442 }
443 }
444
445 ksyn_wqunlock(kwq);
446 ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
447 return returnbits;
448 }
449
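/*
 * _ksyn_check_init: detect a userland re-initialization of the lock (the
 * PTHRW_RWL_INIT bit set in lgenval).  The first caller to notice the reset
 * clears any stale prepost state carried over from the previous incarnation.
 */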
450 static int
451 _ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
452 {
453 int res = (lgenval & PTHRW_RWL_INIT) != 0;
454 if (res) {
455 if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
456 /* first to notice the reset of the lock, clear preposts */
457 CLEAR_REINIT_BITS(kwq);
458 kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
459 }
460 }
461 return res;
462 }
463
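/*
 * _ksyn_handle_missed_wakeups: a wakeup of the right type may have been
 * posted earlier but its target thread was interrupted out of the wait.  If
 * our sequence is at or below the recorded limit, consume one of those
 * saved wakeups instead of blocking.
 */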
464 static int
465 _ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq,
466 uint32_t type,
467 uint32_t lockseq,
468 uint32_t *retval)
469 {
470 int res = 0;
471 if (kwq->kw_pre_intrcount != 0 &&
472 kwq->kw_pre_intrtype == type &&
473 is_seqlower_eq(lockseq, kwq->kw_pre_intrseq)) {
474 kwq->kw_pre_intrcount--;
475 *retval = kwq->kw_pre_intrretbits;
476 if (kwq->kw_pre_intrcount == 0) {
477 CLEAR_INTR_PREPOST_BITS(kwq);
478 }
479 res = 1;
480 }
481 return res;
482 }
483
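/*
 * _ksyn_handle_overlap: fast path for read-read overlap on rwlocks.  If
 * overlap is permitted, there is no saved state and no pending writer (no
 * W bit), grant the read immediately and advance the expected seq word.
 */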
484 static int
485 _ksyn_handle_overlap(ksyn_wait_queue_t kwq,
486 uint32_t lgenval,
487 uint32_t rw_wc,
488 uint32_t *retval)
489 {
490 int res = 0;
491
492 // check for overlap and no pending W bit (indicates writers)
493 if (kwq->kw_overlapwatch != 0 &&
494 (rw_wc & PTHRW_RWS_SAVEMASK) == 0 &&
495 (lgenval & PTH_RWL_WBIT) == 0) {
496 /* overlap is set, so no need to check for valid state for overlap */
497
498 if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
499 /* increase the next expected seq by one */
500 kwq->kw_nextseqword += PTHRW_INC;
501 /* set count by one & bits from the nextseq and add M bit */
502 *retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
503 res = 1;
504 }
505 }
506 return res;
507 }
508
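/*
 * _ksyn_handle_prepost: if a prepost covers our lock sequence (lockseq at
 * or below kw_pre_lockseq), consume one reference; when the count drains to
 * zero, replay the deferred unlock via kwq_handle_unlock to compute the
 * caller's update bits.
 */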
509 static int
510 _ksyn_handle_prepost(ksyn_wait_queue_t kwq,
511 uint32_t type,
512 uint32_t lockseq,
513 uint32_t *retval)
514 {
515 int res = 0;
516 if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) {
517 kwq->kw_pre_rwwc--;
518 if (kwq->kw_pre_rwwc == 0) {
519 uint32_t preseq = kwq->kw_pre_lockseq;
520 uint32_t prerw_wc = kwq->kw_pre_sseq;
521 CLEAR_PREPOST_BITS(kwq);
522 if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0){
523 kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
524 }
525
526 int error, block;
527 uint32_t updatebits;
528 error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type|KW_UNLOCK_PREPOST), &block, lockseq);
529 if (error != 0) {
530 panic("kwq_handle_unlock failed %d\n", error);
531 }
532
533 if (block == 0) {
534 *retval = updatebits;
535 res = 1;
536 }
537 }
538 }
539 return res;
540 }
541
542 /* Helpers for QoS override management. Only applies to mutexes */
543 static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost)
544 {
545 if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
546 boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
547 int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread());
548
549 kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override);
550
551 if (prepost && kwq->kw_inqueue == 0) {
552 // if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an
553 // override, because the receiving owner may not re-enter the kernel to signal someone else if it is
554 // the last one to unlock. If other waiters end up entering the kernel, they will boost the owner
555 tid = 0;
556 }
557
558 if (tid != 0) {
559 if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) {
560 // hint continues to be accurate, and a boost was already applied
561 pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, FALSE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
562 } else {
563 // either hint did not match previous owner, or hint was accurate but mutex was not contended enough for a boost previously
564 			boolean_t boostsucceeded;
565 			
566 			boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), NULL, tid, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
567 			
568 			if (boostsucceeded) {
569 kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
570 }
571
572 if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) {
573 // the hint did not match the previous owner, so drop overrides
574 PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
575 pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
576 }
577 }
578 } else {
579 // new hint tells us that we don't know the owner, so drop any existing overrides
580 kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
581 kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
582
583 if (wasboosted && (kwq->kw_owner != 0)) {
584 // the hint did not match the previous owner, so drop overrides
585 PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
586 pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
587 }
588 }
589 }
590 }
591
592 static void ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe)
593 {
594 if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
595 boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
596
597 if (kwq->kw_inqueue > 1) {
598 boolean_t boostsucceeded;
599
600 // More than one waiter, so resource will still be contended after handing off ownership
601 boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override_for_resource(current_task(), kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
602
603 if (boostsucceeded) {
604 kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
605 }
606 } else {
607 // kw_inqueue == 1 to get to this point, which means there will be no contention after this point
608 kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
609 kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
610 }
611
612 // Remove the override that was applied to kw_owner. There may have been a race,
613 // in which case it may not match the current thread
614 if (wasboosted) {
615 if (kwq->kw_owner == 0) {
616 PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
617 } else if (thread_tid(current_thread()) != kwq->kw_owner) {
618 PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
619 pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
620 } else {
621 pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
622 }
623 }
624 }
625 }
626
627 static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq)
628 {
629 if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
630 boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
631
632 // assume nobody else in queue if this routine was called
633 kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
634 kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
635
636 // Remove the override that was applied to kw_owner. There may have been a race,
637 // in which case it may not match the current thread
638 if (wasboosted) {
639 if (kwq->kw_owner == 0) {
640 PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
641 } else if (thread_tid(current_thread()) != kwq->kw_owner) {
642 PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
643 pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), NULL, kwq->kw_owner, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
644 } else {
645 pthread_kern->proc_usynch_thread_qos_remove_override_for_resource(current_task(), current_uthread(), 0, kwq->kw_addr, THREAD_QOS_OVERRIDE_TYPE_PTHREAD_MUTEX);
646 }
647 }
648 }
649 }
650
651 /*
652 * psynch_mutexwait: This system call is used for contended psynch mutexes to block.
653 */
654
655 int
656 _psynch_mutexwait(__unused proc_t p,
657 user_addr_t mutex,
658 uint32_t mgen,
659 uint32_t ugen,
660 uint64_t tid,
661 uint32_t flags,
662 uint32_t *retval)
663 {
664 ksyn_wait_queue_t kwq;
665 int error=0;
666 int ins_flags;
667
668 int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
669 uint32_t updatebits = 0;
670
671 uint32_t lockseq = (mgen & PTHRW_COUNT_MASK);
672
673 if (firstfit == 0) {
674 ins_flags = SEQFIT;
675 } else {
676 /* first fit */
677 ins_flags = FIRSTFIT;
678 }
679
680 error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX), &kwq);
681 if (error != 0) {
682 return(error);
683 }
684
685 ksyn_wqlock(kwq);
686
687 // mutexwait passes in an owner hint at the time userspace contended for the mutex, however, the
688 // owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond
689 // to a stale snapshot after the lock has subsequently been unlocked by another thread.
690 if (tid == 0) {
691 // contender came in before owner could write TID
692 tid = 0;
693 } else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) {
694 // owner is stale, someone has come in and unlocked since this contended read the TID, so
695 // assume what is known in the kernel is accurate
696 tid = kwq->kw_owner;
697 } else if (tid == PTHREAD_MTX_TID_SWITCHING) {
698 // userspace didn't know the owner because it was being unlocked, but that unlocker hasn't
699 // reached the kernel yet. So assume what is known in the kernel is accurate
700 tid = kwq->kw_owner;
701 } else {
702 // hint is being passed in for a specific thread, and we have no reason not to trust
703 		// it (like the kernel unlock sequence being higher).
704 }
705
706
707 if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) {
708 ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
709 kwq->kw_owner = thread_tid(current_thread());
710
711 ksyn_wqunlock(kwq);
712 goto out;
713 }
714
715 if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK) ))) {
716 /* got preposted lock */
717 kwq->kw_pre_rwwc--;
718 if (kwq->kw_pre_rwwc == 0) {
719 CLEAR_PREPOST_BITS(kwq);
720 if (kwq->kw_inqueue == 0) {
721 updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
722 } else {
723 updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
724 }
725 updatebits &= ~PTH_RWL_MTX_WAIT;
726
727 if (updatebits == 0) {
728 __FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT \n");
729 }
730
731 ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
732 kwq->kw_owner = thread_tid(current_thread());
733
734 ksyn_wqunlock(kwq);
735 *retval = updatebits;
736 goto out;
737 } else {
738 __FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
739 kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
740 ksyn_wqunlock(kwq);
741 error = EINVAL;
742 goto out;
743 }
744 }
745
746 ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
747 kwq->kw_owner = tid;
748
749 error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue);
750 // ksyn_wait drops wait queue lock
751 out:
752 ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
753 return error;
754 }
755
756 void
757 psynch_mtxcontinue(void *parameter, wait_result_t result)
758 {
759 uthread_t uth = current_uthread();
760 ksyn_wait_queue_t kwq = parameter;
761 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
762
763 int error = _wait_result_to_errno(result);
764 if (error != 0) {
765 ksyn_wqlock(kwq);
766 if (kwe->kwe_kwqqueue) {
767 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
768 }
769 ksyn_wqunlock(kwq);
770 } else {
771 uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
772 pthread_kern->uthread_set_returnval(uth, updatebits);
773
774 if (updatebits == 0)
775 __FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT \n");
776 }
777 ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_MTX));
778 pthread_kern->unix_syscall_return(error);
779 }
780
781 /*
782 * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
783 */
784 int
785 _psynch_mutexdrop(__unused proc_t p,
786 user_addr_t mutex,
787 uint32_t mgen,
788 uint32_t ugen,
789 uint64_t tid __unused,
790 uint32_t flags,
791 uint32_t *retval)
792 {
793 int res;
794 ksyn_wait_queue_t kwq;
795
796 res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
797 if (res == 0) {
798 uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
799 /* drops the kwq reference */
800 if (retval) {
801 *retval = updateval;
802 }
803 }
804
805 return res;
806 }
807
808 static kern_return_t
809 ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval)
810 {
811 kern_return_t ret;
812
813 if (!kwe) {
814 kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist);
815 if (!kwe) {
816 panic("ksyn_mtxsignal: panic signaling empty queue");
817 }
818 }
819
820 ksyn_mtx_transfer_qos_override(kwq, kwe);
821 kwq->kw_owner = kwe->kwe_tid;
822
823 ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval);
824
825 // if waking the new owner failed, remove any overrides
826 if (ret != KERN_SUCCESS) {
827 ksyn_mtx_drop_qos_override(kwq);
828 kwq->kw_owner = 0;
829 }
830
831 return ret;
832 }
833
834
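/*
 * ksyn_prepost: initialize a fake (non-thread) wait queue element and insert
 * it so that a future waiter at or below lockseq finds a pending wakeup.
 */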
835 static void
836 ksyn_prepost(ksyn_wait_queue_t kwq,
837 ksyn_waitq_element_t kwe,
838 uint32_t state,
839 uint32_t lockseq)
840 {
841 bzero(kwe, sizeof(*kwe));
842 kwe->kwe_state = state;
843 kwe->kwe_lockseq = lockseq;
844 kwe->kwe_count = 1;
845
846 (void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT);
847 kwq->kw_fakecount++;
848 }
849
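/*
 * ksyn_cvsignal: find a waiter on the condvar to wake - either the specific
 * thread requested or any eligible entry at or below uptoseq.  When matching
 * would strand another waiter, the signal is converted to a broadcast; when
 * nobody is waiting yet, a prepost entry is inserted instead.
 */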
850 static void
851 ksyn_cvsignal(ksyn_wait_queue_t ckwq,
852 thread_t th,
853 uint32_t uptoseq,
854 uint32_t signalseq,
855 uint32_t *updatebits,
856 int *broadcast,
857 ksyn_waitq_element_t *nkwep)
858 {
859 ksyn_waitq_element_t kwe = NULL;
860 ksyn_waitq_element_t nkwe = NULL;
861 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
862
863 uptoseq &= PTHRW_COUNT_MASK;
864
865 // Find the specified thread to wake.
866 if (th != THREAD_NULL) {
867 uthread_t uth = pthread_kern->get_bsdthread_info(th);
868 kwe = pthread_kern->uthread_get_uukwe(uth);
869 if (kwe->kwe_kwqqueue != ckwq ||
870 is_seqhigher(kwe->kwe_lockseq, uptoseq)) {
871 // Unless it's no longer waiting on this CV...
872 kwe = NULL;
873 // ...in which case we post a broadcast instead.
874 *broadcast = 1;
875 return;
876 }
877 }
878
879 // If no thread was specified, find any thread to wake (with the right
880 // sequence number).
881 while (th == THREAD_NULL) {
882 if (kwe == NULL) {
883 kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
884 }
885 if (kwe == NULL && nkwe == NULL) {
886 // No eligible entries; need to allocate a new
887 // entry to prepost. Loop to rescan after
888 // reacquiring the lock after allocation in
889 // case anything new shows up.
890 ksyn_wqunlock(ckwq);
891 nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
892 ksyn_wqlock(ckwq);
893 } else {
894 break;
895 }
896 }
897
898 if (kwe != NULL) {
899 // If we found a thread to wake...
900 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
901 if (is_seqlower(kwe->kwe_lockseq, signalseq)) {
902 /*
903 * A valid thread in our range, but lower than our signal.
904 * Matching it may leave our match with nobody to wake it if/when
905 * it arrives (the signal originally meant for this thread might
906 * not successfully wake it).
907 *
908 * Convert to broadcast - may cause some spurious wakeups
909 * (allowed by spec), but avoids starvation (better choice).
910 */
911 *broadcast = 1;
912 } else {
913 (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
914 *updatebits += PTHRW_INC;
915 }
916 } else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
917 // Merge with existing prepost at same uptoseq.
918 kwe->kwe_count += 1;
919 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
920 // Existing broadcasts subsume this signal.
921 } else {
922 panic("unknown kwe state\n");
923 }
924 if (nkwe) {
925 /*
926 * If we allocated a new kwe above but then found a different kwe to
927 * use then we need to deallocate the spare one.
928 */
929 pthread_kern->zfree(kwe_zone, nkwe);
930 nkwe = NULL;
931 }
932 } else if (nkwe != NULL) {
933 // ... otherwise, insert the newly allocated prepost.
934 ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq);
935 nkwe = NULL;
936 } else {
937 panic("failed to allocate kwe\n");
938 }
939
940 *nkwep = nkwe;
941 }
942
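/*
 * __psynch_cvsignal: common backend for the cvsignal and cvbroad system
 * calls.  Validates the L, U and S values, finds the condvar's wait queue,
 * and wakes either one targeted waiter or everybody up to uptoseq.
 */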
943 static int
944 __psynch_cvsignal(user_addr_t cv,
945 uint32_t cgen,
946 uint32_t cugen,
947 uint32_t csgen,
948 uint32_t flags,
949 int broadcast,
950 mach_port_name_t threadport,
951 uint32_t *retval)
952 {
953 int error = 0;
954 thread_t th = THREAD_NULL;
955 ksyn_wait_queue_t kwq;
956
957 uint32_t uptoseq = cgen & PTHRW_COUNT_MASK;
958 uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;
959
960 // validate sane L, U, and S values
961 if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) {
962 __FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
963 return EINVAL;
964 }
965
966 if (threadport != 0) {
967 th = port_name_to_thread((mach_port_name_t)threadport);
968 if (th == THREAD_NULL) {
969 return ESRCH;
970 }
971 }
972
973 error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq);
974 if (error == 0) {
975 uint32_t updatebits = 0;
976 ksyn_waitq_element_t nkwe = NULL;
977
978 ksyn_wqlock(kwq);
979
980 // update L, U and S...
981 UPDATE_CVKWQ(kwq, cgen, cugen, csgen);
982
983 if (!broadcast) {
984 // No need to signal if the CV is already balanced.
985 if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
986 ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe);
987 }
988 }
989
990 if (broadcast) {
991 ksyn_handle_cvbroad(kwq, uptoseq, &updatebits);
992 }
993
994 kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
995 // set C or P bits and free if needed
996 ksyn_cvupdate_fixup(kwq, &updatebits);
997 *retval = updatebits;
998
999 ksyn_wqunlock(kwq);
1000
1001 if (nkwe != NULL) {
1002 pthread_kern->zfree(kwe_zone, nkwe);
1003 }
1004
1005 ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
1006 }
1007
1008 if (th != NULL) {
1009 thread_deallocate(th);
1010 }
1011
1012 return error;
1013 }
1014
1015 /*
1016 * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
1017 */
1018 int
1019 _psynch_cvbroad(__unused proc_t p,
1020 user_addr_t cv,
1021 uint64_t cvlsgen,
1022 uint64_t cvudgen,
1023 uint32_t flags,
1024 __unused user_addr_t mutex,
1025 __unused uint64_t mugen,
1026 __unused uint64_t tid,
1027 uint32_t *retval)
1028 {
1029 uint32_t diffgen = cvudgen & 0xffffffff;
1030 uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
1031 if (count > pthread_kern->get_task_threadmax()) {
1032 __FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
1033 return EBUSY;
1034 }
1035
1036 uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
1037 uint32_t cgen = cvlsgen & 0xffffffff;
1038 uint32_t cugen = (cvudgen >> 32) & 0xffffffff;
1039
1040 return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval);
1041 }
1042
1043 /*
1044 * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
1045 */
1046 int
1047 _psynch_cvsignal(__unused proc_t p,
1048 user_addr_t cv,
1049 uint64_t cvlsgen,
1050 uint32_t cvugen,
1051 int threadport,
1052 __unused user_addr_t mutex,
1053 __unused uint64_t mugen,
1054 __unused uint64_t tid,
1055 uint32_t flags,
1056 uint32_t *retval)
1057 {
1058 uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
1059 uint32_t cgen = cvlsgen & 0xffffffff;
1060
1061 return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval);
1062 }
1063
1064 /*
1065 * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
1066 */
1067 int
1068 _psynch_cvwait(__unused proc_t p,
1069 user_addr_t cv,
1070 uint64_t cvlsgen,
1071 uint32_t cvugen,
1072 user_addr_t mutex,
1073 uint64_t mugen,
1074 uint32_t flags,
1075 int64_t sec,
1076 uint32_t nsec,
1077 uint32_t *retval)
1078 {
1079 int error = 0;
1080 uint32_t updatebits = 0;
1081 ksyn_wait_queue_t ckwq = NULL;
1082 ksyn_waitq_element_t kwe, nkwe = NULL;
1083
1084 /* for conformance reasons */
1085 pthread_kern->__pthread_testcancel(0);
1086
1087 uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
1088 uint32_t cgen = cvlsgen & 0xffffffff;
1089 uint32_t ugen = (mugen >> 32) & 0xffffffff;
1090 uint32_t mgen = mugen & 0xffffffff;
1091
1092 uint32_t lockseq = (cgen & PTHRW_COUNT_MASK);
1093
1094 /*
1095 	 * In cvwait the U word can be out of range, since the cv may be used
1096 	 * only for timeouts. The S word, however, must be within bounds and is
1097 	 * validated at user level as well.
1098 */
1099 if (is_seqhigher_eq(csgen, lockseq) != 0) {
1100 __FAILEDUSERTEST__("psync_cvwait; invalid sequence numbers\n");
1101 return EINVAL;
1102 }
1103
1104 error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
1105 if (error != 0) {
1106 return error;
1107 }
1108
1109 if (mutex != 0) {
1110 error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL);
1111 if (error != 0) {
1112 goto out;
1113 }
1114 }
1115
1116 ksyn_wqlock(ckwq);
1117
1118 // update L, U and S...
1119 UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);
1120
1121 	/* Look for the sequence for prepost (or conflicting thread) */
1122 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
1123 kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
1124 if (kwe != NULL) {
1125 if (kwe->kwe_state == KWE_THREAD_PREPOST) {
1126 if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
1127 /* we can safely consume a reference, so do so */
1128 if (--kwe->kwe_count == 0) {
1129 ksyn_queue_remove_item(ckwq, kq, kwe);
1130 ckwq->kw_fakecount--;
1131 nkwe = kwe;
1132 }
1133 } else {
1134 /*
1135 * consuming a prepost higher than our lock sequence is valid, but
1136 * can leave the higher thread without a match. Convert the entry
1137 * to a broadcast to compensate for this.
1138 */
1139 ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
1140 #if __TESTPANICS__
1141 if (updatebits != 0)
1142 panic("psync_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits);
1143 #endif /* __TESTPANICS__ */
1144 }
1145 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
1146 // XXX
1147 // Nothing to do.
1148 } else if (kwe->kwe_state == KWE_THREAD_INWAIT) {
1149 __FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
1150 error = EBUSY;
1151 } else {
1152 panic("psync_cvwait: unexpected wait queue element type\n");
1153 }
1154
1155 if (error == 0) {
1156 updatebits = PTHRW_INC;
1157 ckwq->kw_sword += PTHRW_INC;
1158
1159 /* set C or P bits and free if needed */
1160 ksyn_cvupdate_fixup(ckwq, &updatebits);
1161 *retval = updatebits;
1162 }
1163 } else {
1164 uint64_t abstime = 0;
1165
1166 if (sec != 0 || (nsec & 0x3fffffff) != 0) {
1167 struct timespec ts;
1168 ts.tv_sec = (__darwin_time_t)sec;
1169 ts.tv_nsec = (nsec & 0x3fffffff);
1170 nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
1171 clock_absolutetime_interval_to_deadline(abstime, &abstime);
1172 }
1173
1174 error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue);
1175 // ksyn_wait drops wait queue lock
1176 }
1177
1178 ksyn_wqunlock(ckwq);
1179
1180 if (nkwe != NULL) {
1181 pthread_kern->zfree(kwe_zone, nkwe);
1182 }
1183 out:
1184 ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
1185 return error;
1186 }
1187
1188
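/*
 * psynch_cvcontinue: continuation run when a thread blocked in psynch_cvwait
 * resumes.  Note that an interrupted or timed-out wait may still have been
 * granted concurrently (PTH_RWL_MTX_WAIT set in the return value), in which
 * case the error is reset and the wakeup is treated as a success.
 */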
1189 void
1190 psynch_cvcontinue(void *parameter, wait_result_t result)
1191 {
1192 uthread_t uth = current_uthread();
1193 ksyn_wait_queue_t ckwq = parameter;
1194 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
1195
1196 int error = _wait_result_to_errno(result);
1197 if (error != 0) {
1198 ksyn_wqlock(ckwq);
1199 /* just in case it got woken up as we were granting */
1200 pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);
1201
1202 if (kwe->kwe_kwqqueue) {
1203 ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
1204 }
1205 if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
1206 			/* the condition var was granted.
1207 			 * reset the error so that the thread returns success.
1208 			 */
1209 error = 0;
1210 /* no need to set any bits just return as cvsig/broad covers this */
1211 } else {
1212 ckwq->kw_sword += PTHRW_INC;
1213
1214 /* set C and P bits, in the local error */
1215 if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
1216 error |= ECVCERORR;
1217 if (ckwq->kw_inqueue != 0) {
1218 ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1);
1219 }
1220 ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
1221 ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
1222 } else {
1223 			/* is everything in the queue a fake entry? */
1224 if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
1225 error |= ECVPERORR;
1226 }
1227 }
1228 }
1229 ksyn_wqunlock(ckwq);
1230 } else {
1231 int val = 0;
1232 // PTH_RWL_MTX_WAIT is removed
1233 if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
1234 val = PTHRW_INC | PTH_RWS_CV_CBIT;
1235 }
1236 pthread_kern->uthread_set_returnval(uth, val);
1237 }
1238
1239 ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
1240 pthread_kern->unix_syscall_return(error);
1241 }
1242
1243 /*
1244 * psynch_cvclrprepost: This system call clears pending prepost if present.
1245 */
1246 int
1247 _psynch_cvclrprepost(__unused proc_t p,
1248 user_addr_t cv,
1249 uint32_t cvgen,
1250 uint32_t cvugen,
1251 uint32_t cvsgen,
1252 __unused uint32_t prepocnt,
1253 uint32_t preposeq,
1254 uint32_t flags,
1255 int *retval)
1256 {
1257 int error = 0;
1258 int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
1259 int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP;
1260 ksyn_wait_queue_t kwq = NULL;
1261
1262 *retval = 0;
1263
1264 error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq);
1265 if (error != 0) {
1266 return error;
1267 }
1268
1269 ksyn_wqlock(kwq);
1270
1271 if (mutex) {
1272 int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
1273 if (firstfit && kwq->kw_pre_rwwc != 0) {
1274 if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) {
1275 // clear prepost
1276 kwq->kw_pre_rwwc = 0;
1277 kwq->kw_pre_lockseq = 0;
1278 }
1279 }
1280 } else {
1281 ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0);
1282 }
1283
1284 ksyn_wqunlock(kwq);
1285 ksyn_wqrelease(kwq, 1, wqtype);
1286 return error;
1287 }
1288
1289 /* ***************** pthread_rwlock ************************ */
1290
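/*
 * __psynch_rw_lock: common entry point for rwlock reader and writer waits.
 * Tries, in order, to consume a missed wakeup, a read-overlap grant (readers
 * only), or a prepost; if none applies, the thread blocks on the queue.
 */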
1291 static int
1292 __psynch_rw_lock(int type,
1293 user_addr_t rwlock,
1294 uint32_t lgenval,
1295 uint32_t ugenval,
1296 uint32_t rw_wc,
1297 int flags,
1298 uint32_t *retval)
1299 {
1300 int prepost_type, kqi;
1301
1302 if (type == PTH_RW_TYPE_READ) {
1303 prepost_type = KW_UNLOCK_PREPOST_READLOCK;
1304 kqi = KSYN_QUEUE_READ;
1305 } else {
1306 prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
1307 kqi = KSYN_QUEUE_WRITER;
1308 }
1309
1310 uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;
1311
1312 int error;
1313 ksyn_wait_queue_t kwq;
1314 error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK), &kwq);
1315 if (error == 0) {
1316 ksyn_wqlock(kwq);
1317 _ksyn_check_init(kwq, lgenval);
1318 if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) ||
1319 // handle overlap first as they are not counted against pre_rwwc
1320 (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) ||
1321 _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) {
1322 ksyn_wqunlock(kwq);
1323 } else {
1324 error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL);
1325 // ksyn_wait drops wait queue lock
1326 if (error == 0) {
1327 uthread_t uth = current_uthread();
1328 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
1329 *retval = kwe->kwe_psynchretval;
1330 }
1331 }
1332 ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT|KSYN_WQTYPE_RWLOCK));
1333 }
1334 return error;
1335 }
1336
1337 /*
1338 * psynch_rw_rdlock: This system call is used for psync rwlock readers to block.
1339 */
1340 int
1341 _psynch_rw_rdlock(__unused proc_t p,
1342 user_addr_t rwlock,
1343 uint32_t lgenval,
1344 uint32_t ugenval,
1345 uint32_t rw_wc,
1346 int flags,
1347 uint32_t *retval)
1348 {
1349 return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval);
1350 }
1351
1352 /*
1353 * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block.
1354 */
1355 int
1356 _psynch_rw_longrdlock(__unused proc_t p,
1357 __unused user_addr_t rwlock,
1358 __unused uint32_t lgenval,
1359 __unused uint32_t ugenval,
1360 __unused uint32_t rw_wc,
1361 __unused int flags,
1362 __unused uint32_t *retval)
1363 {
1364 return ESRCH;
1365 }
1366
1367
1368 /*
1369 * psynch_rw_wrlock: This system call is used for psync rwlock writers to block.
1370 */
1371 int
1372 _psynch_rw_wrlock(__unused proc_t p,
1373 user_addr_t rwlock,
1374 uint32_t lgenval,
1375 uint32_t ugenval,
1376 uint32_t rw_wc,
1377 int flags,
1378 uint32_t *retval)
1379 {
1380 return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval);
1381 }
1382
1383 /*
1384 * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block.
1385 */
1386 int
1387 _psynch_rw_yieldwrlock(__unused proc_t p,
1388 __unused user_addr_t rwlock,
1389 __unused uint32_t lgenval,
1390 __unused uint32_t ugenval,
1391 __unused uint32_t rw_wc,
1392 __unused int flags,
1393 __unused uint32_t *retval)
1394 {
1395 return ESRCH;
1396 }
1397
1398 /*
1399 * psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate
1400 * reader/writer variety lock.
1401 */
1402 int
1403 _psynch_rw_unlock(__unused proc_t p,
1404 user_addr_t rwlock,
1405 uint32_t lgenval,
1406 uint32_t ugenval,
1407 uint32_t rw_wc,
1408 int flags,
1409 uint32_t *retval)
1410 {
1411 int error = 0;
1412 ksyn_wait_queue_t kwq;
1413 uint32_t updatebits = 0;
1414 int diff;
1415 uint32_t count = 0;
1416 uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
1417
1418 error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
1419 if (error != 0) {
1420 return(error);
1421 }
1422
1423 ksyn_wqlock(kwq);
1424 int isinit = _ksyn_check_init(kwq, lgenval);
1425
1426 /* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
1427 if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq)!= 0)) {
1428 error = 0;
1429 goto out;
1430 }
1431
1432 	/* If L-U != num of waiters, then it needs to be preposted or is spurious */
1433 diff = find_diff(lgenval, ugenval);
1434
1435 if (find_seq_till(kwq, curgen, diff, &count) == 0) {
1436 if ((count == 0) || (count < (uint32_t)diff))
1437 goto prepost;
1438 }
1439
1440 /* no prepost and all threads are in place, reset the bit */
1441 if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)){
1442 kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
1443 }
1444
1445 /* can handle unlock now */
1446
1447 CLEAR_PREPOST_BITS(kwq);
1448
1449 error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
1450 #if __TESTPANICS__
1451 if (error != 0)
1452 panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n",error);
1453 #endif /* __TESTPANICS__ */
1454 out:
1455 if (error == 0) {
1456 /* update bits?? */
1457 *retval = updatebits;
1458 }
1459
1460
1461 ksyn_wqunlock(kwq);
1462 ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));
1463
1464 return(error);
1465
1466 prepost:
1467 /* update if the new seq is higher than prev prepost, or first set */
1468 if (is_rws_setseq(kwq->kw_pre_sseq) ||
1469 is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) {
1470 kwq->kw_pre_rwwc = (diff - count);
1471 kwq->kw_pre_lockseq = curgen;
1472 kwq->kw_pre_sseq = rw_wc;
1473 updatebits = lgenval; /* let this not do unlock handling */
1474 }
1475 error = 0;
1476 goto out;
1477 }
1478
1479
1480 /* ************************************************************************** */
1481 void
1482 pth_global_hashinit(void)
1483 {
1484 pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
1485 }
1486
1487 void
1488 _pth_proc_hashinit(proc_t p)
1489 {
1490 void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash);
1491 if (ptr == NULL) {
1492 panic("pth_proc_hashinit: hash init returned 0\n");
1493 }
1494
1495 pthread_kern->proc_set_pthhash(p, ptr);
1496 }
1497
1498
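/*
 * ksyn_wq_hash_lookup: look up a wait queue in the per-process hash (keyed
 * by user address) or, for process-shared objects, in the global hash keyed
 * by the backing VM object and offset.  Sets *out_kwq to NULL if no entry
 * exists.  Callers appear to hold the pthread list lock across the lookup.
 */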
1499 static int
1500 ksyn_wq_hash_lookup(user_addr_t uaddr,
1501 proc_t p,
1502 int flags,
1503 ksyn_wait_queue_t *out_kwq,
1504 struct pthhashhead **out_hashptr,
1505 uint64_t *out_object,
1506 uint64_t *out_offset)
1507 {
1508 int res = 0;
1509 ksyn_wait_queue_t kwq;
1510 uint64_t object = 0, offset = 0;
1511 struct pthhashhead *hashptr;
1512 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
1513 hashptr = pth_glob_hashtbl;
1514 res = ksyn_findobj(uaddr, &object, &offset);
1515 if (res == 0) {
1516 LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) {
1517 if (kwq->kw_object == object && kwq->kw_offset == offset) {
1518 break;
1519 }
1520 }
1521 } else {
1522 kwq = NULL;
1523 }
1524 } else {
1525 hashptr = pthread_kern->proc_get_pthhash(p);
1526 LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) {
1527 if (kwq->kw_addr == uaddr) {
1528 break;
1529 }
1530 }
1531 }
1532 *out_kwq = kwq;
1533 *out_object = object;
1534 *out_offset = offset;
1535 *out_hashptr = hashptr;
1536 return res;
1537 }
1538
1539 void
1540 _pth_proc_hashdelete(proc_t p)
1541 {
1542 struct pthhashhead * hashptr;
1543 ksyn_wait_queue_t kwq;
1544 unsigned long hashsize = pthhash + 1;
1545 unsigned long i;
1546
1547 hashptr = pthread_kern->proc_get_pthhash(p);
1548 pthread_kern->proc_set_pthhash(p, NULL);
1549 if (hashptr == NULL) {
1550 return;
1551 }
1552
1553 pthread_list_lock();
1554 	for (i = 0; i < hashsize; i++) {
1555 while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
1556 if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
1557 kwq->kw_pflags &= ~KSYN_WQ_INHASH;
1558 LIST_REMOVE(kwq, kw_hash);
1559 }
1560 if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
1561 kwq->kw_pflags &= ~KSYN_WQ_FLIST;
1562 LIST_REMOVE(kwq, kw_list);
1563 }
1564 pthread_list_unlock();
1565 /* release fake entries if present for cvars */
1566 if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
1567 ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
1568 lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
1569 pthread_kern->zfree(kwq_zone, kwq);
1570 pthread_list_lock();
1571 }
1572 }
1573 pthread_list_unlock();
1574 FREE(hashptr, M_PROC);
1575 }
1576
1577 /* no lock held for this as the waitqueue is getting freed */
1578 void
1579 ksyn_freeallkwe(ksyn_queue_t kq)
1580 {
1581 ksyn_waitq_element_t kwe;
1582 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
1583 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
1584 if (kwe->kwe_state != KWE_THREAD_INWAIT) {
1585 pthread_kern->zfree(kwe_zone, kwe);
1586 }
1587 }
1588 }
1589
1590 /* find the kernel waitqueue; if not present, create one. Grants a reference. */
1591 int
1592 ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp)
1593 {
1594 int res = 0;
1595 ksyn_wait_queue_t kwq = NULL;
1596 ksyn_wait_queue_t nkwq = NULL;
1597 struct pthhashhead *hashptr;
1598 proc_t p = current_proc();
1599
1600 uint64_t object = 0, offset = 0;
1601 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
1602 res = ksyn_findobj(uaddr, &object, &offset);
1603 hashptr = pth_glob_hashtbl;
1604 } else {
1605 hashptr = pthread_kern->proc_get_pthhash(p);
1606 }
1607
1608 while (res == 0) {
1609 pthread_list_lock();
1610 res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset);
1611 if (res != 0) {
1612 pthread_list_unlock();
1613 break;
1614 }
1615 if (kwq == NULL && nkwq == NULL) {
1616 // Drop the lock to allocate a new kwq and retry.
1617 pthread_list_unlock();
1618
1619 nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone);
1620 bzero(nkwq, sizeof(struct ksyn_wait_queue));
1621 int i;
1622 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
1623 ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
1624 }
1625 lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
1626 continue;
1627 } else if (kwq == NULL && nkwq != NULL) {
1628 // Still not found, add the new kwq to the hash.
1629 kwq = nkwq;
1630 nkwq = NULL; // Don't free.
1631 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
1632 kwq->kw_pflags |= KSYN_WQ_SHARED;
1633 LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash);
1634 } else {
1635 LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash);
1636 }
1637 kwq->kw_pflags |= KSYN_WQ_INHASH;
1638 } else if (kwq != NULL) {
1639 // Found an existing kwq, use it.
1640 if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
1641 LIST_REMOVE(kwq, kw_list);
1642 kwq->kw_pflags &= ~KSYN_WQ_FLIST;
1643 }
1644 if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
1645 if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) {
1646 if (kwq->kw_iocount == 0) {
1647 kwq->kw_type = 0; // mark for reinitialization
1648 } else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) {
1649 /* if all users are unlockers then wait for it to finish */
1650 kwq->kw_pflags |= KSYN_WQ_WAITING;
1651 // Drop the lock and wait for the kwq to be free.
1652 (void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
1653 continue;
1654 } else {
1655 __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
1656 res = EINVAL;
1657 }
1658 } else {
1659 __FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
1660 res = EINVAL;
1661 }
1662 }
1663 }
1664 if (res == 0) {
1665 if (kwq->kw_type == 0) {
1666 kwq->kw_addr = uaddr;
1667 kwq->kw_object = object;
1668 kwq->kw_offset = offset;
1669 kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
1670 CLEAR_REINIT_BITS(kwq);
1671 kwq->kw_lword = mgen;
1672 kwq->kw_uword = ugen;
1673 kwq->kw_sword = sgen;
1674 kwq->kw_owner = 0;
1675 kwq->kw_kflags = 0;
1676 kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
1677 }
1678 kwq->kw_iocount++;
1679 if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
1680 kwq->kw_dropcount++;
1681 }
1682 }
1683 pthread_list_unlock();
1684 break;
1685 }
1686 if (kwqp != NULL) {
1687 *kwqp = kwq;
1688 }
1689 if (nkwq) {
1690 lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
1691 pthread_kern->zfree(kwq_zone, nkwq);
1692 }
1693 return res;
1694 }
1695
1696 /* Reference from find is dropped here. Starts the free process if needed */
1697 void
1698 ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
1699 {
1700 uint64_t deadline;
1701 ksyn_wait_queue_t free_elem = NULL;
1702
1703 pthread_list_lock();
1704 if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
1705 kwq->kw_dropcount--;
1706 }
1707 if (--kwq->kw_iocount == 0) {
1708 if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
1709 		/* someone is waiting for the waitqueue, wake them up */
1710 kwq->kw_pflags &= ~KSYN_WQ_WAITING;
1711 wakeup(&kwq->kw_pflags);
1712 }
1713
1714 if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) {
1715 if (qfreenow == 0) {
1716 microuptime(&kwq->kw_ts);
1717 LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
1718 kwq->kw_pflags |= KSYN_WQ_FLIST;
1719 if (psynch_cleanupset == 0) {
1720 struct timeval t;
1721 microuptime(&t);
1722 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1723 deadline = tvtoabstime(&t);
1724 thread_call_enter_delayed(psynch_thcall, deadline);
1725 psynch_cleanupset = 1;
1726 }
1727 } else {
1728 kwq->kw_pflags &= ~KSYN_WQ_INHASH;
1729 LIST_REMOVE(kwq, kw_hash);
1730 free_elem = kwq;
1731 }
1732 }
1733 }
1734 pthread_list_unlock();
1735 if (free_elem != NULL) {
1736 lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp);
1737 pthread_kern->zfree(kwq_zone, free_elem);
1738 }
1739 }
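/*
 * Note on the two teardown paths above: with qfreenow == 0 an idle kwq is
 * parked on pth_free_list with a kw_ts timestamp and reaped later by
 * psynch_wq_cleanup() once KSYN_CLEANUP_DEADLINE seconds have elapsed;
 * with qfreenow != 0 it is unhashed and freed immediately. Either way the
 * free only proceeds when no waiters, preposts, or pending interrupt
 * counts remain.
 */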
1740
1741 /* responsible for freeing the wait queues */
1742 void
1743 psynch_wq_cleanup(__unused void *param, __unused void * param1)
1744 {
1745 ksyn_wait_queue_t kwq, tmp;
1746 struct timeval t;
1747 int reschedule = 0;
1748 uint64_t deadline = 0;
1749 LIST_HEAD(, ksyn_wait_queue) freelist;
1750 LIST_INIT(&freelist);
1751
1752 pthread_list_lock();
1753
1754 microuptime(&t);
1755
1756 LIST_FOREACH_SAFE(kwq, &pth_free_list, kw_list, tmp) { // safe traversal: reaped entries are unlinked and relinked onto freelist mid-scan
1757 if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) {
1758 // still in use
1759 continue;
1760 }
1761 __darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec;
1762 if (diff < 0)
1763 diff *= -1;
1764 if (diff >= KSYN_CLEANUP_DEADLINE) {
1765 kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
1766 LIST_REMOVE(kwq, kw_hash);
1767 LIST_REMOVE(kwq, kw_list);
1768 LIST_INSERT_HEAD(&freelist, kwq, kw_list);
1769 } else {
1770 reschedule = 1;
1771 }
1772
1773 }
1774 if (reschedule != 0) {
1775 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1776 deadline = tvtoabstime(&t);
1777 thread_call_enter_delayed(psynch_thcall, deadline);
1778 psynch_cleanupset = 1;
1779 } else {
1780 psynch_cleanupset = 0;
1781 }
1782 pthread_list_unlock();
1783
1784 while ((kwq = LIST_FIRST(&freelist)) != NULL) {
1785 LIST_REMOVE(kwq, kw_list);
1786 lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
1787 pthread_kern->zfree(kwq_zone, kwq);
1788 }
1789 }
1790
1791 static int
1792 _wait_result_to_errno(wait_result_t result)
1793 {
1794 int res = 0;
1795 switch (result) {
1796 case THREAD_TIMED_OUT:
1797 res = ETIMEDOUT;
1798 break;
1799 case THREAD_INTERRUPTED:
1800 res = EINTR;
1801 break;
1802 }
1803 return res;
1804 }
1805
1806 int
1807 ksyn_wait(ksyn_wait_queue_t kwq,
1808 int kqi,
1809 uint32_t lockseq,
1810 int fit,
1811 uint64_t abstime,
1812 thread_continue_t continuation)
1813 {
1814 int res;
1815
1816 thread_t th = current_thread();
1817 uthread_t uth = pthread_kern->get_bsdthread_info(th);
1818 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
1819 bzero(kwe, sizeof(*kwe));
1820 kwe->kwe_count = 1;
1821 kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
1822 kwe->kwe_state = KWE_THREAD_INWAIT;
1823 kwe->kwe_uth = uth;
1824 kwe->kwe_tid = thread_tid(th);
1825
1826 res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
1827 if (res != 0) {
1828 //panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
1829 ksyn_wqunlock(kwq);
1830 return res;
1831 }
1832
1833 assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
1834 ksyn_wqunlock(kwq);
1835
1836 kern_return_t ret;
1837 if (continuation == THREAD_CONTINUE_NULL) {
1838 ret = thread_block(NULL);
1839 } else {
1840 ret = thread_block_parameter(continuation, kwq);
1841
1842 // If thread_block_parameter returns (interrupted) call the
1843 // continuation manually to clean up.
1844 continuation(kwq, ret);
1845
1846 // NOT REACHED
1847 panic("ksyn_wait continuation returned");
1848 }
1849
1850 res = _wait_result_to_errno(ret);
1851 if (res != 0) {
1852 ksyn_wqlock(kwq);
1853 if (kwe->kwe_kwqqueue) {
1854 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1855 }
1856 ksyn_wqunlock(kwq);
1857 }
1858 return res;
1859 }
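/*
 * Illustrative caller pattern for ksyn_wait() (sketch, not build code).
 * The kwq must be locked on entry; it is unlocked on all return paths. A
 * zero return means ksyn_signal() woke us and left the grant word in our
 * uthread's kwe_psynchretval.
 */
#if 0
ksyn_wqlock(kwq);
/* ... record this waiter's state in the kwq ... */
error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, lockseq, FIRSTFIT, abstime,
    THREAD_CONTINUE_NULL);
if (error == 0) {
	ksyn_waitq_element_t kwe =
	    pthread_kern->uthread_get_uukwe(current_uthread());
	uint32_t updateval = kwe->kwe_psynchretval;
	/* ... */
}
#endif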
1860
1861 kern_return_t
1862 ksyn_signal(ksyn_wait_queue_t kwq,
1863 int kqi,
1864 ksyn_waitq_element_t kwe,
1865 uint32_t updateval)
1866 {
1867 kern_return_t ret;
1868
1869 // If no wait element was specified, wake the first.
1870 if (!kwe) {
1871 kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
1872 if (!kwe) {
1873 panic("ksyn_signal: signaling empty queue");
1874 }
1875 }
1876
1877 if (kwe->kwe_state != KWE_THREAD_INWAIT) {
1878 panic("ksyn_signal: signaling non-waiting element");
1879 }
1880
1881 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1882 kwe->kwe_psynchretval = updateval;
1883
1884 ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
1885 if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
1886 panic("ksyn_signal: failed to wake thread: %x\n", ret);
1887 }
1888 return ret;
1889 }
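/*
 * Note: KERN_NOT_WAITING from thread_wakeup_one() means the chosen element
 * was no longer blocked (e.g., it timed out or was interrupted between
 * queue removal and wakeup). Callers such as ksyn_wakeupreaders() and
 * kwq_handle_unlock() treat that as a failed wakeup and prepost it via
 * kw_pre_intrcount / kw_pre_intrseq / kw_pre_intrretbits.
 */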
1890
1891 int
1892 ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
1893 {
1894 kern_return_t ret;
1895 vm_page_info_basic_data_t info;
1896 mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
1897 ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
1898 if (ret != KERN_SUCCESS) {
1899 return EINVAL;
1900 }
1901
1902 if (objectp != NULL) {
1903 *objectp = (uint64_t)info.object_id;
1904 }
1905 if (offsetp != NULL) {
1906 *offsetp = (uint64_t)info.offset;
1907 }
1908
1909 return(0);
1910 }
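/*
 * Illustrative use (sketch, hypothetical caller): process-shared
 * synchronizers are keyed by backing VM object and offset rather than by
 * per-process address, which is what ksyn_wqfind() hashes on for
 * PTHREAD_PROCESS_SHARED above.
 */
#if 0
static void
example_shared_key(user_addr_t uaddr)
{
	uint64_t object = 0, offset = 0;
	if (ksyn_findobj(uaddr, &object, &offset) == 0) {
		/* the same page mapped in two processes yields the same
		 * (object, offset) pair, so both map to one kwq */
	}
}
#endif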
1911
1912
1913 /* find the lowest pending sequence across the read (kw_fr) and write (kw_fwr) queues */
1914 int
1915 kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[])
1916 {
1917 uint32_t kw_fr, kw_fwr, low;
1918 int type = 0, lowtype, typenum[2] = { 0 };
1919 uint32_t numbers[2] = { 0 };
1920 int count = 0, i;
1921
1922
1923 if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
1924 type |= PTH_RWSHFT_TYPE_READ;
1925 /* read entries are present */
1926 if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
1927 kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
1928 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
1929 kw_fr = premgen;
1930 } else
1931 kw_fr = premgen;
1932
1933 lowest[KSYN_QUEUE_READ] = kw_fr;
1934 numbers[count] = kw_fr;
1935 typenum[count] = PTH_RW_TYPE_READ;
1936 count++;
1937 } else
1938 lowest[KSYN_QUEUE_READ] = 0;
1939
1940 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
1941 type |= PTH_RWSHFT_TYPE_WRITE;
1942 /* write entries are present */
1943 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
1944 kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
1945 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
1946 kw_fwr = premgen;
1947 } else
1948 kw_fwr = premgen;
1949
1950 lowest[KSYN_QUEUE_WRITER] = kw_fwr;
1951 numbers[count] = kw_fwr;
1952 typenum[count] = PTH_RW_TYPE_WRITE;
1953 count++;
1954 } else
1955 lowest[KSYN_QUEUE_WRITER] = 0;
1956
1957 #if __TESTPANICS__
1958 if (count == 0)
1959 panic("nothing in the queue???\n");
1960 #endif /* __TESTPANICS__ */
1961
1962 low = numbers[0];
1963 lowtype = typenum[0];
1964 if (count > 1) {
1965 for (i = 1; i < count; i++) {
1966 if (is_seqlower(numbers[i], low) != 0) {
1967 low = numbers[i];
1968 lowtype = typenum[i];
1969 }
1970 }
1971 }
1972 type |= lowtype;
1973
1974 if (typep != NULL)
1975 *typep = type;
1976 return(0);
1977 }
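/*
 * All comparisons above go through is_seqlower()/is_seqhigher() rather
 * than plain '<', so the result stays correct when the 32-bit sequence
 * space wraps. lowest[] carries one entry per queue, and *typep reports
 * both which queues are populated (PTH_RWSHFT_TYPE_*) and which type owns
 * the overall lowest sequence (PTH_RW_TYPE_*).
 */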
1978
1979 /* wake up readers up to the writer limit */
1980 int
1981 ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp)
1982 {
1983 ksyn_queue_t kq;
1984 int failedwakeup = 0;
1985 int numwoken = 0;
1986 kern_return_t kret = KERN_SUCCESS;
1987 uint32_t lbits = 0;
1988
1989 lbits = updatebits;
1990
1991 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
1992 while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
1993 kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
1994 if (kret == KERN_NOT_WAITING) {
1995 failedwakeup++;
1996 }
1997 numwoken++;
1998 }
1999
2000 if (wokenp != NULL)
2001 *wokenp = numwoken;
2002 return(failedwakeup);
2003 }
2004
2005
2006 /* handles the unlock grants for the next set on rw_unlock(), or on arrival of all preposted waiters */
2007 int
2008 kwq_handle_unlock(ksyn_wait_queue_t kwq,
2009 __unused uint32_t mgen,
2010 uint32_t rw_wc,
2011 uint32_t *updatep,
2012 int flags,
2013 int *blockp,
2014 uint32_t premgen)
2015 {
2016 uint32_t low_writer, limitrdnum;
2017 int rwtype, error = 0;
2018 int allreaders, failed;
2019 uint32_t updatebits = 0, numneeded = 0;
2020 int prepost = flags & KW_UNLOCK_PREPOST;
2021 thread_t preth = THREAD_NULL;
2022 ksyn_waitq_element_t kwe;
2023 uthread_t uth;
2024 thread_t th;
2025 int woken = 0;
2026 int block = 1;
2027 uint32_t lowest[KSYN_QUEUE_MAX]; /* no need for upgrade as it is handled separately */
2028 kern_return_t kret = KERN_SUCCESS;
2029 ksyn_queue_t kq;
2030 int curthreturns = 0;
2031
2032 if (prepost != 0) {
2033 preth = current_thread();
2034 }
2035
2036 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2037 kwq->kw_lastseqword = rw_wc;
2038 kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
2039 kwq->kw_overlapwatch = 0;
2040
2041 error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
2042 #if __TESTPANICS__
2043 if (error != 0)
2044 panic("rwunlock: failed to slot next round of threads");
2045 #endif /* __TESTPANICS__ */
2046
2047 low_writer = lowest[KSYN_QUEUE_WRITER];
2048
2049 allreaders = 0;
2050 updatebits = 0;
2051
2052 switch (rwtype & PTH_RW_TYPE_MASK) {
2053 case PTH_RW_TYPE_READ: {
2054 // XXX
2055 /* what about a preflight that is LREAD or READ? */
2056 if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
2057 if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
2058 updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
2059 }
2060 }
2061 limitrdnum = 0;
2062 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2063 limitrdnum = low_writer;
2064 } else {
2065 allreaders = 1;
2066 }
2067
2068 numneeded = 0;
2069
2070 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2071 limitrdnum = low_writer;
2072 numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
2073 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
2074 curthreturns = 1;
2075 numneeded += 1;
2076 }
2077 } else {
2078 // no writers at all
2079 // no other waiters, only readers
2080 kwq->kw_overlapwatch = 1;
2081 numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
2082 if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
2083 curthreturns = 1;
2084 numneeded += 1;
2085 }
2086 }
2087
2088 updatebits += (numneeded << PTHRW_COUNT_SHIFT);
2089
2090 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2091
2092 if (curthreturns != 0) {
2093 block = 0;
2094 uth = current_uthread();
2095 kwe = pthread_kern->uthread_get_uukwe(uth);
2096 kwe->kwe_psynchretval = updatebits;
2097 }
2098
2099
2100 failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken);
2101 if (failed != 0) {
2102 kwq->kw_pre_intrcount = failed; /* number of failed wakeups to prepost */
2103 kwq->kw_pre_intrseq = limitrdnum;
2104 kwq->kw_pre_intrretbits = updatebits;
2105 kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
2106 }
2107
2108 error = 0;
2109
2110 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
2111 panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
2112 }
2113 break;
2114
2115 case PTH_RW_TYPE_WRITE: {
2116
2117 /* only one thread is going to be granted */
2118 updatebits |= (PTHRW_INC);
2119 updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT;
2120
2121 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
2122 block = 0;
2123 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
2124 updatebits |= PTH_RWL_WBIT;
2125 }
2126 th = preth;
2127 uth = pthread_kern->get_bsdthread_info(th);
2128 kwe = pthread_kern->uthread_get_uukwe(uth);
2129 kwe->kwe_psynchretval = updatebits;
2130 } else {
2131 /* we are not granting the write lock to the preposting thread */
2132 /* if other writers are present, or the preposting write thread remains, the W bit must be set */
2133 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 ||
2134 (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
2135 updatebits |= PTH_RWL_WBIT;
2136 }
2137 /* setup next in the queue */
2138 kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
2139 if (kret == KERN_NOT_WAITING) {
2140 kwq->kw_pre_intrcount = 1; /* count of failed wakeups (one writer) */
2141 kwq->kw_pre_intrseq = low_writer;
2142 kwq->kw_pre_intrretbits = updatebits;
2143 kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
2144 }
2145 error = 0;
2146 }
2147 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2148 if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
2149 panic("kwq_handle_unlock: writer lock granted but K/E bits not set %x\n", updatebits);
2150 }
2151 break;
2152
2153 default:
2154 panic("rwunlock: invalid type for lock grants");
2155
2156 }
2157
2158 if (updatep != NULL)
2159 *updatep = updatebits;
2160 if (blockp != NULL)
2161 *blockp = block;
2162 return(error);
2163 }
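/*
 * Worked example for the read-grant path above (values hypothetical): with
 * three readers eligible below the lowest queued writer, the grant word
 * becomes
 *     updatebits = (3 << PTHRW_COUNT_SHIFT) | PTH_RWL_WBIT | PTH_RWL_KBIT
 * kw_nextseqword advances from (rw_wc & PTHRW_COUNT_MASK) by that amount,
 * and any reader ksyn_signal() failed to wake is preposted through the
 * kw_pre_intr* fields for the late arriver.
 */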
2164
2165 /************* Indiv queue support routines ************************/
2166 void
2167 ksyn_queue_init(ksyn_queue_t kq)
2168 {
2169 TAILQ_INIT(&kq->ksynq_kwelist);
2170 kq->ksynq_count = 0;
2171 kq->ksynq_firstnum = 0;
2172 kq->ksynq_lastnum = 0;
2173 }
2174
2175 int
2176 ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit)
2177 {
2178 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2179 uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
2180 int res = 0;
2181
2182 if (kwe->kwe_kwqqueue != NULL) {
2183 panic("adding enqueued item to another queue");
2184 }
2185
2186 if (kq->ksynq_count == 0) {
2187 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2188 kq->ksynq_firstnum = lockseq;
2189 kq->ksynq_lastnum = lockseq;
2190 } else if (fit == FIRSTFIT) {
2191 /* TBD: if retry bit is set for mutex, add it to the head */
2192 /* firstfit, arriving order */
2193 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2194 if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2195 kq->ksynq_firstnum = lockseq;
2196 }
2197 if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
2198 kq->ksynq_lastnum = lockseq;
2199 }
2200 } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
2201 /* During prepost, when a thread is being cancelled, we can have two entries with the same sequence */
2202 res = EBUSY;
2203 if (kwe->kwe_state == KWE_THREAD_PREPOST) {
2204 ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
2205 if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
2206 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2207 res = 0;
2208 }
2209 }
2210 } else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
2211 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2212 kq->ksynq_lastnum = lockseq;
2213 } else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2214 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2215 kq->ksynq_firstnum = lockseq;
2216 } else {
2217 ksyn_waitq_element_t q_kwe, r_kwe;
2218
2219 res = ESRCH;
2220 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2221 if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
2222 TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
2223 res = 0;
2224 break;
2225 }
2226 }
2227 }
2228
2229 if (res == 0) {
2230 kwe->kwe_kwqqueue = kwq;
2231 kq->ksynq_count++;
2232 kwq->kw_inqueue++;
2233 update_low_high(kwq, lockseq);
2234 }
2235 return res;
2236 }
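/*
 * Insertion policy, by example (hypothetical sequences 5, 3, 9 arriving in
 * that order): FIRSTFIT appends in arrival order, 5-3-9, tracking
 * ksynq_firstnum/ksynq_lastnum via is_seqlower()/is_seqhigher(); the
 * default path keeps the queue sorted by masked sequence, 3-5-9. EBUSY on
 * a duplicate sequence is tolerated only for a prepost racing a cancelled
 * waiter, as handled above.
 */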
2237
2238 void
2239 ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
2240 {
2241 if (kq->ksynq_count == 0) {
2242 panic("removing item from empty queue");
2243 }
2244
2245 if (kwe->kwe_kwqqueue != kwq) {
2246 panic("removing item from wrong queue");
2247 }
2248
2249 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
2250 kwe->kwe_list.tqe_next = NULL;
2251 kwe->kwe_list.tqe_prev = NULL;
2252 kwe->kwe_kwqqueue = NULL;
2253
2254 if (--kq->ksynq_count > 0) {
2255 ksyn_waitq_element_t tmp;
2256 tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
2257 kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2258 tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
2259 kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2260 } else {
2261 kq->ksynq_firstnum = 0;
2262 kq->ksynq_lastnum = 0;
2263 }
2264
2265 if (--kwq->kw_inqueue > 0) {
2266 uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
2267 if (kwq->kw_lowseq == curseq) {
2268 kwq->kw_lowseq = find_nextlowseq(kwq);
2269 }
2270 if (kwq->kw_highseq == curseq) {
2271 kwq->kw_highseq = find_nexthighseq(kwq);
2272 }
2273 } else {
2274 kwq->kw_lowseq = 0;
2275 kwq->kw_highseq = 0;
2276 }
2277 }
2278
2279 ksyn_waitq_element_t
2280 ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
2281 {
2282 ksyn_waitq_element_t kwe;
2283
2284 // XXX: should stop searching when higher sequence number is seen
2285 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2286 if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
2287 return kwe;
2288 }
2289 }
2290 return NULL;
2291 }
2292
2293 /* find the thread at the target sequence (or a broadcast/prepost at or above) */
2294 ksyn_waitq_element_t
2295 ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
2296 {
2297 ksyn_waitq_element_t result = NULL;
2298 ksyn_waitq_element_t kwe;
2299 uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
2300
2301 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2302 if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
2303 result = kwe;
2304
2305 // KWE_THREAD_INWAIT must be strictly equal
2306 if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
2307 result = NULL;
2308 }
2309 break;
2310 }
2311 }
2312 return result;
2313 }
2314
2315 /* look for a thread at signalseq, or any eligible entry at or below uptoseq */
2316 ksyn_waitq_element_t
2317 ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
2318 {
2319 ksyn_waitq_element_t result = NULL;
2320 ksyn_waitq_element_t q_kwe, r_kwe;
2321
2322 // XXX
2323 /* handles the case where the sequence space wraps within the tail of the queue */
2324 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2325 if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
2326 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2327 return result;
2328 }
2329 }
2330 if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
2331 /* match any prepost at our same uptoseq or any broadcast above */
2332 if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
2333 continue;
2334 }
2335 return q_kwe;
2336 } else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
2337 /*
2338 * Match any (non-cancelled) thread at or below our upto sequence -
2339 * but prefer an exact match to our signal sequence (if present) to
2340 * keep exact matches happening.
2341 */
2342 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2343 return result;
2344 }
2345 if (q_kwe->kwe_kwqqueue == kwq) {
2346 if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
2347 /* if equal or higher than our signal sequence, return this one */
2348 if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
2349 return q_kwe;
2350 }
2351
2352 /* otherwise, just remember this eligible thread and move on */
2353 if (result == NULL) {
2354 result = q_kwe;
2355 }
2356 }
2357 }
2358 } else {
2359 panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
2360 }
2361 }
2362 return result;
2363 }
2364
2365 void
2366 ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
2367 {
2368 ksyn_waitq_element_t kwe;
2369 uint32_t tseq = upto & PTHRW_COUNT_MASK;
2370 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2371
2372 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
2373 if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
2374 break;
2375 }
2376 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2377 /*
2378 * This scenario typically arises when the cvar has been
2379 * reinitialized while waiters remain queued. We wake them
2380 * with a spurious-wakeup return so the cvar state gets
2381 * reset correctly.
2382 */
2383
2384 /* skip canceled ones */
2385 /* wake the rest */
2386 /* set the M bit to tell the waking CV to return the Inc value */
2387 (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
2388 } else {
2389 ksyn_queue_remove_item(kwq, kq, kwe);
2390 pthread_kern->zfree(kwe_zone, kwe);
2391 kwq->kw_fakecount--;
2392 }
2393 }
2394 }
2395
2396 /*************************************************************************/
2397
2398 void
2399 update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
2400 {
2401 if (kwq->kw_inqueue == 1) {
2402 kwq->kw_lowseq = lockseq;
2403 kwq->kw_highseq = lockseq;
2404 } else {
2405 if (is_seqlower(lockseq, kwq->kw_lowseq)) {
2406 kwq->kw_lowseq = lockseq;
2407 }
2408 if (is_seqhigher(lockseq, kwq->kw_highseq)) {
2409 kwq->kw_highseq = lockseq;
2410 }
2411 }
2412 }
2413
2414 uint32_t
2415 find_nextlowseq(ksyn_wait_queue_t kwq)
2416 {
2417 uint32_t lowest = 0;
2418 int first = 1;
2419 int i;
2420
2421 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2422 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2423 uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
2424 if (first || is_seqlower(current, lowest)) {
2425 lowest = current;
2426 first = 0;
2427 }
2428 }
2429 }
2430
2431 return lowest;
2432 }
2433
2434 uint32_t
2435 find_nexthighseq(ksyn_wait_queue_t kwq)
2436 {
2437 uint32_t highest = 0;
2438 int first = 1;
2439 int i;
2440
2441 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2442 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2443 uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
2444 if (first || is_seqhigher(current, highest)) {
2445 highest = current;
2446 first = 0;
2447 }
2448 }
2449 }
2450
2451 return highest;
2452 }
2453
2454 int
2455 find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
2456 {
2457 int i;
2458 uint32_t count = 0;
2459
2460 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2461 count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
2462 if (count >= nwaiters) {
2463 break;
2464 }
2465 }
2466
2467 if (countp != NULL) {
2468 *countp = count;
2469 }
2470
2471 if (count == 0) {
2472 return 0;
2473 } else if (count >= nwaiters) {
2474 return 1;
2475 } else {
2476 return 0;
2477 }
2478 }
2479
2480
2481 uint32_t
2482 ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
2483 {
2484 uint32_t i = 0;
2485 ksyn_waitq_element_t kwe, newkwe;
2486
2487 if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
2488 return 0;
2489 }
2490 if (upto == kq->ksynq_firstnum) {
2491 return 1;
2492 }
2493 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2494 uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
2495 if (is_seqhigher(curval, upto)) {
2496 break;
2497 }
2498 ++i;
2499 if (upto == curval) {
2500 break;
2501 }
2502 }
2503 return i;
2504 }
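/*
 * Example (hypothetical queue holding sequences 3, 5, 9): upto == 5 counts
 * the 3 and the 5 and returns 2; the 9 is never visited because the scan
 * stops at the first sequence above upto.
 */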
2505
2506 /* handles a condvar broadcast; reports the number of woken threads and the bits for the syscall return via updatep */
2507 void
2508 ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
2509 {
2510 ksyn_waitq_element_t kwe, newkwe;
2511 uint32_t updatebits = 0;
2512 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
2513
2514 struct ksyn_queue kfreeq;
2515 ksyn_queue_init(&kfreeq);
2516
2517 retry:
2518 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2519 if (is_seqhigher(kwe->kwe_lockseq, upto)) {
2520 // outside our range
2521 break;
2522 }
2523
2524 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2525 // Wake only non-canceled threads waiting on this CV.
2526 if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
2527 (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
2528 updatebits += PTHRW_INC;
2529 }
2530 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
2531 kwe->kwe_state == KWE_THREAD_PREPOST) {
2532 ksyn_queue_remove_item(ckwq, kq, kwe);
2533 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2534 ckwq->kw_fakecount--;
2535 } else {
2536 panic("unknown kwe state\n");
2537 }
2538 }
2539
2540 /* Need to enter a broadcast in the queue (if not already at L == S) */
2541
2542 if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
2543 newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
2544 if (newkwe == NULL) {
2545 ksyn_wqunlock(ckwq);
2546 newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
2547 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2548 ksyn_wqlock(ckwq);
2549 goto retry;
2550 } else {
2551 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2552 ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
2553 }
2554 }
2555
2556 // free any leftover elements collected above
2557 while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
2558 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2559 pthread_kern->zfree(kwe_zone, kwe);
2560 }
2561
2562 if (updatep != NULL) {
2563 *updatep = updatebits;
2564 }
2565 }
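/*
 * The retry dance above is the standard unlock-allocate-relock pattern:
 * zalloc() may block, so the kwq lock is dropped before allocating the
 * broadcast element, the new element is parked on the local kfreeq, and
 * the whole scan restarts from the top since the queue may have changed
 * while unlocked. Anything still on kfreeq at the end is returned to
 * kwe_zone.
 */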
2566
2567 void
2568 ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
2569 {
2570 if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
2571 if (ckwq->kw_inqueue != 0) {
2572 /* FREE THE QUEUE */
2573 ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0);
2574 #if __TESTPANICS__
2575 if (ckwq->kw_inqueue != 0)
2576 panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
2577 #endif /* __TESTPANICS__ */
2578 }
2579 ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
2580 ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
2581 *updatebits |= PTH_RWS_CV_CBIT;
2582 } else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
2583 // only fake entries are present in the queue
2584 *updatebits |= PTH_RWS_CV_PBIT;
2585 }
2586 }
2587
2588 void
2589 psynch_zoneinit(void)
2590 {
2591 kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
2592 kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
2593 }
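/*
 * zinit() arguments are (element size, max zone size in bytes, allocation
 * chunk size, name): both zones are sized to hold at most 8192 elements
 * and grow in 4096-byte chunks.
 */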