/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 * pthread_support.c
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/time.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
//#include <sys/pthread_internal.h>
#include <sys/vm.h>
#include <sys/user.h>

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/thread_call.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/processor.h>
#include <kern/block_hint.h>
#include <kern/turnstile.h>
//#include <kern/mach_param.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_map.h>
#include <mach/vm_region.h>

#include "kern/kern_internal.h"
#include "kern/synch_internal.h"
#include "kern/kern_trace.h"

typedef struct uthread *uthread_t;

//#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
#define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)
#define __FAILEDUSERTEST2__(s, x...) do { printf("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0)

lck_mtx_t *pthread_list_mlock;

#define PTH_HASHSIZE 100

static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
static unsigned long pthhash;

static LIST_HEAD(, ksyn_wait_queue) pth_free_list;

static zone_t kwq_zone; /* zone for allocation of ksyn_queue */
static zone_t kwe_zone; /* zone for allocation of ksyn_waitq_element */

#define SEQFIT 0
#define FIRSTFIT 1

struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
	uint32_t ksynq_count; /* number of entries in queue */
	uint32_t ksynq_firstnum; /* lowest seq in queue */
	uint32_t ksynq_lastnum; /* highest seq in queue */
};
typedef struct ksyn_queue *ksyn_queue_t;

typedef enum {
	KSYN_QUEUE_READ = 0,
	KSYN_QUEUE_WRITE,
	KSYN_QUEUE_MAX,
} kwq_queue_type_t;

typedef enum {
	KWQ_INTR_NONE = 0,
	KWQ_INTR_READ = 0x1,
	KWQ_INTR_WRITE = 0x2,
} kwq_intr_type_t;

struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;
	LIST_ENTRY(ksyn_wait_queue) kw_list;
	user_addr_t kw_addr;
	thread_t kw_owner; /* current owner or THREAD_NULL, has a +1 */
	uint64_t kw_object; /* object backing in shared mode */
	uint64_t kw_offset; /* offset inside the object in shared mode */
	int kw_pflags; /* flags under listlock protection */
	struct timeval kw_ts; /* timeval needed for upkeep before free */
	int kw_iocount; /* in-use reference */
	int kw_dropcount; /* current users unlocking... */

	int kw_type; /* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue; /* num of waiters held */
	uint32_t kw_fakecount; /* number of error/prepost fakes */
	uint32_t kw_highseq; /* highest seq in the queue */
	uint32_t kw_lowseq; /* lowest seq in the queue */
	uint32_t kw_lword; /* L value from userland */
	uint32_t kw_uword; /* U word value from userland */
	uint32_t kw_sword; /* S word value from userland */
	uint32_t kw_lastunlockseq; /* the last seq that unlocked */
	/* for CV to be used as the seq kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword; /* the last seq that unlocked */
	/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword; /* the last seq that unlocked; with num of waiters */
	struct {
		uint32_t count; /* prepost count */
		uint32_t lseq; /* prepost target seq */
		uint32_t sseq; /* prepost target sword, in cvar used for mutexowned */
	} kw_prepost;
	struct {
		kwq_intr_type_t type; /* type of failed wakeups */
		uint32_t count; /* prepost of missed wakeup due to intrs */
		uint32_t seq; /* prepost of missed wakeup limit seq */
		uint32_t returnbits; /* return bits value for missed wakeup threads */
	} kw_intr;

	int kw_kflags;
	int kw_qos_override; /* QoS of max waiter during contention period */
	struct turnstile *kw_turnstile;
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */
	lck_spin_t kw_lock; /* spinlock protecting this structure */
};
typedef struct ksyn_wait_queue *ksyn_wait_queue_t;

#define TID_ZERO (uint64_t)0

/* bits needed in handling the rwlock unlock */
#define PTH_RW_TYPE_READ 0x01
#define PTH_RW_TYPE_WRITE 0x04
#define PTH_RW_TYPE_MASK 0xff
#define PTH_RW_TYPE_SHIFT 8

#define PTH_RWSHFT_TYPE_READ 0x0100
#define PTH_RWSHFT_TYPE_WRITE 0x0400
#define PTH_RWSHFT_TYPE_MASK 0xff00

/*
 * Mutex pshared attributes
 */
#define PTHREAD_PROCESS_SHARED _PTHREAD_MTX_OPT_PSHARED
#define PTHREAD_PROCESS_PRIVATE 0x20
#define PTHREAD_PSHARED_FLAGS_MASK 0x30

/*
 * Mutex policy attributes
 */
#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 0x040 /* 1 */
#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 0x080 /* 2 */
#define _PTHREAD_MTX_OPT_POLICY_MASK 0x1c0

/* pflags */
#define KSYN_WQ_INHASH 2
#define KSYN_WQ_SHARED 4
#define KSYN_WQ_WAITING 8 /* threads waiting for this wq to be available */
#define KSYN_WQ_FLIST 0x10 /* in free list to be freed after a short delay */

/* kflags */
#define KSYN_KWF_INITCLEARED 0x1 /* the init status found and preposts cleared */
#define KSYN_KWF_ZEROEDOUT 0x2 /* the lword, etc are inited to 0 */
#define KSYN_KWF_QOS_APPLIED 0x4 /* QoS override applied to owner */
#define KSYN_KWF_OVERLAP_GUARD 0x8 /* overlap guard */

#define KSYN_CLEANUP_DEADLINE 10
static int psynch_cleanupset;
thread_call_t psynch_thcall;

#define KSYN_WQTYPE_INWAIT 0x1000
#define KSYN_WQTYPE_INDROP 0x2000
#define KSYN_WQTYPE_MTX 0x01
#define KSYN_WQTYPE_CVAR 0x02
#define KSYN_WQTYPE_RWLOCK 0x04
#define KSYN_WQTYPE_SEMA 0x08
#define KSYN_WQTYPE_MASK 0xff

#define KSYN_WQTYPE_MUTEXDROP (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)

static inline int
_kwq_type(ksyn_wait_queue_t kwq)
{
	return (kwq->kw_type & KSYN_WQTYPE_MASK);
}

static inline bool
_kwq_use_turnstile(ksyn_wait_queue_t kwq)
{
	// <rdar://problem/15926625> If we had writer-owner information from the
	// rwlock then we could use the turnstile to push on it. For now, only
	// plain mutexes use it.
	return (_kwq_type(kwq) == KSYN_WQTYPE_MTX);
}

#define KW_UNLOCK_PREPOST 0x01
#define KW_UNLOCK_PREPOST_READLOCK 0x08
#define KW_UNLOCK_PREPOST_WRLOCK 0x20

static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t object, uint64_t offset);
static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype, ksyn_wait_queue_t *wq);
static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);

static int _wait_result_to_errno(wait_result_t result);

static int ksyn_wait(ksyn_wait_queue_t, kwq_queue_type_t, uint32_t, int, uint64_t, uint16_t, thread_continue_t, block_hint_t);
static kern_return_t ksyn_signal(ksyn_wait_queue_t, kwq_queue_type_t, ksyn_waitq_element_t, uint32_t);
static void ksyn_freeallkwe(ksyn_queue_t kq);

static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t, thread_t *);

static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);

static void ksyn_queue_init(ksyn_queue_t kq);
static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);

static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);

static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);

static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);

static void __dead2 psynch_cvcontinue(void *, wait_result_t);
static void __dead2 psynch_mtxcontinue(void *, wait_result_t);
static void __dead2 psynch_rw_rdcontinue(void *, wait_result_t);
static void __dead2 psynch_rw_wrcontinue(void *, wait_result_t);

static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);

static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
{
	int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);

	// assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);

	if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
		/* the values of L, U and S are cleared out due to L==S in previous transition */
		kwq->kw_lword = mgen;
		kwq->kw_uword = ugen;
		kwq->kw_sword = rw_wc;
		kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
	} else {
		if (is_seqhigher(mgen, kwq->kw_lword)) {
			kwq->kw_lword = mgen;
		}
		if (is_seqhigher(ugen, kwq->kw_uword)) {
			kwq->kw_uword = ugen;
		}
		if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
			kwq->kw_sword = rw_wc;
		}
	}
	if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
		kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
	}
}

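/*
 * Editor's sketch (not part of the original source): UPDATE_CVKWQ and much of
 * this file lean on the wrapping comparisons is_seqhigher()/is_seqlower(),
 * which are defined in synch_internal.h. A plausible model of that
 * arithmetic, assuming sequence words advance modulo 2^32 and are never
 * compared across more than half the space, is:
 */
#if 0
static inline int
seq_is_higher_model(uint32_t x, uint32_t y)
{
	/* x is "higher" when the signed modular distance from y to x is positive */
	return (int32_t)(x - y) > 0;
}
#endif
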
static inline void
_kwq_clear_preposted_wakeup(ksyn_wait_queue_t kwq)
{
	kwq->kw_prepost.lseq = 0;
	kwq->kw_prepost.sseq = PTHRW_RWS_INIT;
	kwq->kw_prepost.count = 0;
}

static inline void
_kwq_mark_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t count,
		uint32_t lseq, uint32_t sseq)
{
	kwq->kw_prepost.count = count;
	kwq->kw_prepost.lseq = lseq;
	kwq->kw_prepost.sseq = sseq;
}

static inline void
_kwq_clear_interrupted_wakeup(ksyn_wait_queue_t kwq)
{
	kwq->kw_intr.type = KWQ_INTR_NONE;
	kwq->kw_intr.count = 0;
	kwq->kw_intr.seq = 0;
	kwq->kw_intr.returnbits = 0;
}

static inline void
_kwq_mark_interruped_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
		uint32_t count, uint32_t lseq, uint32_t returnbits)
{
	kwq->kw_intr.count = count;
	kwq->kw_intr.seq = lseq;
	kwq->kw_intr.returnbits = returnbits;
	kwq->kw_intr.type = type;
}

static void
_kwq_destroy(ksyn_wait_queue_t kwq)
{
	if (kwq->kw_owner) {
		thread_deallocate(kwq->kw_owner);
	}
	lck_spin_destroy(&kwq->kw_lock, pthread_lck_grp);
	zfree(kwq_zone, kwq);
}

#define KWQ_SET_OWNER_TRANSFER_REF 0x1

static inline thread_t
_kwq_set_owner(ksyn_wait_queue_t kwq, thread_t new_owner, int flags)
{
	thread_t old_owner = kwq->kw_owner;
	if (old_owner == new_owner) {
		if (flags & KWQ_SET_OWNER_TRANSFER_REF) return new_owner;
		return THREAD_NULL;
	}
	if ((flags & KWQ_SET_OWNER_TRANSFER_REF) == 0) {
		thread_reference(new_owner);
	}
	kwq->kw_owner = new_owner;
	return old_owner;
}

static inline thread_t
_kwq_clear_owner(ksyn_wait_queue_t kwq)
{
	return _kwq_set_owner(kwq, THREAD_NULL, KWQ_SET_OWNER_TRANSFER_REF);
}

static inline void
_kwq_cleanup_old_owner(thread_t *thread)
{
	if (*thread) {
		thread_deallocate(*thread);
		*thread = THREAD_NULL;
	}
}

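/*
 * Editor's note (illustrative, not from the original source): the helpers
 * above return the displaced owner instead of releasing it because kw_lock is
 * a spinlock; thread_deallocate() is called only after the lock is dropped
 * (the blocking path below uses thread_deallocate_safe() when it cannot drop
 * it). The usual caller shape, sketched:
 */
#if 0
	thread_t old_owner;

	ksyn_wqlock(kwq);
	old_owner = _kwq_set_owner(kwq, current_thread(), 0);
	/* ... update turnstile state while still holding kw_lock ... */
	ksyn_wqunlock(kwq);
	_kwq_cleanup_old_owner(&old_owner); /* releases the +1 once unlocked */
#endif
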
static void
CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
{
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
		if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
			panic("CV: entries in queue during reinit %d:%d\n", kwq->kw_inqueue, kwq->kw_fakecount);
		}
	}
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
		kwq->kw_nextseqword = PTHRW_RWS_INIT;
		kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
	}
	_kwq_clear_preposted_wakeup(kwq);
	kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
	kwq->kw_lastseqword = PTHRW_RWS_INIT;
	_kwq_clear_interrupted_wakeup(kwq);
	kwq->kw_lword = 0;
	kwq->kw_uword = 0;
	kwq->kw_sword = PTHRW_RWS_INIT;
}

static bool
_kwq_handle_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t type,
		uint32_t lseq, uint32_t *retval)
{
	if (kwq->kw_prepost.count == 0 ||
			!is_seqlower_eq(lseq, kwq->kw_prepost.lseq)) {
		return false;
	}

	kwq->kw_prepost.count--;
	if (kwq->kw_prepost.count > 0) {
		return false;
	}

	int error, should_block = 0;
	uint32_t updatebits = 0;
	uint32_t pp_lseq = kwq->kw_prepost.lseq;
	uint32_t pp_sseq = kwq->kw_prepost.sseq;
	_kwq_clear_preposted_wakeup(kwq);

	kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;

	error = kwq_handle_unlock(kwq, pp_lseq, pp_sseq, &updatebits,
			(type | KW_UNLOCK_PREPOST), &should_block, lseq);
	if (error) {
		panic("_kwq_handle_preposted_wakeup: kwq_handle_unlock failed %d",
				error);
	}

	if (should_block) {
		return false;
	}
	*retval = updatebits;
	return true;
}

static bool
_kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t type, uint32_t lgenval,
		uint32_t rw_wc, uint32_t *retval)
{
	int res = 0;

	// overlaps only occur on read lockers
	if (type != PTH_RW_TYPE_READ) {
		return false;
	}

	// check for overlap and no pending W bit (indicates writers)
	if ((kwq->kw_kflags & KSYN_KWF_OVERLAP_GUARD) &&
			!is_rws_savemask_set(rw_wc) && !is_rwl_wbit_set(lgenval)) {
		/* overlap is set, so no need to check for valid state for overlap */

		if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
			/* increase the next expected seq by one */
			kwq->kw_nextseqword += PTHRW_INC;
			/* set count by one & bits from the nextseq and add M bit */
			*retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
			res = 1;
		}
	}
	return res;
}

static inline bool
_kwq_is_used(ksyn_wait_queue_t kwq)
{
	return (kwq->kw_inqueue != 0 || kwq->kw_prepost.count != 0 ||
			kwq->kw_intr.count != 0);
}

/*
 * Consumes a pending interrupted waiter; returns true if the current
 * thread should return back to userspace because it was previously
 * interrupted.
 */
static inline bool
_kwq_handle_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
		uint32_t lseq, uint32_t *retval)
{
	if (kwq->kw_intr.count != 0 && kwq->kw_intr.type == type &&
			(!kwq->kw_intr.seq || is_seqlower_eq(lseq, kwq->kw_intr.seq))) {
		kwq->kw_intr.count--;
		*retval = kwq->kw_intr.returnbits;
		if (kwq->kw_intr.returnbits == 0) {
			_kwq_clear_interrupted_wakeup(kwq);
		}
		return true;
	}
	return false;
}

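/*
 * Editor's note (illustrative, not from the original source): kw_intr pairs a
 * producer and a consumer. When a wakeup finds its target already aborted the
 * wait (KERN_NOT_WAITING), the unlock path records the grant; the next waiter
 * arriving at or below kw_intr.seq consumes it instead of blocking.
 * Schematically:
 */
#if 0
	/* unlock path, wakeup had nobody to receive it: */
	_kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1, nextgen, updatebits);

	/* later, in the lock path: */
	if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) {
		return; /* grant consumed; no need to block */
	}
#endif
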
static void
pthread_list_lock(void)
{
	lck_mtx_lock_spin(pthread_list_mlock);
}

static void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}

static void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{
	lck_spin_lock(&kwq->kw_lock);
}

static void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	lck_spin_unlock(&kwq->kw_lock);
}

/*
 * Routine to drop the mutex unlock postings; used both for the mutexunlock
 * system call and for drops during cond wait.
 */
static uint32_t
_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen,
		int flags)
{
	kern_return_t ret;
	uint32_t returnbits = 0;
	uint32_t updatebits = 0;
	int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) ==
			_PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
	uint32_t nextgen = (ugen + PTHRW_INC);
	thread_t old_owner = THREAD_NULL;

	ksyn_wqlock(kwq);
	kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);

redrive:
	updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
			(PTH_RWL_EBIT | PTH_RWL_KBIT);

	if (firstfit) {
		if (kwq->kw_inqueue == 0) {
			uint32_t count = kwq->kw_prepost.count + 1;
			// Increment the number of preposters we have waiting
			_kwq_mark_preposted_wakeup(kwq, count, mgen & PTHRW_COUNT_MASK, 0);
			// We don't know the current owner as we've determined this mutex
			// drop should have a preposted locker inbound into the kernel but
			// we have no way of knowing who it is. When it arrives, the lock
			// path will update the turnstile owner and return it to userspace.
			old_owner = _kwq_clear_owner(kwq);
			pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
					&kwq->kw_turnstile);
			PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
					kwq->kw_prepost.lseq, count, 0);
		} else {
			// signal first waiter
			ret = ksyn_mtxsignal(kwq, NULL, updatebits, &old_owner);
			if (ret == KERN_NOT_WAITING) {
				// <rdar://problem/39093536> ksyn_mtxsignal attempts to signal
				// the thread but it sets up the turnstile inheritor first.
				// That means we can't redrive the mutex in a loop without
				// dropping the wq lock and cleaning up the turnstile state.
				ksyn_wqunlock(kwq);
				pthread_kern->psynch_wait_cleanup();
				_kwq_cleanup_old_owner(&old_owner);
				ksyn_wqlock(kwq);
				goto redrive;
			}
		}
	} else {
		bool prepost = false;
		if (kwq->kw_inqueue == 0) {
			// No waiters in the queue.
			prepost = true;
		} else {
			uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum & PTHRW_COUNT_MASK);
			if (low_writer == nextgen) {
				/* next seq to be granted found */
				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
				ret = ksyn_mtxsignal(kwq, NULL,
						updatebits | PTH_RWL_MTX_WAIT, &old_owner);
				if (ret == KERN_NOT_WAITING) {
					/* interrupt post */
					_kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
							nextgen, updatebits);
				}
			} else if (is_seqhigher(low_writer, nextgen)) {
				prepost = true;
			} else {
				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one in queue\n");
				ksyn_waitq_element_t kwe;
				kwe = ksyn_queue_find_seq(kwq,
						&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], nextgen);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
					ret = ksyn_mtxsignal(kwq, kwe,
							updatebits | PTH_RWL_MTX_WAIT, &old_owner);
					if (ret == KERN_NOT_WAITING) {
						goto redrive;
					}
				} else {
					prepost = true;
				}
			}
		}
		if (prepost) {
			if (kwq->kw_prepost.count != 0) {
				__FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
			} else {
				_kwq_mark_preposted_wakeup(kwq, 1, nextgen & PTHRW_COUNT_MASK,
						0);
			}
			old_owner = _kwq_clear_owner(kwq);
			pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
					&kwq->kw_turnstile);
		}
	}

	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	_kwq_cleanup_old_owner(&old_owner);
	ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
	return returnbits;
}

static int
_ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
{
	int res = (lgenval & PTHRW_RWL_INIT) != 0;
	if (res) {
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
			/* first to notice the reset of the lock, clear preposts */
			CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
		}
	}
	return res;
}

/*
 * psynch_mutexwait: This system call is used for contended psynch mutexes to
 * block.
 */
int
_psynch_mutexwait(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
		uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval)
{
	ksyn_wait_queue_t kwq;
	int error = 0;
	int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
			== _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
	int ins_flags = SEQFIT;
	uint32_t lseq = (mgen & PTHRW_COUNT_MASK);
	uint32_t updatebits = 0;
	thread_t tid_th = THREAD_NULL, old_owner = THREAD_NULL;

	if (firstfit) {
		/* first fit */
		ins_flags = FIRSTFIT;
	}

	error = ksyn_wqfind(mutex, mgen, ugen, 0, flags,
			(KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
	if (error != 0) {
		return error;
	}

again:
	ksyn_wqlock(kwq);

	if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) {
		old_owner = _kwq_set_owner(kwq, current_thread(), 0);
		pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
				&kwq->kw_turnstile);
		ksyn_wqunlock(kwq);
		goto out;
	}

	if (kwq->kw_prepost.count && (firstfit || (lseq == kwq->kw_prepost.lseq))) {
		/* got preposted lock */
		kwq->kw_prepost.count--;

		if (!firstfit) {
			if (kwq->kw_prepost.count > 0) {
				__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
				kwq->kw_prepost.lseq += PTHRW_INC; /* look for next one */
				ksyn_wqunlock(kwq);
				error = EINVAL;
				goto out;
			}
			_kwq_clear_preposted_wakeup(kwq);
		}

		if (kwq->kw_inqueue == 0) {
			updatebits = lseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
		} else {
			updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
					(PTH_RWL_KBIT | PTH_RWL_EBIT);
		}
		updatebits &= ~PTH_RWL_MTX_WAIT;

		if (updatebits == 0) {
			__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT\n");
		}

		PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
				kwq->kw_prepost.lseq, kwq->kw_prepost.count, 1);

		old_owner = _kwq_set_owner(kwq, current_thread(), 0);
		pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
				&kwq->kw_turnstile);

		ksyn_wqunlock(kwq);
		*retval = updatebits;
		goto out;
	}

	// mutexwait passes in an owner hint at the time userspace contended for
	// the mutex, however, the owner tid in the userspace data structure may be
	// unset or SWITCHING (-1), or it may correspond to a stale snapshot after
	// the lock has subsequently been unlocked by another thread.
	if (tid == thread_tid(kwq->kw_owner)) {
		// userspace and kernel agree
	} else if (tid == 0) {
		// contender came in before owner could write TID
		// let's assume that what the kernel knows is accurate
		// for all we know this waiter came in late in the kernel
	} else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT &&
			is_seqlower(ugen, kwq->kw_lastunlockseq)) {
		// owner is stale, someone has come in and unlocked since this
		// contended read the TID, so assume what is known in the kernel is
		// accurate
	} else if (tid == PTHREAD_MTX_TID_SWITCHING) {
		// userspace didn't know the owner because it was being unlocked, but
		// that unlocker hasn't reached the kernel yet. So assume what is known
		// in the kernel is accurate
	} else {
		// hint is being passed in for a specific thread, and we have no reason
		// not to trust it (like the kernel unlock sequence being higher)
		//
		// So resolve the hint to a thread_t if we haven't done so yet
		// and redrive as we dropped the lock
		if (tid_th == THREAD_NULL) {
			ksyn_wqunlock(kwq);
			tid_th = pthread_kern->task_findtid(current_task(), tid);
			if (tid_th == THREAD_NULL) tid = 0;
			goto again;
		}
		tid_th = _kwq_set_owner(kwq, tid_th, KWQ_SET_OWNER_TRANSFER_REF);
	}

	if (tid_th) {
		// We are on our way to block, and can't drop the spinlock anymore
		pthread_kern->thread_deallocate_safe(tid_th);
		tid_th = THREAD_NULL;
	}
	assert(old_owner == THREAD_NULL);
	error = ksyn_wait(kwq, KSYN_QUEUE_WRITE, mgen, ins_flags, 0, 0,
			psynch_mtxcontinue, kThreadWaitPThreadMutex);
	// ksyn_wait drops wait queue lock
out:
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	if (tid_th) {
		thread_deallocate(tid_th);
	}
	if (old_owner) {
		thread_deallocate(old_owner);
	}
	return error;
}

void __dead2
psynch_mtxcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t kwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	ksyn_wqlock(kwq);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
		}
	} else {
		uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
		pthread_kern->uthread_set_returnval(uth, updatebits);

		if (updatebits == 0) {
			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT\n");
		}
	}

	pthread_kern->psynch_wait_complete(kwq, &kwq->kw_turnstile);

	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}
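
/*
 * Editor's note (illustrative, not from the original source): ksyn_wait
 * (defined later in this file) parks the thread with a continuation, so the
 * kernel stack is discarded while blocked. On wakeup the thread restarts in
 * psynch_mtxcontinue rather than after the ksyn_wait() call, which is why the
 * continuation re-derives all state from the uthread and kwq and must end in
 * unix_syscall_return(). The underlying Mach pattern is roughly:
 */
#if 0
	/* inside ksyn_wait, after queueing the kwe and arming any deadline: */
	ksyn_wqunlock(kwq);
	thread_block_parameter(psynch_mtxcontinue, kwq); /* never returns here */
#endif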

static void __dead2
_psynch_rw_continue(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
		wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	ksyn_wqlock(kwq);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
		}
	} else {
		pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));

	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}

void __dead2
psynch_rw_rdcontinue(void *parameter, wait_result_t result)
{
	_psynch_rw_continue(parameter, KSYN_QUEUE_READ, result);
}

void __dead2
psynch_rw_wrcontinue(void *parameter, wait_result_t result)
{
	_psynch_rw_continue(parameter, KSYN_QUEUE_WRITE, result);
}

/*
 * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
 */
int
_psynch_mutexdrop(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
		uint32_t ugen, uint64_t tid __unused, uint32_t flags, uint32_t *retval)
{
	int res;
	ksyn_wait_queue_t kwq;

	res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
	if (res == 0) {
		uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
		/* drops the kwq reference */
		if (retval) {
			*retval = updateval;
		}
	}

	return res;
}

static kern_return_t
ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe,
		uint32_t updateval, thread_t *old_owner)
{
	kern_return_t ret;

	if (!kwe) {
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_mtxsignal: panic signaling empty queue");
		}
	}

	PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_START, kwq->kw_addr, kwe,
			thread_tid(kwe->kwe_thread), kwq->kw_inqueue);

	ret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, kwe, updateval);
	if (ret == KERN_SUCCESS) {
		*old_owner = _kwq_set_owner(kwq, kwe->kwe_thread, 0);
	} else {
		*old_owner = _kwq_clear_owner(kwq);
	}
	PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_END, kwq->kw_addr, kwe,
			ret, 0);
	return ret;
}


static void
ksyn_prepost(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t state,
		uint32_t lockseq)
{
	bzero(kwe, sizeof(*kwe));
	kwe->kwe_state = state;
	kwe->kwe_lockseq = lockseq;
	kwe->kwe_count = 1;

	(void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, lockseq, SEQFIT);
	kwq->kw_fakecount++;
}

static void
ksyn_cvsignal(ksyn_wait_queue_t ckwq, thread_t th, uint32_t uptoseq,
		uint32_t signalseq, uint32_t *updatebits, int *broadcast,
		ksyn_waitq_element_t *nkwep)
{
	ksyn_waitq_element_t kwe = NULL;
	ksyn_waitq_element_t nkwe = NULL;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];

	uptoseq &= PTHRW_COUNT_MASK;

	// Find the specified thread to wake.
	if (th != THREAD_NULL) {
		uthread_t uth = pthread_kern->get_bsdthread_info(th);
		kwe = pthread_kern->uthread_get_uukwe(uth);
		if (kwe->kwe_kwqqueue != ckwq ||
				is_seqhigher(kwe->kwe_lockseq, uptoseq)) {
			// Unless it's no longer waiting on this CV...
			kwe = NULL;
			// ...in which case we post a broadcast instead.
			*broadcast = 1;
			return;
		}
	}

	// If no thread was specified, find any thread to wake (with the right
	// sequence number).
	while (th == THREAD_NULL) {
		if (kwe == NULL) {
			kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
		}
		if (kwe == NULL && nkwe == NULL) {
			// No eligible entries; need to allocate a new
			// entry to prepost. Loop to rescan after
			// reacquiring the lock after allocation in
			// case anything new shows up.
			ksyn_wqunlock(ckwq);
			nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
			ksyn_wqlock(ckwq);
		} else {
			break;
		}
	}

	if (kwe != NULL) {
		// If we found a thread to wake...
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			if (is_seqlower(kwe->kwe_lockseq, signalseq)) {
				/*
				 * A valid thread in our range, but lower than our signal.
				 * Matching it may leave our match with nobody to wake it if/when
				 * it arrives (the signal originally meant for this thread might
				 * not successfully wake it).
				 *
				 * Convert to broadcast - may cause some spurious wakeups
				 * (allowed by spec), but avoids starvation (better choice).
				 */
				*broadcast = 1;
			} else {
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
				*updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			// Merge with existing prepost at same uptoseq.
			kwe->kwe_count += 1;
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// Existing broadcasts subsume this signal.
		} else {
			panic("unknown kwe state\n");
		}
		if (nkwe) {
			/*
			 * If we allocated a new kwe above but then found a different kwe to
			 * use then we need to deallocate the spare one.
			 */
			zfree(kwe_zone, nkwe);
			nkwe = NULL;
		}
	} else if (nkwe != NULL) {
		// ... otherwise, insert the newly allocated prepost.
		ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq);
		nkwe = NULL;
	} else {
		panic("failed to allocate kwe\n");
	}

	*nkwep = nkwe;
}

static int
__psynch_cvsignal(user_addr_t cv, uint32_t cgen, uint32_t cugen,
		uint32_t csgen, uint32_t flags, int broadcast,
		mach_port_name_t threadport, uint32_t *retval)
{
	int error = 0;
	thread_t th = THREAD_NULL;
	ksyn_wait_queue_t kwq;

	uint32_t uptoseq = cgen & PTHRW_COUNT_MASK;
	uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;

	// validate sane L, U, and S values
	if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) {
		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
		return EINVAL;
	}

	if (threadport != 0) {
		th = port_name_to_thread((mach_port_name_t)threadport);
		if (th == THREAD_NULL) {
			return ESRCH;
		}
	}

	error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq);
	if (error == 0) {
		uint32_t updatebits = 0;
		ksyn_waitq_element_t nkwe = NULL;

		ksyn_wqlock(kwq);

		// update L, U and S...
		UPDATE_CVKWQ(kwq, cgen, cugen, csgen);

		PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_START, kwq->kw_addr,
				fromseq, uptoseq, broadcast);

		if (!broadcast) {
			// No need to signal if the CV is already balanced.
			if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
				ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits,
						&broadcast, &nkwe);
				PTHREAD_TRACE(psynch_cvar_signal, kwq->kw_addr, broadcast, 0, 0);
			}
		}

		if (broadcast) {
			ksyn_handle_cvbroad(kwq, uptoseq, &updatebits);
		}

		kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
		// set C or P bits and free if needed
		ksyn_cvupdate_fixup(kwq, &updatebits);
		*retval = updatebits;

		PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_END, kwq->kw_addr,
				updatebits, 0, 0);

		ksyn_wqunlock(kwq);

		pthread_kern->psynch_wait_cleanup();

		if (nkwe != NULL) {
			zfree(kwe_zone, nkwe);
		}

		ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
	}

	if (th != NULL) {
		thread_deallocate(th);
	}

	return error;
}

/*
 * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
 */
int
_psynch_cvbroad(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
		uint64_t cvudgen, uint32_t flags, __unused user_addr_t mutex,
		__unused uint64_t mugen, __unused uint64_t tid, uint32_t *retval)
{
	uint32_t diffgen = cvudgen & 0xffffffff;
	uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
	if (count > pthread_kern->get_task_threadmax()) {
		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
		return EBUSY;
	}

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t cugen = (cvudgen >> 32) & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval);
}

/*
 * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
 */
int
_psynch_cvsignal(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
		uint32_t cvugen, int threadport, __unused user_addr_t mutex,
		__unused uint64_t mugen, __unused uint64_t tid, uint32_t flags,
		uint32_t *retval)
{
	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval);
}

/*
 * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
 */
int
_psynch_cvwait(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
		uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags,
		int64_t sec, uint32_t nsec, uint32_t *retval)
{
	int error = 0;
	uint32_t updatebits = 0;
	ksyn_wait_queue_t ckwq = NULL;
	ksyn_waitq_element_t kwe, nkwe = NULL;

	/* for conformance reasons */
	pthread_kern->__pthread_testcancel(0);

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t ugen = (mugen >> 32) & 0xffffffff;
	uint32_t mgen = mugen & 0xffffffff;

	uint32_t lockseq = (cgen & PTHRW_COUNT_MASK);

	/*
	 * In cvwait the U word can be out of range as the cv could be used only
	 * for timeouts. However the S word needs to be within bounds and
	 * validated at user level as well.
	 */
	if (is_seqhigher_eq(csgen, lockseq) != 0) {
		__FAILEDUSERTEST__("psynch_cvwait: invalid sequence numbers\n");
		return EINVAL;
	}

	PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_START, cv, mutex, cgen, 0);

	error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
	if (error != 0) {
		return error;
	}

	if (mutex != 0) {
		uint32_t mutexrv = 0;
		error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, &mutexrv);
		if (error != 0) {
			goto out;
		}
	}

	ksyn_wqlock(ckwq);

	// update L, U and S...
	UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);

	/* Look for the sequence for prepost (or conflicting thread) */
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
	if (kwe != NULL) {
		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
				/* we can safely consume a reference, so do so */
				if (--kwe->kwe_count == 0) {
					ksyn_queue_remove_item(ckwq, kq, kwe);
					ckwq->kw_fakecount--;
					nkwe = kwe;
				}
			} else {
				/*
				 * consuming a prepost higher than our lock sequence is valid, but
				 * can leave the higher thread without a match. Convert the entry
				 * to a broadcast to compensate for this.
				 */
				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
#if __TESTPANICS__
				if (updatebits != 0)
					panic("psynch_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits);
#endif /* __TESTPANICS__ */
			}
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// XXX
			// Nothing to do.
		} else if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
			error = EBUSY;
		} else {
			panic("psynch_cvwait: unexpected wait queue element type\n");
		}

		if (error == 0) {
			updatebits |= PTHRW_INC;
			ckwq->kw_sword += PTHRW_INC;

			/* set C or P bits and free if needed */
			ksyn_cvupdate_fixup(ckwq, &updatebits);
			*retval = updatebits;
		}
	} else {
		uint64_t abstime = 0;
		uint16_t kwe_flags = 0;

		if (sec != 0 || (nsec & 0x3fffffff) != 0) {
			struct timespec ts;
			ts.tv_sec = (__darwin_time_t)sec;
			ts.tv_nsec = (nsec & 0x3fffffff);
			nanoseconds_to_absolutetime(
					(uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
			clock_absolutetime_interval_to_deadline(abstime, &abstime);
		}

		PTHREAD_TRACE(psynch_cvar_kwait, cv, mutex, kwe_flags, 1);

		error = ksyn_wait(ckwq, KSYN_QUEUE_WRITE, cgen, SEQFIT, abstime,
				kwe_flags, psynch_cvcontinue, kThreadWaitPThreadCondVar);
		// ksyn_wait drops wait queue lock
	}

	ksyn_wqunlock(ckwq);

	if (nkwe != NULL) {
		zfree(kwe_zone, nkwe);
	}
out:

	PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, cv, error, updatebits, 2);

	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	return error;
}

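/*
 * Editor's note (illustrative, not from the original source): the 64-bit
 * syscall arguments above are packed word pairs. From the unpacking in
 * _psynch_cvwait/_psynch_cvbroad, a userspace caller would assemble them
 * along these lines:
 */
#if 0
	uint64_t cvlsgen = ((uint64_t)csgen << 32) | cgen;    /* S word : L word */
	uint64_t cvudgen = ((uint64_t)cugen << 32) | diffgen; /* U word : diff */
	uint64_t mugen   = ((uint64_t)ugen << 32) | mgen;     /* mutex U : mutex L */
#endif
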
void __dead2
psynch_cvcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t ckwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(ckwq);
		/* just in case it got woken up as we were granting */
		int retval = kwe->kwe_psynchretval;
		pthread_kern->uthread_set_returnval(uth, retval);

		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
		}
		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
			/* the condition var granted.
			 * reset the error so that the thread returns back.
			 */
			error = 0;
			/* no need to set any bits just return as cvsig/broad covers this */
		} else {
			ckwq->kw_sword += PTHRW_INC;

			/* set C and P bits, in the local error */
			if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
				PTHREAD_TRACE(psynch_cvar_zeroed, ckwq->kw_addr,
						ckwq->kw_lword, ckwq->kw_sword, ckwq->kw_inqueue);
				error |= ECVCLEARED;
				if (ckwq->kw_inqueue != 0) {
					ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 1);
				}
				ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
				ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
			} else {
				/* is everything in the queue a fake entry? */
				if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
					error |= ECVPREPOST;
				}
			}
		}
		ksyn_wqunlock(ckwq);

		PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
				error, 0, 3);
	} else {
		int val = 0;
		// PTH_RWL_MTX_WAIT is removed
		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
			val = PTHRW_INC | PTH_RWS_CV_CBIT;
		}
		PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
				val, 0, 4);
		pthread_kern->uthread_set_returnval(uth, val);
	}

	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}

/*
 * psynch_cvclrprepost: This system call clears pending prepost if present.
 */
int
_psynch_cvclrprepost(__unused proc_t p, user_addr_t cv, uint32_t cvgen,
		uint32_t cvugen, uint32_t cvsgen, __unused uint32_t prepocnt,
		uint32_t preposeq, uint32_t flags, int *retval)
{
	int error = 0;
	int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
	int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP;
	ksyn_wait_queue_t kwq = NULL;

	*retval = 0;

	error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype,
			&kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	if (mutex) {
		int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
				== _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
		if (firstfit && kwq->kw_prepost.count) {
			if (is_seqlower_eq(kwq->kw_prepost.lseq, cvgen)) {
				PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
						kwq->kw_prepost.lseq, 0, 2);
				_kwq_clear_preposted_wakeup(kwq);
			}
		}
	} else {
		PTHREAD_TRACE(psynch_cvar_clrprepost, kwq->kw_addr, wqtype,
				preposeq, 0);
		ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITE, preposeq, 0);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, wqtype);
	return error;
}

/* ***************** pthread_rwlock ************************ */

static int
__psynch_rw_lock(int type, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;
	ksyn_wait_queue_t kwq;
	int error, prepost_type, kqi;
	thread_continue_t tc;

	if (type == PTH_RW_TYPE_READ) {
		prepost_type = KW_UNLOCK_PREPOST_READLOCK;
		kqi = KSYN_QUEUE_READ;
		tc = psynch_rw_rdcontinue;
	} else {
		prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
		kqi = KSYN_QUEUE_WRITE;
		tc = psynch_rw_wrcontinue;
	}

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
			(KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);
	_ksyn_check_init(kwq, lgenval);
	if (_kwq_handle_interrupted_wakeup(kwq, type, lockseq, retval) ||
			// handle overlap first as they are not counted against pre_rwwc
			// handle_overlap uses the flags in lgenval (vs. lockseq)
			_kwq_handle_overlap(kwq, type, lgenval, rw_wc, retval) ||
			_kwq_handle_preposted_wakeup(kwq, prepost_type, lockseq, retval)) {
		ksyn_wqunlock(kwq);
		goto out;
	}

	block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
			kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
	error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, 0, tc, block_hint);
	// ksyn_wait drops wait queue lock
out:
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
	return error;
}

/*
 * psynch_rw_rdlock: This system call is used for psync rwlock readers to block.
 */
int
_psynch_rw_rdlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc,
			flags, retval);
}

/*
 * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block.
 */
int
_psynch_rw_longrdlock(__unused proc_t p, __unused user_addr_t rwlock,
		__unused uint32_t lgenval, __unused uint32_t ugenval,
		__unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
{
	return ESRCH;
}


/*
 * psynch_rw_wrlock: This system call is used for psync rwlock writers to block.
 */
int
_psynch_rw_wrlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval,
			rw_wc, flags, retval);
}

/*
 * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block.
 */
int
_psynch_rw_yieldwrlock(__unused proc_t p, __unused user_addr_t rwlock,
		__unused uint32_t lgenval, __unused uint32_t ugenval,
		__unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
{
	return ESRCH;
}

/*
 * psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate
 * reader/writer variety lock.
 */
int
_psynch_rw_unlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	int error = 0;
	ksyn_wait_queue_t kwq;
	uint32_t updatebits = 0;
	int diff;
	uint32_t count = 0;
	uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
	int clearedkflags = 0;

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
			(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return (error);
	}

	ksyn_wqlock(kwq);
	int isinit = _ksyn_check_init(kwq, lgenval);

	/* if the last unlock seq is set, ensure the current one is not lower than that, as it would be spurious */
	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) &&
			(is_seqlower(ugenval, kwq->kw_lastunlockseq) != 0)) {
		error = 0;
		goto out;
	}

	/* If L-U != num of waiters, then it needs to be preposted or spr */
	diff = find_diff(lgenval, ugenval);

	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		if ((count == 0) || (count < (uint32_t)diff))
			goto prepost;
	}

	/* no prepost and all threads are in place, reset the bit */
	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)) {
		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
		clearedkflags = 1;
	}

	/* can handle unlock now */

	_kwq_clear_preposted_wakeup(kwq);

	error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n", error);
#endif /* __TESTPANICS__ */
out:
	if (error == 0) {
		/* update bits?? */
		*retval = updatebits;
	}

	// <rdar://problem/22244050> If any of the wakeups failed because they
	// already returned to userspace because of a signal then we need to ensure
	// that the reset state is not cleared when that thread returns. Otherwise,
	// _pthread_rwlock_lock will clear the interrupted state before it is read.
	if (clearedkflags != 0 && kwq->kw_intr.count > 0) {
		kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
	}

	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));

	return (error);

prepost:
	/* update if the new seq is higher than prev prepost, or first set */
	if (is_rws_sbit_set(kwq->kw_prepost.sseq) ||
			is_seqhigher_eq(rw_wc, kwq->kw_prepost.sseq)) {
		_kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc);
		updatebits = lgenval; /* let this not do unlock handling */
	}
	error = 0;
	goto out;
}
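
/*
 * Editor's worked example (not from the original source): suppose the unlock
 * computes diff = L - U = 3 outstanding lockers but find_seq_till() only
 * accounts for count = 2 of them queued in the kernel. The branch above takes
 * the prepost path and records the shortfall, so the missing locker is
 * granted when it finally arrives:
 */
#if 0
	/* diff - count == 1 pending locker not yet in the kernel */
	_kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc);
#endif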


/* ************************************************************************** */
void
pth_global_hashinit(void)
{
	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
}

void
_pth_proc_hashinit(proc_t p)
{
	void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash);
	if (ptr == NULL) {
		panic("pth_proc_hashinit: hash init returned 0\n");
	}

	pthread_kern->proc_set_pthhash(p, ptr);
}


static int
ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags,
		ksyn_wait_queue_t *out_kwq, struct pthhashhead **out_hashptr,
		uint64_t object, uint64_t offset)
{
	int res = 0;
	ksyn_wait_queue_t kwq;
	struct pthhashhead *hashptr;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		hashptr = pth_glob_hashtbl;
		LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) {
			if (kwq->kw_object == object && kwq->kw_offset == offset) {
				break;
			}
		}
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
		LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) {
			if (kwq->kw_addr == uaddr) {
				break;
			}
		}
	}
	*out_kwq = kwq;
	*out_hashptr = hashptr;
	return res;
}
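
/*
 * Editor's note (illustrative, not from the original source): the two lookup
 * keys above are deliberate. Process-private objects hash on the user address
 * within the owning proc's table; process-shared objects hash on the backing
 * (VM object, offset) pair in the global table, so distinct mappings of the
 * same shared page resolve to the same kwq:
 */
#if 0
	unsigned long bucket = shared ? (object & pthhash) : (uaddr & pthhash);
#endif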

void
_pth_proc_hashdelete(proc_t p)
{
	struct pthhashhead * hashptr;
	ksyn_wait_queue_t kwq;
	unsigned long hashsize = pthhash + 1;
	unsigned long i;

	hashptr = pthread_kern->proc_get_pthhash(p);
	pthread_kern->proc_set_pthhash(p, NULL);
	if (hashptr == NULL) {
		return;
	}

	pthread_list_lock();
	for (i = 0; i < hashsize; i++) {
		while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
			if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
			}
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
				LIST_REMOVE(kwq, kw_list);
			}
			pthread_list_unlock();
			/* release fake entries if present for cvars */
			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE]);
			_kwq_destroy(kwq);
			pthread_list_lock();
		}
	}
	pthread_list_unlock();
	FREE(hashptr, M_PROC);
}

/* no lock held for this as the waitqueue is getting freed */
void
ksyn_freeallkwe(ksyn_queue_t kq)
{
	ksyn_waitq_element_t kwe;
	while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
		if (kwe->kwe_state != KWE_THREAD_INWAIT) {
			zfree(kwe_zone, kwe);
		}
	}
}

static inline void
_kwq_report_inuse(ksyn_wait_queue_t kwq)
{
	if (kwq->kw_prepost.count != 0) {
		__FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [pre %d:0x%x:0x%x]",
				(uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_prepost.count,
				kwq->kw_prepost.lseq, kwq->kw_prepost.sseq);
		PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
				kwq->kw_type, 1, 0);
	}
	if (kwq->kw_intr.count != 0) {
		__FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [intr %d:0x%x:0x%x:0x%x]",
				(uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_intr.count,
				kwq->kw_intr.type, kwq->kw_intr.seq,
				kwq->kw_intr.returnbits);
		PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
				kwq->kw_type, 2, 0);
	}
	if (kwq->kw_iocount) {
		__FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [ioc %d:%d]",
				(uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_iocount,
				kwq->kw_dropcount);
		PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
				kwq->kw_type, 3, 0);
	}
	if (kwq->kw_inqueue) {
		__FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [inq %d:%d]",
				(uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_inqueue,
				kwq->kw_fakecount);
		PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, kwq->kw_type,
				4, 0);
	}
}

/* find kernel waitqueue, if not present create one. Grants a reference */
int
ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen,
		int flags, int wqtype, ksyn_wait_queue_t *kwqp)
{
	int res = 0;
	ksyn_wait_queue_t kwq = NULL;
	ksyn_wait_queue_t nkwq = NULL;
	struct pthhashhead *hashptr;
	proc_t p = current_proc();

	uint64_t object = 0, offset = 0;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		res = ksyn_findobj(uaddr, &object, &offset);
		hashptr = pth_glob_hashtbl;
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
	}

	while (res == 0) {
		pthread_list_lock();
		res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr,
				object, offset);
		if (res != 0) {
			pthread_list_unlock();
			break;
		}
		if (kwq == NULL && nkwq == NULL) {
			// Drop the lock to allocate a new kwq and retry.
			pthread_list_unlock();

			nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone);
			bzero(nkwq, sizeof(struct ksyn_wait_queue));
			int i;
			for (i = 0; i < KSYN_QUEUE_MAX; i++) {
				ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
			}
			lck_spin_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
			continue;
		} else if (kwq == NULL && nkwq != NULL) {
			// Still not found, add the new kwq to the hash.
			kwq = nkwq;
			nkwq = NULL; // Don't free.
			if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
				kwq->kw_pflags |= KSYN_WQ_SHARED;
				LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash);
			} else {
				LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash);
			}
			kwq->kw_pflags |= KSYN_WQ_INHASH;
		} else if (kwq != NULL) {
			// Found an existing kwq, use it.
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				LIST_REMOVE(kwq, kw_list);
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
			}
			if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
				if (!_kwq_is_used(kwq)) {
					if (kwq->kw_iocount == 0) {
						kwq->kw_type = 0; // mark for reinitialization
					} else if (kwq->kw_iocount == 1 &&
							kwq->kw_dropcount == kwq->kw_iocount) {
						/* if all users are unlockers then wait for it to finish */
						kwq->kw_pflags |= KSYN_WQ_WAITING;
						// Drop the lock and wait for the kwq to be free.
						(void)msleep(&kwq->kw_pflags, pthread_list_mlock,
								PDROP, "ksyn_wqfind", 0);
						continue;
					} else {
						_kwq_report_inuse(kwq);
						res = EINVAL;
					}
				} else {
					_kwq_report_inuse(kwq);
					res = EINVAL;
				}
			}
		}
		if (res == 0) {
			if (kwq->kw_type == 0) {
				kwq->kw_addr = uaddr;
				kwq->kw_object = object;
				kwq->kw_offset = offset;
				kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
				CLEAR_REINIT_BITS(kwq);
				kwq->kw_lword = mgen;
				kwq->kw_uword = ugen;
				kwq->kw_sword = sgen;
				kwq->kw_owner = THREAD_NULL;
				kwq->kw_kflags = 0;
				kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
				PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_START, uaddr,
						kwq->kw_type, kwq, 0);
				PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_END, uaddr,
						mgen, ugen, sgen);
			}
			kwq->kw_iocount++;
			if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
				kwq->kw_dropcount++;
			}
		}
		pthread_list_unlock();
		break;
	}
	if (kwqp != NULL) {
		*kwqp = kwq;
	}
	if (nkwq) {
		_kwq_destroy(nkwq);
	}
	return res;
}
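
/*
 * Editor's sketch (not part of the original source): the typical lifetime of
 * a kwq reference as used by the psynch entry points. ksyn_wqfind() grants an
 * iocount reference; the caller manipulates the queue under ksyn_wqlock() and
 * must pair the find with ksyn_wqrelease(). The wqtype value and helper below
 * are placeholders, for illustration only.
 */
#if 0 /* illustration only */
static int
example_use_kwq(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen,
		int flags)
{
	ksyn_wait_queue_t kwq;
	int err = ksyn_wqfind(uaddr, mgen, ugen, sgen, flags,
			(KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
	if (err == 0) {
		ksyn_wqlock(kwq);
		/* ... inspect or modify queue state ... */
		ksyn_wqunlock(kwq);
		ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	}
	return err;
}
#endif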

/* Reference from find is dropped here. Starts the free process if needed */
void
ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
{
	uint64_t deadline;
	ksyn_wait_queue_t free_elem = NULL;

	pthread_list_lock();
	if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
		kwq->kw_dropcount--;
	}
	if (--kwq->kw_iocount == 0) {
		if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
			/* someone is waiting for the waitqueue, wake them up */
			kwq->kw_pflags &= ~KSYN_WQ_WAITING;
			wakeup(&kwq->kw_pflags);
		}

		if (!_kwq_is_used(kwq)) {
			if (kwq->kw_turnstile) {
				panic("kw_turnstile still non-null upon release");
			}

			PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_START,
					kwq->kw_addr, kwq->kw_type, qfreenow, 0);
			PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_END,
					kwq->kw_addr, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword);

			if (qfreenow == 0) {
				microuptime(&kwq->kw_ts);
				LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
				kwq->kw_pflags |= KSYN_WQ_FLIST;
				if (psynch_cleanupset == 0) {
					struct timeval t;
					microuptime(&t);
					t.tv_sec += KSYN_CLEANUP_DEADLINE;
					deadline = tvtoabstime(&t);
					thread_call_enter_delayed(psynch_thcall, deadline);
					psynch_cleanupset = 1;
				}
			} else {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
				free_elem = kwq;
			}
		}
	}
	pthread_list_unlock();
	if (free_elem != NULL) {
		_kwq_destroy(free_elem);
	}
}

/* responsible for freeing the waitqueues */
void
psynch_wq_cleanup(__unused void *param, __unused void * param1)
{
	ksyn_wait_queue_t kwq, tmp;
	struct timeval t;
	int reschedule = 0;
	uint64_t deadline = 0;
	LIST_HEAD(, ksyn_wait_queue) freelist;
	LIST_INIT(&freelist);

	pthread_list_lock();

	microuptime(&t);

	LIST_FOREACH(kwq, &pth_free_list, kw_list) {
		if (_kwq_is_used(kwq) || kwq->kw_iocount != 0) {
			// still in use
			continue;
		}
		__darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec;
		if (diff < 0)
			diff *= -1;
		if (diff >= KSYN_CLEANUP_DEADLINE) {
			kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
			LIST_REMOVE(kwq, kw_hash);
			LIST_REMOVE(kwq, kw_list);
			LIST_INSERT_HEAD(&freelist, kwq, kw_list);
		} else {
			reschedule = 1;
		}
	}
	if (reschedule != 0) {
		t.tv_sec += KSYN_CLEANUP_DEADLINE;
		deadline = tvtoabstime(&t);
		thread_call_enter_delayed(psynch_thcall, deadline);
		psynch_cleanupset = 1;
	} else {
		psynch_cleanupset = 0;
	}
	pthread_list_unlock();

	LIST_FOREACH_SAFE(kwq, &freelist, kw_list, tmp) {
		_kwq_destroy(kwq);
	}
}
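
/*
 * Editor's sketch (not part of the original source): the reaping policy
 * above in miniature. A released kwq is timestamped and parked on the free
 * list; the delayed thread call destroys only entries that have sat unused
 * for at least KSYN_CLEANUP_DEADLINE seconds, rescheduling itself while
 * younger entries remain. Hypothetical helper, for illustration only.
 */
#if 0 /* illustration only */
static int
example_expired(__darwin_time_t now_sec, __darwin_time_t stamped_sec,
		__darwin_time_t deadline_sec)
{
	__darwin_time_t age = now_sec - stamped_sec;
	if (age < 0)
		age = -age;	/* same absolute-difference guard as above */
	return age >= deadline_sec;
}
#endif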

static int
_wait_result_to_errno(wait_result_t result)
{
	int res = 0;
	switch (result) {
	case THREAD_TIMED_OUT:
		res = ETIMEDOUT;
		break;
	case THREAD_INTERRUPTED:
		res = EINTR;
		break;
	}
	return res;
}

int
ksyn_wait(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, uint32_t lockseq,
		int fit, uint64_t abstime, uint16_t kwe_flags,
		thread_continue_t continuation, block_hint_t block_hint)
{
	thread_t th = current_thread();
	uthread_t uth = pthread_kern->get_bsdthread_info(th);
	struct turnstile **tstore = NULL;
	int res;

	assert(continuation != THREAD_CONTINUE_NULL);

	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
	bzero(kwe, sizeof(*kwe));
	kwe->kwe_count = 1;
	kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
	kwe->kwe_state = KWE_THREAD_INWAIT;
	kwe->kwe_uth = uth;
	kwe->kwe_thread = th;
	kwe->kwe_flags = kwe_flags;

	res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
	if (res != 0) {
		//panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
		ksyn_wqunlock(kwq);
		return res;
	}

	PTHREAD_TRACE(psynch_mutex_kwqwait, kwq->kw_addr, kwq->kw_inqueue,
			kwq->kw_prepost.count, kwq->kw_intr.count);

	if (_kwq_use_turnstile(kwq)) {
		// pthread mutexes and rwlocks both (at least sometimes) know their
		// owner and can use turnstiles. Otherwise, we pass NULL as the
		// tstore to the shims so they wait on the global waitq.
		tstore = &kwq->kw_turnstile;
	}

	pthread_kern->psynch_wait_prepare((uintptr_t)kwq, tstore, kwq->kw_owner,
			block_hint, abstime);

	ksyn_wqunlock(kwq);

	if (tstore) {
		pthread_kern->psynch_wait_update_complete(kwq->kw_turnstile);
	}

	thread_block_parameter(continuation, kwq);

	// NOT REACHED
	panic("ksyn_wait continuation returned");
	__builtin_unreachable();
}

kern_return_t
ksyn_signal(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
		ksyn_waitq_element_t kwe, uint32_t updateval)
{
	kern_return_t ret;
	struct turnstile **tstore = NULL;

	// If no wait element was specified, wake the first.
	if (!kwe) {
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_signal: panic signaling empty queue");
		}
	}

	if (kwe->kwe_state != KWE_THREAD_INWAIT) {
		panic("ksyn_signal: panic signaling non-waiting element");
	}

	ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
	kwe->kwe_psynchretval = updateval;

	if (_kwq_use_turnstile(kwq)) {
		tstore = &kwq->kw_turnstile;
	}

	ret = pthread_kern->psynch_wait_wakeup(kwq, kwe, tstore);

	if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
		panic("ksyn_signal: panic waking up thread %x\n", ret);
	}
	return ret;
}

int
ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
{
	kern_return_t ret;
	vm_page_info_basic_data_t info;
	mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
	ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr,
			VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
	if (ret != KERN_SUCCESS) {
		return EINVAL;
	}

	if (objectp != NULL) {
		*objectp = (uint64_t)info.object_id;
	}
	if (offsetp != NULL) {
		*offsetp = (uint64_t)info.offset;
	}

	return(0);
}
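
/*
 * Editor's sketch (not part of the original source): why (object, offset) is
 * the identity for process-shared objects. Two processes can map the same
 * shared region at different user addresses, so uaddr alone cannot name the
 * lock; the backing VM object id plus the offset within it can, which is what
 * ksyn_findobj() supplies to the global hash. Hypothetical helper, for
 * illustration only.
 */
#if 0 /* illustration only */
struct example_shared_key {
	uint64_t object;	/* from ksyn_findobj() */
	uint64_t offset;
};

static int
example_shared_key_equal(struct example_shared_key a,
		struct example_shared_key b)
{
	return a.object == b.object && a.offset == b.offset;
}
#endif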


/* lowest of kw_fr, kw_fwr */
int
kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen,
		int *typep, uint32_t lowest[])
{
	uint32_t kw_fr, kw_fwr, low;
	int type = 0, lowtype, typenum[2] = { 0 };
	uint32_t numbers[2] = { 0 };
	int count = 0, i;

	if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) ||
			((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
		type |= PTH_RWSHFT_TYPE_READ;
		/* read entries are present */
		if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
			kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
			if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) &&
					(is_seqlower(premgen, kw_fr) != 0))
				kw_fr = premgen;
		} else
			kw_fr = premgen;

		lowest[KSYN_QUEUE_READ] = kw_fr;
		numbers[count] = kw_fr;
		typenum[count] = PTH_RW_TYPE_READ;
		count++;
	} else
		lowest[KSYN_QUEUE_READ] = 0;

	if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) ||
			((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
		type |= PTH_RWSHFT_TYPE_WRITE;
		/* write entries are present */
		if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
			kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum;
			if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) &&
					(is_seqlower(premgen, kw_fwr) != 0))
				kw_fwr = premgen;
		} else
			kw_fwr = premgen;

		lowest[KSYN_QUEUE_WRITE] = kw_fwr;
		numbers[count] = kw_fwr;
		typenum[count] = PTH_RW_TYPE_WRITE;
		count++;
	} else
		lowest[KSYN_QUEUE_WRITE] = 0;

#if __TESTPANICS__
	if (count == 0)
		panic("nothing in the queue???\n");
#endif /* __TESTPANICS__ */

	low = numbers[0];
	lowtype = typenum[0];
	if (count > 1) {
		for (i = 1; i < count; i++) {
			if (is_seqlower(numbers[i], low) != 0) {
				low = numbers[i];
				lowtype = typenum[i];
			}
		}
	}
	type |= lowtype;

	if (typep != 0)
		*typep = type;
	return(0);
}
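
/*
 * Editor's sketch (not part of the original source): the sequence predicates
 * used throughout (is_seqlower, is_seqhigher, ...) must tolerate 32-bit
 * wraparound, since the userspace lock words increment forever. A minimal
 * signed-difference analogue (RFC 1982-style serial arithmetic) is shown
 * below; the real predicates live in the synch headers and may differ in
 * detail.
 */
#if 0 /* illustration only */
static int
example_seq_lower(uint32_t a, uint32_t b)
{
	/* true when a precedes b modulo 2^32, assuming |a - b| < 2^31 */
	return (int32_t)(a - b) < 0;
}
#endif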

/* wake up readers up to the writer limit */
int
ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders,
		uint32_t updatebits, int *wokenp)
{
	ksyn_queue_t kq;
	int failedwakeup = 0;
	int numwoken = 0;
	kern_return_t kret = KERN_SUCCESS;
	uint32_t lbits = 0;

	lbits = updatebits;

	kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
	while ((kq->ksynq_count != 0) &&
			(allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
		kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
		if (kret == KERN_NOT_WAITING) {
			failedwakeup++;
		}
		numwoken++;
	}

	if (wokenp != NULL)
		*wokenp = numwoken;
	return(failedwakeup);
}


/*
 * This handles the unlock grants for next set on rw_unlock() or on arrival
 * of all preposted waiters.
 */
int
kwq_handle_unlock(ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t rw_wc,
		uint32_t *updatep, int flags, int *blockp, uint32_t premgen)
{
	uint32_t low_writer, limitrdnum;
	int rwtype, error = 0;
	int allreaders, nfailed;
	uint32_t updatebits = 0, numneeded = 0;
	int prepost = flags & KW_UNLOCK_PREPOST;
	thread_t preth = THREAD_NULL;
	ksyn_waitq_element_t kwe;
	uthread_t uth;
	thread_t th;
	int woken = 0;
	int block = 1;
	uint32_t lowest[KSYN_QUEUE_MAX];	/* no need for upgrade as it is handled separately */
	kern_return_t kret = KERN_SUCCESS;
	ksyn_queue_t kq;
	int curthreturns = 0;

	if (prepost != 0) {
		preth = current_thread();
	}

	kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
	kwq->kw_lastseqword = rw_wc;
	kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
	kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;

	error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
#if __TESTPANICS__
	if (error != 0)
		panic("rwunlock: failed to slot next round of threads");
#endif /* __TESTPANICS__ */

	low_writer = lowest[KSYN_QUEUE_WRITE];

	allreaders = 0;
	updatebits = 0;

	switch (rwtype & PTH_RW_TYPE_MASK) {
	case PTH_RW_TYPE_READ: {
		// XXX
		/* what about the preflight which is LREAD or READ ?? */
		if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
			if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
				updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
			}
		}
		limitrdnum = 0;
		if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
			limitrdnum = low_writer;
		} else {
			allreaders = 1;
		}

		numneeded = 0;

		if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
			limitrdnum = low_writer;
			numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
			if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
				curthreturns = 1;
				numneeded += 1;
			}
		} else {
			// no writers at all
			// no other waiters only readers
			kwq->kw_kflags |= KSYN_KWF_OVERLAP_GUARD;
			numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
			if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
				curthreturns = 1;
				numneeded += 1;
			}
		}

		updatebits += (numneeded << PTHRW_COUNT_SHIFT);

		kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;

		if (curthreturns != 0) {
			block = 0;
			uth = current_uthread();
			kwe = pthread_kern->uthread_get_uukwe(uth);
			kwe->kwe_psynchretval = updatebits;
		}

		nfailed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders,
				updatebits, &woken);
		if (nfailed != 0) {
			_kwq_mark_interruped_wakeup(kwq, KWQ_INTR_READ, nfailed,
					limitrdnum, updatebits);
		}

		error = 0;

		if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) &&
				((updatebits & PTH_RWL_WBIT) == 0)) {
			panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
		}
	}
		break;

	case PTH_RW_TYPE_WRITE: {

		/* only one thread is going to be granted */
		updatebits |= (PTHRW_INC);
		updatebits |= PTH_RWL_KBIT | PTH_RWL_EBIT;

		if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
			block = 0;
			if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
				updatebits |= PTH_RWL_WBIT;
			}
			th = preth;
			uth = pthread_kern->get_bsdthread_info(th);
			kwe = pthread_kern->uthread_get_uukwe(uth);
			kwe->kwe_psynchretval = updatebits;
		} else {
			/* we are not granting writelock to the preposting thread */
			/* if there are writers present or the preposting write thread then W bit is to be set */
			if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count > 1 ||
					(flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
				updatebits |= PTH_RWL_WBIT;
			}
			/* setup next in the queue */
			kret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, NULL, updatebits);
			if (kret == KERN_NOT_WAITING) {
				_kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
						low_writer, updatebits);
			}
			error = 0;
		}
		kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
		if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) !=
				(PTH_RWL_KBIT | PTH_RWL_EBIT)) {
			panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits);
		}
	}
		break;

	default:
		panic("rwunlock: invalid type for lock grants");

	};

	if (updatep != NULL)
		*updatep = updatebits;
	if (blockp != NULL)
		*blockp = block;
	return(error);
}
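
/*
 * Editor's sketch (not part of the original source): how updatebits is
 * composed above. The userspace lock word keeps its flag bits (E, W, K, ...)
 * in the low bits and the waiter/grant count above PTHRW_COUNT_SHIFT, so a
 * grant of N threads is folded in as (N << PTHRW_COUNT_SHIFT) plus whatever
 * flag bits apply. Hypothetical helper, for illustration only.
 */
#if 0 /* illustration only */
static uint32_t
example_compose_updatebits(uint32_t ngrants, uint32_t flagbits)
{
	return (ngrants << PTHRW_COUNT_SHIFT) | flagbits;
}
#endif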

/************* Indiv queue support routines ************************/
void
ksyn_queue_init(ksyn_queue_t kq)
{
	TAILQ_INIT(&kq->ksynq_kwelist);
	kq->ksynq_count = 0;
	kq->ksynq_firstnum = 0;
	kq->ksynq_lastnum = 0;
}

int
ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe,
		uint32_t mgen, int fit)
{
	ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
	uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
	int res = 0;

	if (kwe->kwe_kwqqueue != NULL) {
		panic("adding enqueued item to another queue");
	}

	if (kq->ksynq_count == 0) {
		TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_firstnum = lockseq;
		kq->ksynq_lastnum = lockseq;
	} else if (fit == FIRSTFIT) {
		/* TBD: if retry bit is set for mutex, add it to the head */
		/* firstfit, arriving order */
		TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
		if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
			kq->ksynq_firstnum = lockseq;
		}
		if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
			kq->ksynq_lastnum = lockseq;
		}
	} else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
		/* During prepost when a thread is getting cancelled, we could have
		 * two with same seq */
		res = EBUSY;
		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
			if (tmp != NULL && tmp->kwe_uth != NULL &&
					pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
				TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
				res = 0;
			}
		}
	} else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
		TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_lastnum = lockseq;
	} else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
		TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
		kq->ksynq_firstnum = lockseq;
	} else {
		ksyn_waitq_element_t q_kwe, r_kwe;

		res = ESRCH;
		TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
			if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
				TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
				res = 0;
				break;
			}
		}
	}

	if (res == 0) {
		kwe->kwe_kwqqueue = kwq;
		kq->ksynq_count++;
		kwq->kw_inqueue++;
		update_low_high(kwq, lockseq);
	}
	return res;
}

void
ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq,
		ksyn_waitq_element_t kwe)
{
	if (kq->ksynq_count == 0) {
		panic("removing item from empty queue");
	}

	if (kwe->kwe_kwqqueue != kwq) {
		panic("removing item from wrong queue");
	}

	TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
	kwe->kwe_list.tqe_next = NULL;
	kwe->kwe_list.tqe_prev = NULL;
	kwe->kwe_kwqqueue = NULL;

	if (--kq->ksynq_count > 0) {
		ksyn_waitq_element_t tmp;
		tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
		kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
		tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
		kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
	} else {
		kq->ksynq_firstnum = 0;
		kq->ksynq_lastnum = 0;
	}

	if (--kwq->kw_inqueue > 0) {
		uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
		if (kwq->kw_lowseq == curseq) {
			kwq->kw_lowseq = find_nextlowseq(kwq);
		}
		if (kwq->kw_highseq == curseq) {
			kwq->kw_highseq = find_nexthighseq(kwq);
		}
	} else {
		kwq->kw_lowseq = 0;
		kwq->kw_highseq = 0;
	}
}

ksyn_waitq_element_t
ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
		uint32_t seq)
{
	ksyn_waitq_element_t kwe;

	// XXX: should stop searching when higher sequence number is seen
	TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
		if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
			return kwe;
		}
	}
	return NULL;
}

/* find the thread at the target sequence (or a broadcast/prepost at or above) */
ksyn_waitq_element_t
ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
{
	ksyn_waitq_element_t result = NULL;
	ksyn_waitq_element_t kwe;
	uint32_t lgen = (cgen & PTHRW_COUNT_MASK);

	TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
		if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
			result = kwe;

			// KWE_THREAD_INWAIT must be strictly equal
			if (kwe->kwe_state == KWE_THREAD_INWAIT &&
					(kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
				result = NULL;
			}
			break;
		}
	}
	return result;
}

/* look for a thread at or below uptoseq, preferring an exact match at signalseq */
ksyn_waitq_element_t
ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
		uint32_t uptoseq, uint32_t signalseq)
{
	ksyn_waitq_element_t result = NULL;
	ksyn_waitq_element_t q_kwe, r_kwe;

	// XXX
	/* case where wrap in the tail of the queue exists */
	TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
		if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
			if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
				return result;
			}
		}
		if (q_kwe->kwe_state == KWE_THREAD_PREPOST ||
				q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
			/* match any prepost at our same uptoseq or any broadcast above */
			if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
				continue;
			}
			return q_kwe;
		} else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
			/*
			 * Match any (non-cancelled) thread at or below our upto sequence -
			 * but prefer an exact match to our signal sequence (if present) to
			 * keep exact matches happening.
			 */
			if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
				return result;
			}
			if (q_kwe->kwe_kwqqueue == kwq) {
				if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
					/* if equal or higher than our signal sequence, return this one */
					if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
						return q_kwe;
					}

					/* otherwise, just remember this eligible thread and move on */
					if (result == NULL) {
						result = q_kwe;
					}
				}
			}
		} else {
			panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
		}
	}
	return result;
}

void
ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
{
	ksyn_waitq_element_t kwe;
	uint32_t tseq = upto & PTHRW_COUNT_MASK;
	ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
	uint32_t freed = 0, signaled = 0;

	PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_START, kwq->kw_addr,
			kqi, upto, all);

	while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
		if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
			break;
		}
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			/*
			 * This scenario is typically noticed when the cvar is
			 * reinited and the new waiters are waiting. We can
			 * return them as spurious wait so the cvar state gets
			 * reset correctly.
			 */

			PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
					kwq->kw_inqueue, 1);

			/* skip canceled ones */
			/* wake the rest */
			/* set M bit to indicate to waking CV to return inc val */
			(void)ksyn_signal(kwq, kqi, kwe,
					PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
			signaled++;
		} else {
			PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
					kwq->kw_inqueue, 2);
			ksyn_queue_remove_item(kwq, kq, kwe);
			zfree(kwe_zone, kwe);
			kwq->kw_fakecount--;
			freed++;
		}
	}

	PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_END, kwq->kw_addr, freed,
			signaled, kwq->kw_inqueue);
}

/*************************************************************************/

void
update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
{
	if (kwq->kw_inqueue == 1) {
		kwq->kw_lowseq = lockseq;
		kwq->kw_highseq = lockseq;
	} else {
		if (is_seqlower(lockseq, kwq->kw_lowseq)) {
			kwq->kw_lowseq = lockseq;
		}
		if (is_seqhigher(lockseq, kwq->kw_highseq)) {
			kwq->kw_highseq = lockseq;
		}
	}
}

uint32_t
find_nextlowseq(ksyn_wait_queue_t kwq)
{
	uint32_t lowest = 0;
	int first = 1;
	int i;

	for (i = 0; i < KSYN_QUEUE_MAX; i++) {
		if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
			uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
			if (first || is_seqlower(current, lowest)) {
				lowest = current;
				first = 0;
			}
		}
	}

	return lowest;
}

uint32_t
find_nexthighseq(ksyn_wait_queue_t kwq)
{
	uint32_t highest = 0;
	int first = 1;
	int i;

	for (i = 0; i < KSYN_QUEUE_MAX; i++) {
		if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
			uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
			if (first || is_seqhigher(current, highest)) {
				highest = current;
				first = 0;
			}
		}
	}

	return highest;
}

int
find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters,
		uint32_t *countp)
{
	int i;
	uint32_t count = 0;

	for (i = 0; i < KSYN_QUEUE_MAX; i++) {
		count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
		if (count >= nwaiters) {
			break;
		}
	}

	if (countp != NULL) {
		*countp = count;
	}

	if (count == 0) {
		return 0;
	} else if (count >= nwaiters) {
		return 1;
	} else {
		return 0;
	}
}


uint32_t
ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
{
	uint32_t i = 0;
	ksyn_waitq_element_t kwe, newkwe;

	if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
		return 0;
	}
	if (upto == kq->ksynq_firstnum) {
		return 1;
	}
	TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
		uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
		if (is_seqhigher(curval, upto)) {
			break;
		}
		++i;
		if (upto == curval) {
			break;
		}
	}
	return i;
}

/* handles the cond broadcast of cvar and returns number of woken threads and bits for syscall return */
void
ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
{
	ksyn_waitq_element_t kwe, newkwe;
	uint32_t updatebits = 0;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];

	struct ksyn_queue kfreeq;
	ksyn_queue_init(&kfreeq);

	PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_START, ckwq->kw_addr, upto,
			ckwq->kw_inqueue, 0);

retry:
	TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
		if (is_seqhigher(kwe->kwe_lockseq, upto)) {
			// outside our range
			break;
		}

		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			// Wake only non-canceled threads waiting on this CV.
			if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
				PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, 0, 1);
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
				updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
				kwe->kwe_state == KWE_THREAD_PREPOST) {
			PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe,
					kwe->kwe_state, 2);
			ksyn_queue_remove_item(ckwq, kq, kwe);
			TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
			ckwq->kw_fakecount--;
		} else {
			panic("unknown kwe state\n");
		}
	}

	/* Need to enter a broadcast in the queue (if not already at L == S) */

	if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
		PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, ckwq->kw_lword,
				ckwq->kw_sword, 3);

		newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
		if (newkwe == NULL) {
			ksyn_wqunlock(ckwq);
			newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
			TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
			ksyn_wqlock(ckwq);
			goto retry;
		} else {
			TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
			ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
			PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, newkwe, 0, 4);
		}
	}

	// free up any remaining things stumbled across above
	while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
		TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
		zfree(kwe_zone, kwe);
	}

	PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_END, ckwq->kw_addr,
			updatebits, 0, 0);

	if (updatep != NULL) {
		*updatep |= updatebits;
	}
}
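
/*
 * Editor's sketch (not part of the original source): the allocate-outside-
 * the-lock pattern used by ksyn_handle_cvbroad() above. zalloc() may block,
 * so the wait-queue lock is dropped around the allocation and the scan is
 * restarted from `retry' because the queue may have changed meanwhile.
 * All names below are hypothetical, for illustration only.
 */
#if 0 /* illustration only */
struct example_queue;
extern void example_lock(struct example_queue *);
extern void example_unlock(struct example_queue *);
extern int example_needs_elem(struct example_queue *);
extern void *example_alloc(void);	/* may block */

static void
example_alloc_outside_lock(struct example_queue *q)
{
	void *spare = NULL;
retry:
	example_lock(q);
	/* ... scan the queue under the lock ... */
	if (example_needs_elem(q) && spare == NULL) {
		example_unlock(q);
		spare = example_alloc();	/* allocation may block */
		goto retry;			/* queue may have changed; re-scan */
	}
	/* ... consume or free `spare' ... */
	example_unlock(q);
}
#endif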

void
ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
{
	if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
		if (ckwq->kw_inqueue != 0) {
			/* FREE THE QUEUE */
			ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 0);
#if __TESTPANICS__
			if (ckwq->kw_inqueue != 0)
				panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
#endif /* __TESTPANICS__ */
		}
		ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
		ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
		*updatebits |= PTH_RWS_CV_CBIT;
	} else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
		// only fake entries are present in the queue
		*updatebits |= PTH_RWS_CV_PBIT;
	}
}

void
psynch_zoneinit(void)
{
	kwq_zone = zinit(sizeof(struct ksyn_wait_queue),
			8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
	kwe_zone = zinit(sizeof(struct ksyn_waitq_element),
			8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
}

void *
_pthread_get_thread_kwq(thread_t thread)
{
	assert(thread);
	struct uthread * uthread = pthread_kern->get_bsdthread_info(thread);
	assert(uthread);
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uthread);
	assert(kwe);
	ksyn_wait_queue_t kwq = kwe->kwe_kwqqueue;
	return kwq;
}

/* This function is used by stackshot to determine why a thread is blocked, and report
 * who owns the object that the thread is blocked on. It should *only* be called if the
 * `block_hint' field in the relevant thread's struct is populated with something related
 * to pthread sync objects.
 */
void
_pthread_find_owner(thread_t thread,
		struct stackshot_thread_waitinfo * waitinfo)
{
	ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread);
	switch (waitinfo->wait_type) {
	case kThreadWaitPThreadMutex:
		assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX);
		waitinfo->owner = thread_tid(kwq->kw_owner);
		waitinfo->context = kwq->kw_addr;
		break;
	/* Owner of rwlock not stored in kernel space due to races. Punt
	 * and hope that the userspace address is helpful enough. */
	case kThreadWaitPThreadRWLockRead:
	case kThreadWaitPThreadRWLockWrite:
		assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK);
		waitinfo->owner = 0;
		waitinfo->context = kwq->kw_addr;
		break;
	/* Condvars don't have owners, so just give the userspace address. */
	case kThreadWaitPThreadCondVar:
		assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);
		waitinfo->owner = 0;
		waitinfo->context = kwq->kw_addr;
		break;
	case kThreadWaitNone:
	default:
		waitinfo->owner = 0;
		waitinfo->context = 0;
		break;
	}
}