/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 * pthread_support.c
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/time.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
//#include <sys/pthread_internal.h>
#include <sys/vm.h>
#include <sys/user.h>

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/thread_call.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/processor.h>
#include <kern/block_hint.h>
#include <kern/turnstile.h>
//#include <kern/mach_param.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_map.h>
#include <mach/vm_region.h>

#include <libkern/OSAtomic.h>

#include <pexpert/pexpert.h>

#include "kern_internal.h"
#include "synch_internal.h"
#include "kern_trace.h"

typedef struct uthread *uthread_t;

//#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
#define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)
#define __FAILEDUSERTEST2__(s, x...) do { printf("PSYNCH: pid[%d]: " s "\n", proc_pid(current_proc()), x); } while (0)

lck_mtx_t *pthread_list_mlock;

#define PTH_HASHSIZE 100

static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
static unsigned long pthhash;

static LIST_HEAD(, ksyn_wait_queue) pth_free_list;

static zone_t kwq_zone; /* zone for allocation of ksyn_queue */
static zone_t kwe_zone; /* zone for allocation of ksyn_waitq_element */

#define SEQFIT 0
#define FIRSTFIT 1

struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
	uint32_t ksynq_count;		/* number of entries in queue */
	uint32_t ksynq_firstnum;	/* lowest seq in queue */
	uint32_t ksynq_lastnum;		/* highest seq in queue */
};
typedef struct ksyn_queue *ksyn_queue_t;

typedef enum {
	KSYN_QUEUE_READ = 0,
	KSYN_QUEUE_WRITE,
	KSYN_QUEUE_MAX,
} kwq_queue_type_t;

typedef enum {
	KWQ_INTR_NONE = 0,
	KWQ_INTR_READ = 0x1,
	KWQ_INTR_WRITE = 0x2,
} kwq_intr_type_t;

struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;
	LIST_ENTRY(ksyn_wait_queue) kw_list;
	user_addr_t kw_addr;
	thread_t kw_owner;		/* current owner or THREAD_NULL, has a +1 */
	uint64_t kw_object;		/* object backing in shared mode */
	uint64_t kw_offset;		/* offset inside the object in shared mode */
	int kw_pflags;			/* flags under listlock protection */
	struct timeval kw_ts;		/* timeval needed for upkeep before free */
	int kw_iocount;			/* inuse reference */
	int kw_dropcount;		/* current users unlocking... */

	int kw_type;			/* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue;		/* num of waiters held */
	uint32_t kw_fakecount;		/* number of error/prepost fakes */
	uint32_t kw_highseq;		/* highest seq in the queue */
	uint32_t kw_lowseq;		/* lowest seq in the queue */
	uint32_t kw_lword;		/* L word value from userland */
	uint32_t kw_uword;		/* U word value from userland */
	uint32_t kw_sword;		/* S word value from userland */
	uint32_t kw_lastunlockseq;	/* the last seq that unlocked */
	/* for CV to be used as the seq kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword;	/* the last seq that unlocked */
	/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword;	/* the last seq that unlocked; with num of waiters */
	struct {
		uint32_t count;		/* prepost count */
		uint32_t lseq;		/* prepost target seq */
		uint32_t sseq;		/* prepost target sword, in cvar used for mutexowned */
	} kw_prepost;
	struct {
		kwq_intr_type_t type;	/* type of failed wakeups */
		uint32_t count;		/* prepost of missed wakeup due to intrs */
		uint32_t seq;		/* prepost of missed wakeup limit seq */
		uint32_t returnbits;	/* return bits value for missed wakeup threads */
	} kw_intr;

	int kw_kflags;
	int kw_qos_override;		/* QoS of max waiter during contention period */
	struct turnstile *kw_turnstile;
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX]; /* queues to hold threads */
	lck_spin_t kw_lock;		/* spinlock protecting this structure */
};
typedef struct ksyn_wait_queue *ksyn_wait_queue_t;

#define TID_ZERO (uint64_t)0

/* bits needed in handling the rwlock unlock */
#define PTH_RW_TYPE_READ	0x01
#define PTH_RW_TYPE_WRITE	0x04
#define PTH_RW_TYPE_MASK	0xff
#define PTH_RW_TYPE_SHIFT	8

#define PTH_RWSHFT_TYPE_READ	0x0100
#define PTH_RWSHFT_TYPE_WRITE	0x0400
#define PTH_RWSHFT_TYPE_MASK	0xff00

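/*
 * Illustrative relationship (derived from the values above, not a separate
 * spec): the shifted forms are the type bits moved past the low byte, i.e.
 * PTH_RWSHFT_TYPE_READ == (PTH_RW_TYPE_READ << PTH_RW_TYPE_SHIFT) == 0x0100
 * and PTH_RWSHFT_TYPE_WRITE == (0x04 << 8) == 0x0400, so a packed word can
 * carry a waiter type above the low-byte field selected by PTH_RW_TYPE_MASK.
 */
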
190/*
191 * Mutex pshared attributes
192 */
193#define PTHREAD_PROCESS_SHARED _PTHREAD_MTX_OPT_PSHARED
194#define PTHREAD_PROCESS_PRIVATE 0x20
195#define PTHREAD_PSHARED_FLAGS_MASK 0x30
196
197/*
198 * Mutex policy attributes
199 */
214d78a2
A
200#define _PTHREAD_MTX_OPT_POLICY_FAIRSHARE 0x040 /* 1 */
201#define _PTHREAD_MTX_OPT_POLICY_FIRSTFIT 0x080 /* 2 */
202#define _PTHREAD_MTX_OPT_POLICY_MASK 0x1c0
f1a1da6c
A
203
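/*
 * Usage sketch (restating how these masks are applied later in this file):
 * both attribute groups travel in the userland flags word and are tested by
 * masking, e.g.
 *	int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) ==
 *			_PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
 *	int shared = (flags & PTHREAD_PSHARED_FLAGS_MASK) ==
 *			PTHREAD_PROCESS_SHARED;
 */
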
/* pflags */
#define KSYN_WQ_INHASH	2
#define KSYN_WQ_SHARED	4
#define KSYN_WQ_WAITING	8	/* threads waiting for this wq to be available */
#define KSYN_WQ_FLIST	0x10	/* in free list to be freed after a short delay */

/* kflags */
#define KSYN_KWF_INITCLEARED	0x1	/* the init status found and preposts cleared */
#define KSYN_KWF_ZEROEDOUT	0x2	/* the lword, etc are inited to 0 */
#define KSYN_KWF_QOS_APPLIED	0x4	/* QoS override applied to owner */
#define KSYN_KWF_OVERLAP_GUARD	0x8	/* overlap guard */

#define KSYN_CLEANUP_DEADLINE 10
static int psynch_cleanupset;
thread_call_t psynch_thcall;

#define KSYN_WQTYPE_INWAIT	0x1000
#define KSYN_WQTYPE_INDROP	0x2000
#define KSYN_WQTYPE_MTX		0x01
#define KSYN_WQTYPE_CVAR	0x02
#define KSYN_WQTYPE_RWLOCK	0x04
#define KSYN_WQTYPE_SEMA	0x08
#define KSYN_WQTYPE_MASK	0xff

#define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)

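/*
 * Illustrative decomposition (from the values above): the low byte encodes
 * the object kind and the high bits encode the operation phase, so
 * KSYN_WQTYPE_MUTEXDROP == 0x2001, and _kwq_type() below recovers just the
 * kind with (kw_type & KSYN_WQTYPE_MASK).
 */
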
static inline int
_kwq_type(ksyn_wait_queue_t kwq)
{
	return (kwq->kw_type & KSYN_WQTYPE_MASK);
}

static inline bool
_kwq_use_turnstile(ksyn_wait_queue_t kwq)
{
	// <rdar://problem/15926625> If we had writer-owner information from the
	// rwlock then we could use the turnstile to push on it. For now, only
	// plain mutexes use it.
	return (_kwq_type(kwq) == KSYN_WQTYPE_MTX);
}

#define KW_UNLOCK_PREPOST		0x01
#define KW_UNLOCK_PREPOST_READLOCK	0x08
#define KW_UNLOCK_PREPOST_WRLOCK	0x20

static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype, ksyn_wait_queue_t *wq);
static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);

static int _wait_result_to_errno(wait_result_t result);

static int ksyn_wait(ksyn_wait_queue_t, kwq_queue_type_t, uint32_t, int, uint64_t, uint16_t, thread_continue_t, block_hint_t);
static kern_return_t ksyn_signal(ksyn_wait_queue_t, kwq_queue_type_t, ksyn_waitq_element_t, uint32_t);
static void ksyn_freeallkwe(ksyn_queue_t kq);

static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t, thread_t *);

static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);

static void ksyn_queue_init(ksyn_queue_t kq);
static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);

static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);

static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);

static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);

static void __dead2 psynch_cvcontinue(void *, wait_result_t);
static void __dead2 psynch_mtxcontinue(void *, wait_result_t);
static void __dead2 psynch_rw_rdcontinue(void *, wait_result_t);
static void __dead2 psynch_rw_wrcontinue(void *, wait_result_t);

static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);

static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
{
	int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);

	// assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);

	if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
		/* the values of L, U and S are cleared out due to L==S in previous transition */
		kwq->kw_lword = mgen;
		kwq->kw_uword = ugen;
		kwq->kw_sword = rw_wc;
		kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
	} else {
		if (is_seqhigher(mgen, kwq->kw_lword)) {
			kwq->kw_lword = mgen;
		}
		if (is_seqhigher(ugen, kwq->kw_uword)) {
			kwq->kw_uword = ugen;
		}
		if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
			kwq->kw_sword = rw_wc;
		}
	}
	if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
		kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
	}
}

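/*
 * Worked example (illustrative, assuming the usual wrap-aware semantics of
 * is_seqhigher()/is_seqlower()): if userland reports mgen 0x300 while
 * kw_lword is 0x100, L advances to 0x300; a stale mgen of 0x080 leaves it
 * untouched. S only advances when the caller set PTH_RWS_CV_CBIT in rw_wc,
 * which is what sinit checks above.
 */
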
static inline void
_kwq_clear_preposted_wakeup(ksyn_wait_queue_t kwq)
{
	kwq->kw_prepost.lseq = 0;
	kwq->kw_prepost.sseq = PTHRW_RWS_INIT;
	kwq->kw_prepost.count = 0;
}

static inline void
_kwq_mark_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t count,
		uint32_t lseq, uint32_t sseq)
{
	kwq->kw_prepost.count = count;
	kwq->kw_prepost.lseq = lseq;
	kwq->kw_prepost.sseq = sseq;
}

static inline void
_kwq_clear_interrupted_wakeup(ksyn_wait_queue_t kwq)
{
	kwq->kw_intr.type = KWQ_INTR_NONE;
	kwq->kw_intr.count = 0;
	kwq->kw_intr.seq = 0;
	kwq->kw_intr.returnbits = 0;
}

static inline void
_kwq_mark_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
		uint32_t count, uint32_t lseq, uint32_t returnbits)
{
	kwq->kw_intr.count = count;
	kwq->kw_intr.seq = lseq;
	kwq->kw_intr.returnbits = returnbits;
	kwq->kw_intr.type = type;
}

static void
_kwq_destroy(ksyn_wait_queue_t kwq)
{
	if (kwq->kw_owner) {
		thread_deallocate(kwq->kw_owner);
	}
	lck_spin_destroy(&kwq->kw_lock, pthread_lck_grp);
	zfree(kwq_zone, kwq);
}

#define KWQ_SET_OWNER_TRANSFER_REF 0x1

static inline thread_t
_kwq_set_owner(ksyn_wait_queue_t kwq, thread_t new_owner, int flags)
{
	thread_t old_owner = kwq->kw_owner;
	if (old_owner == new_owner) {
		if (flags & KWQ_SET_OWNER_TRANSFER_REF) return new_owner;
		return THREAD_NULL;
	}
	if ((flags & KWQ_SET_OWNER_TRANSFER_REF) == 0) {
		thread_reference(new_owner);
	}
	kwq->kw_owner = new_owner;
	return old_owner;
}

static inline thread_t
_kwq_clear_owner(ksyn_wait_queue_t kwq)
{
	return _kwq_set_owner(kwq, THREAD_NULL, KWQ_SET_OWNER_TRANSFER_REF);
}

static inline void
_kwq_cleanup_old_owner(thread_t *thread)
{
	if (*thread) {
		thread_deallocate(*thread);
		*thread = THREAD_NULL;
	}
}

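/*
 * Reference-counting sketch for kw_owner (restating the callers below, not
 * an independent contract): kw_owner always carries a +1 thread reference.
 * _kwq_set_owner() returns the displaced owner still holding its +1 so the
 * caller can drop it once the kwq spinlock is released, e.g.
 *
 *	thread_t old_owner = _kwq_set_owner(kwq, current_thread(), 0);
 *	ksyn_wqunlock(kwq);
 *	_kwq_cleanup_old_owner(&old_owner);
 *
 * KWQ_SET_OWNER_TRANSFER_REF donates the caller's existing +1 instead of
 * taking a new one, which is how _kwq_clear_owner() hands the old owner's
 * reference back to its caller.
 */
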
static void
CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
{
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
		if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
			panic("CV: entries in queue during reinit %d:%d\n", kwq->kw_inqueue, kwq->kw_fakecount);
		}
	}
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
		kwq->kw_nextseqword = PTHRW_RWS_INIT;
		kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
	}
	_kwq_clear_preposted_wakeup(kwq);
	kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
	kwq->kw_lastseqword = PTHRW_RWS_INIT;
	_kwq_clear_interrupted_wakeup(kwq);
	kwq->kw_lword = 0;
	kwq->kw_uword = 0;
	kwq->kw_sword = PTHRW_RWS_INIT;
}

static bool
_kwq_handle_preposted_wakeup(ksyn_wait_queue_t kwq, uint32_t type,
		uint32_t lseq, uint32_t *retval)
{
	if (kwq->kw_prepost.count == 0 ||
			!is_seqlower_eq(lseq, kwq->kw_prepost.lseq)) {
		return false;
	}

	kwq->kw_prepost.count--;
	if (kwq->kw_prepost.count > 0) {
		return false;
	}

	int error, should_block = 0;
	uint32_t updatebits = 0;
	uint32_t pp_lseq = kwq->kw_prepost.lseq;
	uint32_t pp_sseq = kwq->kw_prepost.sseq;
	_kwq_clear_preposted_wakeup(kwq);

	kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;

	error = kwq_handle_unlock(kwq, pp_lseq, pp_sseq, &updatebits,
			(type | KW_UNLOCK_PREPOST), &should_block, lseq);
	if (error) {
		panic("_kwq_handle_preposted_wakeup: kwq_handle_unlock failed %d",
				error);
	}

	if (should_block) {
		return false;
	}
	*retval = updatebits;
	return true;
}

static bool
_kwq_handle_overlap(ksyn_wait_queue_t kwq, uint32_t type, uint32_t lgenval,
		uint32_t rw_wc, uint32_t *retval)
{
	int res = 0;

	// overlaps only occur on read lockers
	if (type != PTH_RW_TYPE_READ) {
		return false;
	}

	// check for overlap and no pending W bit (indicates writers)
	if ((kwq->kw_kflags & KSYN_KWF_OVERLAP_GUARD) &&
			!is_rws_savemask_set(rw_wc) && !is_rwl_wbit_set(lgenval)) {
		/* overlap is set, so no need to check for valid state for overlap */

		if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
			/* increase the next expected seq by one */
			kwq->kw_nextseqword += PTHRW_INC;
			/* set count by one & bits from the nextseq and add M bit */
			*retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
			res = 1;
		}
	}
	return res;
}

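/*
 * Illustrative reading of the overlap fast path above: with
 * KSYN_KWF_OVERLAP_GUARD set, no saved-state bits in rw_wc, and no writer
 * (W) bit in lgenval, an arriving reader whose rw_wc falls at or inside the
 * window bounded by kw_lastseqword and kw_nextseqword is granted in place:
 * kw_nextseqword is bumped by PTHRW_INC and the return word combines that
 * count with the queue's flag bits plus PTH_RWL_MBIT.
 */
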
static inline bool
_kwq_is_used(ksyn_wait_queue_t kwq)
{
	return (kwq->kw_inqueue != 0 || kwq->kw_prepost.count != 0 ||
			kwq->kw_intr.count != 0);
}

/*
 * consumes a pending interrupted waiter, returns true if the current
 * thread should return back to userspace because it was previously
 * interrupted.
 */
static inline bool
_kwq_handle_interrupted_wakeup(ksyn_wait_queue_t kwq, kwq_intr_type_t type,
		uint32_t lseq, uint32_t *retval)
{
	if (kwq->kw_intr.count != 0 && kwq->kw_intr.type == type &&
			(!kwq->kw_intr.seq || is_seqlower_eq(lseq, kwq->kw_intr.seq))) {
		kwq->kw_intr.count--;
		*retval = kwq->kw_intr.returnbits;
		if (kwq->kw_intr.returnbits == 0) {
			_kwq_clear_interrupted_wakeup(kwq);
		}
		return true;
	}
	return false;
}

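/*
 * Lifecycle sketch (illustrative): when ksyn_mtxsignal() reports
 * KERN_NOT_WAITING because the chosen waiter was already interrupted out,
 * _psynch_mutexdrop_internal() records the missed wakeup with
 * _kwq_mark_interrupted_wakeup(kwq, KWQ_INTR_WRITE, 1, nextgen, updatebits).
 * The next waiter entering _psynch_mutexwait() with an eligible sequence
 * consumes it here and returns straight to userspace with the saved
 * returnbits instead of blocking.
 */
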
static void
pthread_list_lock(void)
{
	lck_mtx_lock(pthread_list_mlock);
}

static void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}

static void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{
	lck_spin_lock(&kwq->kw_lock);
}

static void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	lck_spin_unlock(&kwq->kw_lock);
}

/*
 * Routine to drop the mutex unlocks; used both for the mutexunlock system
 * call and for drops during cond wait.
 */
static uint32_t
_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen,
		int flags)
{
	kern_return_t ret;
	uint32_t returnbits = 0;
	uint32_t updatebits = 0;
	int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK) ==
			_PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
	uint32_t nextgen = (ugen + PTHRW_INC);
	thread_t old_owner = THREAD_NULL;

	ksyn_wqlock(kwq);
	kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);

redrive:
	updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
			(PTH_RWL_EBIT | PTH_RWL_KBIT);

	if (firstfit) {
		if (kwq->kw_inqueue == 0) {
			uint32_t count = kwq->kw_prepost.count + 1;
			// Increment the number of preposters we have waiting
			_kwq_mark_preposted_wakeup(kwq, count, mgen & PTHRW_COUNT_MASK, 0);
			// We don't know the current owner as we've determined this mutex
			// drop should have a preposted locker inbound into the kernel but
			// we have no way of knowing who it is. When it arrives, the lock
			// path will update the turnstile owner and return it to userspace.
			old_owner = _kwq_clear_owner(kwq);
			pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
					&kwq->kw_turnstile);
			PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
					kwq->kw_prepost.lseq, count, 0);
		} else {
			// signal first waiter
			ret = ksyn_mtxsignal(kwq, NULL, updatebits, &old_owner);
			if (ret == KERN_NOT_WAITING) {
				// <rdar://problem/39093536> ksyn_mtxsignal attempts to signal
				// the thread but it sets up the turnstile inheritor first.
				// That means we can't redrive the mutex in a loop without
				// dropping the wq lock and cleaning up the turnstile state.
				ksyn_wqunlock(kwq);
				pthread_kern->psynch_wait_cleanup();
				_kwq_cleanup_old_owner(&old_owner);
				ksyn_wqlock(kwq);
				goto redrive;
			}
		}
	} else {
		bool prepost = false;
		if (kwq->kw_inqueue == 0) {
			// No waiters in the queue.
			prepost = true;
		} else {
			uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum & PTHRW_COUNT_MASK);
			if (low_writer == nextgen) {
				/* next seq to be granted found */
				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
				ret = ksyn_mtxsignal(kwq, NULL,
						updatebits | PTH_RWL_MTX_WAIT, &old_owner);
				if (ret == KERN_NOT_WAITING) {
					/* interrupt post */
					_kwq_mark_interrupted_wakeup(kwq, KWQ_INTR_WRITE, 1,
							nextgen, updatebits);
				}
			} else if (is_seqhigher(low_writer, nextgen)) {
				prepost = true;
			} else {
				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one in queue\n");
				ksyn_waitq_element_t kwe;
				kwe = ksyn_queue_find_seq(kwq,
						&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], nextgen);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
					ret = ksyn_mtxsignal(kwq, kwe,
							updatebits | PTH_RWL_MTX_WAIT, &old_owner);
					if (ret == KERN_NOT_WAITING) {
						goto redrive;
					}
				} else {
					prepost = true;
				}
			}
		}
		if (prepost) {
			if (kwq->kw_prepost.count != 0) {
				__FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
			} else {
				_kwq_mark_preposted_wakeup(kwq, 1, nextgen & PTHRW_COUNT_MASK,
						0);
			}
			old_owner = _kwq_clear_owner(kwq);
			pthread_kern->psynch_wait_update_owner(kwq, THREAD_NULL,
					&kwq->kw_turnstile);
		}
	}

	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	_kwq_cleanup_old_owner(&old_owner);
	ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
	return returnbits;
}

static int
_ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
{
	int res = (lgenval & PTHRW_RWL_INIT) != 0;
	if (res) {
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
			/* first to notice the reset of the lock, clear preposts */
			CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
		}
	}
	return res;
}

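/*
 * Illustrative note on KSYN_KWF_INITCLEARED: the first caller to observe
 * PTHRW_RWL_INIT in lgenval resets the kwq via CLEAR_REINIT_BITS() and sets
 * the flag so later callers do not reset it again. The flag is dropped once
 * the reinitialized lock is back in a steady state (see
 * _kwq_handle_preposted_wakeup() above and _psynch_rw_unlock() below).
 */
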
/*
 * psynch_mutexwait: This system call is used for contended psynch mutexes to
 * block.
 */
int
_psynch_mutexwait(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
		uint32_t ugen, uint64_t tid, uint32_t flags, uint32_t *retval)
{
	ksyn_wait_queue_t kwq;
	int error = 0;
	int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
			== _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
	int ins_flags = SEQFIT;
	uint32_t lseq = (mgen & PTHRW_COUNT_MASK);
	uint32_t updatebits = 0;
	thread_t tid_th = THREAD_NULL, old_owner = THREAD_NULL;

	if (firstfit) {
		/* first fit */
		ins_flags = FIRSTFIT;
	}

	error = ksyn_wqfind(mutex, mgen, ugen, 0, flags,
			(KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
	if (error != 0) {
		return error;
	}

again:
	ksyn_wqlock(kwq);

	if (_kwq_handle_interrupted_wakeup(kwq, KWQ_INTR_WRITE, lseq, retval)) {
		old_owner = _kwq_set_owner(kwq, current_thread(), 0);
		pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
				&kwq->kw_turnstile);
		ksyn_wqunlock(kwq);
		_kwq_cleanup_old_owner(&old_owner);
		goto out;
	}

	if (kwq->kw_prepost.count && (firstfit || (lseq == kwq->kw_prepost.lseq))) {
		/* got preposted lock */
		kwq->kw_prepost.count--;

		if (!firstfit) {
			if (kwq->kw_prepost.count > 0) {
				__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
				kwq->kw_prepost.lseq += PTHRW_INC; /* look for next one */
				ksyn_wqunlock(kwq);
				error = EINVAL;
				goto out;
			}
			_kwq_clear_preposted_wakeup(kwq);
		}

		if (kwq->kw_inqueue == 0) {
			updatebits = lseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
		} else {
			updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) |
					(PTH_RWL_KBIT | PTH_RWL_EBIT);
		}
		updatebits &= ~PTH_RWL_MTX_WAIT;

		if (updatebits == 0) {
			__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT\n");
		}

		PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
				kwq->kw_prepost.lseq, kwq->kw_prepost.count, 1);

		old_owner = _kwq_set_owner(kwq, current_thread(), 0);
		pthread_kern->psynch_wait_update_owner(kwq, kwq->kw_owner,
				&kwq->kw_turnstile);

		ksyn_wqunlock(kwq);
		_kwq_cleanup_old_owner(&old_owner);
		*retval = updatebits;
		goto out;
	}

	// mutexwait passes in an owner hint at the time userspace contended for
	// the mutex, however, the owner tid in the userspace data structure may be
	// unset or SWITCHING (-1), or it may correspond to a stale snapshot after
	// the lock has subsequently been unlocked by another thread.
	if (tid == thread_tid(kwq->kw_owner)) {
		// userspace and kernel agree
	} else if (tid == 0) {
		// contender came in before owner could write TID
		// let's assume that what the kernel knows is accurate
		// for all we know this waiter came in late in the kernel
	} else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT &&
			is_seqlower(ugen, kwq->kw_lastunlockseq)) {
		// owner is stale, someone has come in and unlocked since this
		// contender read the TID, so assume what is known in the kernel is
		// accurate
	} else if (tid == PTHREAD_MTX_TID_SWITCHING) {
		// userspace didn't know the owner because it was being unlocked, but
		// that unlocker hasn't reached the kernel yet. So assume what is known
		// in the kernel is accurate
	} else {
		// hint is being passed in for a specific thread, and we have no reason
		// not to trust it (like the kernel unlock sequence being higher)
		//
		// So resolve the hint to a thread_t if we haven't done so yet
		// and redrive as we dropped the lock
		if (tid_th == THREAD_NULL) {
			ksyn_wqunlock(kwq);
			tid_th = pthread_kern->task_findtid(current_task(), tid);
			if (tid_th == THREAD_NULL) tid = 0;
			goto again;
		}
		tid_th = _kwq_set_owner(kwq, tid_th, KWQ_SET_OWNER_TRANSFER_REF);
	}

	if (tid_th) {
		// We are on our way to block, and can't drop the spinlock anymore
		pthread_kern->thread_deallocate_safe(tid_th);
		tid_th = THREAD_NULL;
	}
	error = ksyn_wait(kwq, KSYN_QUEUE_WRITE, mgen, ins_flags, 0, 0,
			psynch_mtxcontinue, kThreadWaitPThreadMutex);
	// ksyn_wait drops wait queue lock
out:
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	if (tid_th) {
		thread_deallocate(tid_th);
	}
	return error;
}

void __dead2
psynch_mtxcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t kwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	ksyn_wqlock(kwq);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
		}
	} else {
		uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
		pthread_kern->uthread_set_returnval(uth, updatebits);

		if (updatebits == 0) {
			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT\n");
		}
	}

	pthread_kern->psynch_wait_complete(kwq, &kwq->kw_turnstile);

	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}

static void __dead2
_psynch_rw_continue(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
		wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	ksyn_wqlock(kwq);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
		}
	} else {
		pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));

	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}

void __dead2
psynch_rw_rdcontinue(void *parameter, wait_result_t result)
{
	_psynch_rw_continue(parameter, KSYN_QUEUE_READ, result);
}

void __dead2
psynch_rw_wrcontinue(void *parameter, wait_result_t result)
{
	_psynch_rw_continue(parameter, KSYN_QUEUE_WRITE, result);
}

/*
 * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
 */
int
_psynch_mutexdrop(__unused proc_t p, user_addr_t mutex, uint32_t mgen,
		uint32_t ugen, uint64_t tid __unused, uint32_t flags, uint32_t *retval)
{
	int res;
	ksyn_wait_queue_t kwq;

	res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
	if (res == 0) {
		uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
		/* drops the kwq reference */
		if (retval) {
			*retval = updateval;
		}
	}

	return res;
}

static kern_return_t
ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe,
		uint32_t updateval, thread_t *old_owner)
{
	kern_return_t ret;

	if (!kwe) {
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_mtxsignal: panic signaling empty queue");
		}
	}

	PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_START, kwq->kw_addr, kwe,
			thread_tid(kwe->kwe_thread), kwq->kw_inqueue);

	ret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, kwe, updateval);
	if (ret == KERN_SUCCESS) {
		*old_owner = _kwq_set_owner(kwq, kwe->kwe_thread, 0);
	} else {
		*old_owner = _kwq_clear_owner(kwq);
	}
	PTHREAD_TRACE(psynch_mutex_kwqsignal | DBG_FUNC_END, kwq->kw_addr, kwe,
			ret, 0);
	return ret;
}

static void
ksyn_prepost(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t state,
		uint32_t lockseq)
{
	bzero(kwe, sizeof(*kwe));
	kwe->kwe_state = state;
	kwe->kwe_lockseq = lockseq;
	kwe->kwe_count = 1;

	(void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, lockseq, SEQFIT);
	kwq->kw_fakecount++;
}

static void
ksyn_cvsignal(ksyn_wait_queue_t ckwq, thread_t th, uint32_t uptoseq,
		uint32_t signalseq, uint32_t *updatebits, int *broadcast,
		ksyn_waitq_element_t *nkwep)
{
	ksyn_waitq_element_t kwe = NULL;
	ksyn_waitq_element_t nkwe = NULL;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];

	uptoseq &= PTHRW_COUNT_MASK;

	// Find the specified thread to wake.
	if (th != THREAD_NULL) {
		uthread_t uth = pthread_kern->get_bsdthread_info(th);
		kwe = pthread_kern->uthread_get_uukwe(uth);
		if (kwe->kwe_kwqqueue != ckwq ||
				is_seqhigher(kwe->kwe_lockseq, uptoseq)) {
			// Unless it's no longer waiting on this CV...
			kwe = NULL;
			// ...in which case we post a broadcast instead.
			*broadcast = 1;
			return;
		}
	}

	// If no thread was specified, find any thread to wake (with the right
	// sequence number).
	while (th == THREAD_NULL) {
		if (kwe == NULL) {
			kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
		}
		if (kwe == NULL && nkwe == NULL) {
			// No eligible entries; need to allocate a new
			// entry to prepost. Loop to rescan after
			// reacquiring the lock after allocation in
			// case anything new shows up.
			ksyn_wqunlock(ckwq);
			nkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
			ksyn_wqlock(ckwq);
		} else {
			break;
		}
	}

	if (kwe != NULL) {
		// If we found a thread to wake...
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			if (is_seqlower(kwe->kwe_lockseq, signalseq)) {
				/*
				 * A valid thread in our range, but lower than our signal.
				 * Matching it may leave our match with nobody to wake it if/when
				 * it arrives (the signal originally meant for this thread might
				 * not successfully wake it).
				 *
				 * Convert to broadcast - may cause some spurious wakeups
				 * (allowed by spec), but avoids starvation (better choice).
				 */
				*broadcast = 1;
			} else {
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
				*updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			// Merge with existing prepost at same uptoseq.
			kwe->kwe_count += 1;
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// Existing broadcasts subsume this signal.
		} else {
			panic("unknown kwe state\n");
		}
		if (nkwe) {
			/*
			 * If we allocated a new kwe above but then found a different kwe to
			 * use then we need to deallocate the spare one.
			 */
			zfree(kwe_zone, nkwe);
			nkwe = NULL;
		}
	} else if (nkwe != NULL) {
		// ... otherwise, insert the newly allocated prepost.
		ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq);
		nkwe = NULL;
	} else {
		panic("failed to allocate kwe\n");
	}

	*nkwep = nkwe;
}

static int
__psynch_cvsignal(user_addr_t cv, uint32_t cgen, uint32_t cugen,
		uint32_t csgen, uint32_t flags, int broadcast,
		mach_port_name_t threadport, uint32_t *retval)
{
	int error = 0;
	thread_t th = THREAD_NULL;
	ksyn_wait_queue_t kwq;

	uint32_t uptoseq = cgen & PTHRW_COUNT_MASK;
	uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;

	// validate sane L, U, and S values
	if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) {
		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
		return EINVAL;
	}

	if (threadport != 0) {
		th = port_name_to_thread((mach_port_name_t)threadport);
		if (th == THREAD_NULL) {
			return ESRCH;
		}
	}

	error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq);
	if (error == 0) {
		uint32_t updatebits = 0;
		ksyn_waitq_element_t nkwe = NULL;

		ksyn_wqlock(kwq);

		// update L, U and S...
		UPDATE_CVKWQ(kwq, cgen, cugen, csgen);

		PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_START, kwq->kw_addr,
				fromseq, uptoseq, broadcast);

		if (!broadcast) {
			// No need to signal if the CV is already balanced.
			if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
				ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits,
						&broadcast, &nkwe);
				PTHREAD_TRACE(psynch_cvar_signal, kwq->kw_addr, broadcast, 0, 0);
			}
		}

		if (broadcast) {
			ksyn_handle_cvbroad(kwq, uptoseq, &updatebits);
		}

		kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
		// set C or P bits and free if needed
		ksyn_cvupdate_fixup(kwq, &updatebits);
		*retval = updatebits;

		PTHREAD_TRACE(psynch_cvar_signal | DBG_FUNC_END, kwq->kw_addr,
				updatebits, 0, 0);

		ksyn_wqunlock(kwq);

		pthread_kern->psynch_wait_cleanup();

		if (nkwe != NULL) {
			zfree(kwe_zone, nkwe);
		}

		ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
	}

	if (th != NULL) {
		thread_deallocate(th);
	}

	return error;
}

/*
 * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
 */
int
_psynch_cvbroad(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
		uint64_t cvudgen, uint32_t flags, __unused user_addr_t mutex,
		__unused uint64_t mugen, __unused uint64_t tid, uint32_t *retval)
{
	uint32_t diffgen = cvudgen & 0xffffffff;
	uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
	if (count > pthread_kern->get_task_threadmax()) {
		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
		return EBUSY;
	}

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t cugen = (cvudgen >> 32) & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval);
}

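/*
 * Illustrative layout of the packed 64-bit arguments above (derived from
 * the unpacking code itself, not an external spec): cvlsgen carries the S
 * generation in its high 32 bits and the L generation in its low 32 bits,
 * while cvudgen carries the U generation high and the waiter difference
 * low, e.g. csgen = cvlsgen >> 32 and cgen = cvlsgen & 0xffffffff.
 */
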
/*
 * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
 */
int
_psynch_cvsignal(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
		uint32_t cvugen, int threadport, __unused user_addr_t mutex,
		__unused uint64_t mugen, __unused uint64_t tid, uint32_t flags,
		uint32_t *retval)
{
	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval);
}

/*
 * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
 */
int
_psynch_cvwait(__unused proc_t p, user_addr_t cv, uint64_t cvlsgen,
		uint32_t cvugen, user_addr_t mutex, uint64_t mugen, uint32_t flags,
		int64_t sec, uint32_t nsec, uint32_t *retval)
{
	int error = 0;
	uint32_t updatebits = 0;
	ksyn_wait_queue_t ckwq = NULL;
	ksyn_waitq_element_t kwe, nkwe = NULL;

	/* for conformance reasons */
	pthread_kern->__pthread_testcancel(0);

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t ugen = (mugen >> 32) & 0xffffffff;
	uint32_t mgen = mugen & 0xffffffff;

	uint32_t lockseq = (cgen & PTHRW_COUNT_MASK);

	/*
	 * In cvwait U word can be out of range as cv could be used only for
	 * timeouts. However S word needs to be within bounds and validated at
	 * user level as well.
	 */
	if (is_seqhigher_eq(csgen, lockseq) != 0) {
		__FAILEDUSERTEST__("psynch_cvwait: invalid sequence numbers\n");
		return EINVAL;
	}

	PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_START, cv, mutex, cgen, 0);

	error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
	if (error != 0) {
		return error;
	}

	if (mutex != 0) {
		uint32_t mutexrv = 0;
		error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, &mutexrv);
		if (error != 0) {
			goto out;
		}
	}

	ksyn_wqlock(ckwq);

	// update L, U and S...
	UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);

	/* Look for the sequence for prepost (or conflicting thread) */
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
	if (kwe != NULL) {
		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
				/* we can safely consume a reference, so do so */
				if (--kwe->kwe_count == 0) {
					ksyn_queue_remove_item(ckwq, kq, kwe);
					ckwq->kw_fakecount--;
					nkwe = kwe;
				}
			} else {
				/*
				 * consuming a prepost higher than our lock sequence is valid, but
				 * can leave the higher thread without a match. Convert the entry
				 * to a broadcast to compensate for this.
				 */
				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
#if __TESTPANICS__
				if (updatebits != 0)
					panic("psynch_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits);
#endif /* __TESTPANICS__ */
			}
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// XXX
			// Nothing to do.
		} else if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
			error = EBUSY;
		} else {
			panic("psynch_cvwait: unexpected wait queue element type\n");
		}

		if (error == 0) {
			updatebits |= PTHRW_INC;
			ckwq->kw_sword += PTHRW_INC;

			/* set C or P bits and free if needed */
			ksyn_cvupdate_fixup(ckwq, &updatebits);
			*retval = updatebits;
		}
	} else {
		uint64_t abstime = 0;
		uint16_t kwe_flags = 0;

		if (sec != 0 || (nsec & 0x3fffffff) != 0) {
			struct timespec ts;
			ts.tv_sec = (__darwin_time_t)sec;
			ts.tv_nsec = (nsec & 0x3fffffff);
			nanoseconds_to_absolutetime(
					(uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
			clock_absolutetime_interval_to_deadline(abstime, &abstime);
		}

		PTHREAD_TRACE(psynch_cvar_kwait, cv, mutex, kwe_flags, 1);

		error = ksyn_wait(ckwq, KSYN_QUEUE_WRITE, cgen, SEQFIT, abstime,
				kwe_flags, psynch_cvcontinue, kThreadWaitPThreadCondVar);
		// ksyn_wait drops wait queue lock
	}

	ksyn_wqunlock(ckwq);

	if (nkwe != NULL) {
		zfree(kwe_zone, nkwe);
	}
out:

	PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, cv, error, updatebits, 2);

	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	return error;
}

void __dead2
psynch_cvcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t ckwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(ckwq);
		/* just in case it got woken up as we were granting */
		int retval = kwe->kwe_psynchretval;
		pthread_kern->uthread_set_returnval(uth, retval);

		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE], kwe);
		}
		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
			/* the condition var was granted.
			 * reset the error so that the thread returns back.
			 */
			error = 0;
			/* no need to set any bits just return as cvsig/broad covers this */
		} else {
			ckwq->kw_sword += PTHRW_INC;

			/* set C and P bits, in the local error */
			if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
				PTHREAD_TRACE(psynch_cvar_zeroed, ckwq->kw_addr,
						ckwq->kw_lword, ckwq->kw_sword, ckwq->kw_inqueue);
				error |= ECVCLEARED;
				if (ckwq->kw_inqueue != 0) {
					ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 1);
				}
				ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
				ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
			} else {
				/* is everything in the queue a fake entry? */
				if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
					error |= ECVPREPOST;
				}
			}
		}
		ksyn_wqunlock(ckwq);

		PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
				error, 0, 3);
	} else {
		int val = 0;
		// PTH_RWL_MTX_WAIT is removed
		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
			val = PTHRW_INC | PTH_RWS_CV_CBIT;
		}
		PTHREAD_TRACE(psynch_cvar_kwait | DBG_FUNC_END, ckwq->kw_addr,
				val, 0, 4);
		pthread_kern->uthread_set_returnval(uth, val);
	}

	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	pthread_kern->unix_syscall_return(error);
	__builtin_unreachable();
}

/*
 * psynch_cvclrprepost: This system call clears pending prepost if present.
 */
int
_psynch_cvclrprepost(__unused proc_t p, user_addr_t cv, uint32_t cvgen,
		uint32_t cvugen, uint32_t cvsgen, __unused uint32_t prepocnt,
		uint32_t preposeq, uint32_t flags, int *retval)
{
	int error = 0;
	int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
	int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP;
	ksyn_wait_queue_t kwq = NULL;

	*retval = 0;

	error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype,
			&kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	if (mutex) {
		int firstfit = (flags & _PTHREAD_MTX_OPT_POLICY_MASK)
				== _PTHREAD_MTX_OPT_POLICY_FIRSTFIT;
		if (firstfit && kwq->kw_prepost.count) {
			if (is_seqlower_eq(kwq->kw_prepost.lseq, cvgen)) {
				PTHREAD_TRACE(psynch_mutex_kwqprepost, kwq->kw_addr,
						kwq->kw_prepost.lseq, 0, 2);
				_kwq_clear_preposted_wakeup(kwq);
			}
		}
	} else {
		PTHREAD_TRACE(psynch_cvar_clrprepost, kwq->kw_addr, wqtype,
				preposeq, 0);
		ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITE, preposeq, 0);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, wqtype);
	return error;
}

/* ***************** pthread_rwlock ************************ */

static int
__psynch_rw_lock(int type, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;
	ksyn_wait_queue_t kwq;
	int error, prepost_type, kqi;
	thread_continue_t tc;

	if (type == PTH_RW_TYPE_READ) {
		prepost_type = KW_UNLOCK_PREPOST_READLOCK;
		kqi = KSYN_QUEUE_READ;
		tc = psynch_rw_rdcontinue;
	} else {
		prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
		kqi = KSYN_QUEUE_WRITE;
		tc = psynch_rw_wrcontinue;
	}

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
			(KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);
	_ksyn_check_init(kwq, lgenval);
	if (_kwq_handle_interrupted_wakeup(kwq, type, lockseq, retval) ||
			// handle overlap first as they are not counted against pre_rwwc
			// handle_overlap uses the flags in lgenval (vs. lockseq)
			_kwq_handle_overlap(kwq, type, lgenval, rw_wc, retval) ||
			_kwq_handle_preposted_wakeup(kwq, prepost_type, lockseq, retval)) {
		ksyn_wqunlock(kwq);
		goto out;
	}

	block_hint_t block_hint = type == PTH_RW_TYPE_READ ?
			kThreadWaitPThreadRWLockRead : kThreadWaitPThreadRWLockWrite;
	error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, 0, tc, block_hint);
	// ksyn_wait drops wait queue lock
out:
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
	return error;
}

/*
 * psynch_rw_rdlock: This system call is used for psync rwlock readers to block.
 */
int
_psynch_rw_rdlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc,
			flags, retval);
}

/*
 * psynch_rw_longrdlock: This system call is used for psync rwlock long readers to block.
 */
int
_psynch_rw_longrdlock(__unused proc_t p, __unused user_addr_t rwlock,
		__unused uint32_t lgenval, __unused uint32_t ugenval,
		__unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
{
	return ESRCH;
}


/*
 * psynch_rw_wrlock: This system call is used for psync rwlock writers to block.
 */
int
_psynch_rw_wrlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval,
			rw_wc, flags, retval);
}

/*
 * psynch_rw_yieldwrlock: This system call is used for psync rwlock yielding writers to block.
 */
int
_psynch_rw_yieldwrlock(__unused proc_t p, __unused user_addr_t rwlock,
		__unused uint32_t lgenval, __unused uint32_t ugenval,
		__unused uint32_t rw_wc, __unused int flags, __unused uint32_t *retval)
{
	return ESRCH;
}

/*
 * psynch_rw_unlock: This system call is used for unlock state postings. This will grant appropriate
 *	reader/writer variety lock.
 */
int
_psynch_rw_unlock(__unused proc_t p, user_addr_t rwlock, uint32_t lgenval,
		uint32_t ugenval, uint32_t rw_wc, int flags, uint32_t *retval)
{
	int error = 0;
	ksyn_wait_queue_t kwq;
	uint32_t updatebits = 0;
	int diff;
	uint32_t count = 0;
	uint32_t curgen = lgenval & PTHRW_COUNT_MASK;
	int clearedkflags = 0;

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags,
			(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return(error);
	}

	ksyn_wqlock(kwq);
	int isinit = _ksyn_check_init(kwq, lgenval);

	/* if lastunlock seq is set, ensure the current one is not lower than that, as it would be spurious */
	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) &&
			(is_seqlower(ugenval, kwq->kw_lastunlockseq) != 0)) {
		error = 0;
		goto out;
	}

	/* If L-U != num of waiters, then it needs to be preposted or spurious */
	diff = find_diff(lgenval, ugenval);

	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		if ((count == 0) || (count < (uint32_t)diff))
			goto prepost;
	}

	/* no prepost and all threads are in place, reset the bit */
	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)) {
		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
		clearedkflags = 1;
	}

	/* can handle unlock now */

	_kwq_clear_preposted_wakeup(kwq);

	error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n", error);
#endif /* __TESTPANICS__ */
out:
	if (error == 0) {
		/* update bits?? */
		*retval = updatebits;
	}

	// <rdar://problem/22244050> If any of the wakeups failed because they
	// already returned to userspace because of a signal then we need to ensure
	// that the reset state is not cleared when that thread returns. Otherwise,
	// _pthread_rwlock_lock will clear the interrupted state before it is read.
	if (clearedkflags != 0 && kwq->kw_intr.count > 0) {
		kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
	}

	ksyn_wqunlock(kwq);
	pthread_kern->psynch_wait_cleanup();
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));

	return(error);

prepost:
	/* update if the new seq is higher than prev prepost, or first set */
	if (is_rws_sbit_set(kwq->kw_prepost.sseq) ||
			is_seqhigher_eq(rw_wc, kwq->kw_prepost.sseq)) {
		_kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc);
		updatebits = lgenval; /* let this not do unlock handling */
	}
	error = 0;
	goto out;
}

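/*
 * Worked example (illustrative): if L-U says three lockers are outstanding
 * (diff == 3) but only one waiter at or below curgen is queued (count == 1),
 * the other two are still on their way into the kernel, so the unlock is
 * recorded with _kwq_mark_preposted_wakeup(kwq, diff - count, curgen, rw_wc)
 * and consumed by _kwq_handle_preposted_wakeup() as those lockers arrive.
 */
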
/* ************************************************************************** */
void
pth_global_hashinit(void)
{
	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
}

void
_pth_proc_hashinit(proc_t p)
{
	void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash);
	if (ptr == NULL) {
		panic("pth_proc_hashinit: hash init returned 0\n");
	}

	pthread_kern->proc_set_pthhash(p, ptr);
}

static int
ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags,
		ksyn_wait_queue_t *out_kwq, struct pthhashhead **out_hashptr,
		uint64_t *out_object, uint64_t *out_offset)
{
	int res = 0;
	ksyn_wait_queue_t kwq;
	uint64_t object = 0, offset = 0;
	struct pthhashhead *hashptr;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		hashptr = pth_glob_hashtbl;
		res = ksyn_findobj(uaddr, &object, &offset);
		if (res == 0) {
			LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) {
				if (kwq->kw_object == object && kwq->kw_offset == offset) {
					break;
				}
			}
		} else {
			kwq = NULL;
		}
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
		LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) {
			if (kwq->kw_addr == uaddr) {
				break;
			}
		}
	}
	*out_kwq = kwq;
	*out_object = object;
	*out_offset = offset;
	*out_hashptr = hashptr;
	return res;
}

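/*
 * Keying sketch (restating the lookup above): process-private objects hash
 * on the user address in the per-process table, while PTHREAD_PROCESS_SHARED
 * objects hash on the backing VM object/offset pair from ksyn_findobj() in
 * the global table, so the same shared lock mapped at different addresses in
 * different processes still resolves to one kwq.
 */
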
void
_pth_proc_hashdelete(proc_t p)
{
	struct pthhashhead *hashptr;
	ksyn_wait_queue_t kwq;
	unsigned long hashsize = pthhash + 1;
	unsigned long i;

	hashptr = pthread_kern->proc_get_pthhash(p);
	pthread_kern->proc_set_pthhash(p, NULL);
	if (hashptr == NULL) {
		return;
	}

	pthread_list_lock();
	for (i = 0; i < hashsize; i++) {
		while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
			if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
			}
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
				LIST_REMOVE(kwq, kw_list);
			}
			pthread_list_unlock();
			/* release fake entries if present for cvars */
			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITE]);
			_kwq_destroy(kwq);
			pthread_list_lock();
		}
	}
	pthread_list_unlock();
	FREE(hashptr, M_PROC);
}
1625
1626/* no lock held for this as the waitqueue is getting freed */
1627void
1628ksyn_freeallkwe(ksyn_queue_t kq)
1629{
1630 ksyn_waitq_element_t kwe;
1631 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
1632 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
1633 if (kwe->kwe_state != KWE_THREAD_INWAIT) {
214d78a2 1634 zfree(kwe_zone, kwe);
1635 }
1636 }
1637}
1638
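/*
 * A note in sketch form (hypothetical helper name): only prepost/broadcast
 * elements are zfree'd above because KWE_THREAD_INWAIT elements are embedded
 * in the waiting thread's uthread and owned by that thread.
 */
#if 0
static int
kwe_is_thread_owned_sketch(ksyn_waitq_element_t kwe)
{
	/* embedded in the uthread; freed with the thread, not with the queue */
	return kwe->kwe_state == KWE_THREAD_INWAIT;
}
#endif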
1639static inline void
1640_kwq_report_inuse(ksyn_wait_queue_t kwq)
1641{
1642 if (kwq->kw_prepost.count != 0) {
1643 __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [pre %d:0x%x:0x%x]",
1644 (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_prepost.count,
1645 kwq->kw_prepost.lseq, kwq->kw_prepost.sseq);
1646 PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
1647 kwq->kw_type, 1, 0);
1648 }
1649 if (kwq->kw_intr.count != 0) {
1650 __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [intr %d:0x%x:0x%x:0x%x]",
1651 (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_intr.count,
1652 kwq->kw_intr.type, kwq->kw_intr.seq,
1653 kwq->kw_intr.returnbits);
1654 PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
1655 kwq->kw_type, 2, 0);
1656 }
1657 if (kwq->kw_iocount) {
1658 __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [ioc %d:%d]",
1659 (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_iocount,
1660 kwq->kw_dropcount);
1661 PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr,
1662 kwq->kw_type, 3, 0);
1663 }
1664 if (kwq->kw_inqueue) {
1665 __FAILEDUSERTEST2__("uaddr 0x%llx busy for synch type 0x%x [inq %d:%d]",
1666 (uint64_t)kwq->kw_addr, kwq->kw_type, kwq->kw_inqueue,
1667 kwq->kw_fakecount);
1668 PTHREAD_TRACE(psynch_mutex_kwqcollision, kwq->kw_addr, kwq->kw_type,
1669 4, 0);
1670 }
1671}
1672
1673 /* find the kernel waitqueue; if not present, create one. Grants a reference. */
1674 int
1675ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen,
1676 int flags, int wqtype, ksyn_wait_queue_t *kwqp)
1677{
1678 int res = 0;
1679 ksyn_wait_queue_t kwq = NULL;
1680 ksyn_wait_queue_t nkwq = NULL;
1681 struct pthhashhead *hashptr;
1682 proc_t p = current_proc();
1683
1684 uint64_t object = 0, offset = 0;
1685 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
1686 res = ksyn_findobj(uaddr, &object, &offset);
1687 hashptr = pth_glob_hashtbl;
1688 } else {
1689 hashptr = pthread_kern->proc_get_pthhash(p);
1690 }
1691
1692 while (res == 0) {
1693 pthread_list_lock();
1694 res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr,
1695 &object, &offset);
f1a1da6c 1696 if (res != 0) {
3404ec80 1697 pthread_list_unlock();
1698 break;
1699 }
1700 if (kwq == NULL && nkwq == NULL) {
1701 // Drop the lock to allocate a new kwq and retry.
1702 pthread_list_unlock();
1703
214d78a2 1704 nkwq = (ksyn_wait_queue_t)zalloc(kwq_zone);
1705 bzero(nkwq, sizeof(struct ksyn_wait_queue));
1706 int i;
1707 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
1708 ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
1709 }
214d78a2 1710 lck_spin_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
1711 continue;
1712 } else if (kwq == NULL && nkwq != NULL) {
1713 // Still not found, add the new kwq to the hash.
1714 kwq = nkwq;
1715 nkwq = NULL; // Don't free.
1716 if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
1717 kwq->kw_pflags |= KSYN_WQ_SHARED;
1718 LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash);
1719 } else {
1720 LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash);
1721 }
1722 kwq->kw_pflags |= KSYN_WQ_INHASH;
1723 } else if (kwq != NULL) {
1724 // Found an existing kwq, use it.
1725 if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
1726 LIST_REMOVE(kwq, kw_list);
1727 kwq->kw_pflags &= ~KSYN_WQ_FLIST;
1728 }
1729 if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
214d78a2 1730 if (!_kwq_is_used(kwq)) {
1731 if (kwq->kw_iocount == 0) {
1732 kwq->kw_type = 0; // mark for reinitialization
1733 } else if (kwq->kw_iocount == 1 &&
1734 kwq->kw_dropcount == kwq->kw_iocount) {
1735 /* if all users are unlockers then wait for it to finish */
1736 kwq->kw_pflags |= KSYN_WQ_WAITING;
1737 // Drop the lock and wait for the kwq to be free.
1738 (void)msleep(&kwq->kw_pflags, pthread_list_mlock,
1739 PDROP, "ksyn_wqfind", 0);
1740 continue;
1741 } else {
214d78a2 1742 _kwq_report_inuse(kwq);
1743 res = EINVAL;
1744 }
1745 } else {
214d78a2 1746 _kwq_report_inuse(kwq);
1747 res = EINVAL;
1748 }
1749 }
1750 }
1751 if (res == 0) {
1752 if (kwq->kw_type == 0) {
1753 kwq->kw_addr = uaddr;
1754 kwq->kw_object = object;
1755 kwq->kw_offset = offset;
1756 kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
1757 CLEAR_REINIT_BITS(kwq);
1758 kwq->kw_lword = mgen;
1759 kwq->kw_uword = ugen;
1760 kwq->kw_sword = sgen;
214d78a2 1761 kwq->kw_owner = THREAD_NULL;
1762 kwq->kw_kflags = 0;
1763 kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
1764 PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_START, uaddr,
1765 kwq->kw_type, kwq, 0);
1766 PTHREAD_TRACE(psynch_mutex_kwqallocate | DBG_FUNC_END, uaddr,
1767 mgen, ugen, sgen);
1768 }
1769 kwq->kw_iocount++;
1770 if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
1771 kwq->kw_dropcount++;
1772 }
1773 }
3404ec80 1774 pthread_list_unlock();
1775 break;
1776 }
1777 if (kwqp != NULL) {
1778 *kwqp = kwq;
1779 }
1780 if (nkwq) {
214d78a2 1781 _kwq_destroy(nkwq);
1782 }
1783 return res;
1784}
1785
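/*
 * Usage sketch (hypothetical caller, not from this file): each successful
 * ksyn_wqfind() takes a reference (kw_iocount, plus kw_dropcount for
 * MUTEXDROP) and must be balanced by a ksyn_wqrelease() with the same wqtype.
 */
#if 0
static int
wqfind_release_example(user_addr_t uaddr, uint32_t mgen, uint32_t ugen,
		uint32_t sgen, int flags)
{
	ksyn_wait_queue_t kwq;
	int res = ksyn_wqfind(uaddr, mgen, ugen, sgen, flags,
			(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MUTEXDROP), &kwq);
	if (res != 0) {
		return res;
	}
	/* ... operate on kwq under ksyn_wqlock()/ksyn_wqunlock() ... */
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MUTEXDROP));
	return 0;
}
#endif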
1786/* Reference from find is dropped here. Starts the free process if needed */
1787void
1788ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
1789{
1790 uint64_t deadline;
1791 ksyn_wait_queue_t free_elem = NULL;
1792
1793 pthread_list_lock();
1794 if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
1795 kwq->kw_dropcount--;
1796 }
1797 if (--kwq->kw_iocount == 0) {
1798 if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
1799 			/* someone is waiting for the waitqueue, wake them up */
1800 kwq->kw_pflags &= ~KSYN_WQ_WAITING;
1801 wakeup(&kwq->kw_pflags);
1802 }
1803
1804 if (!_kwq_is_used(kwq)) {
1805 if (kwq->kw_turnstile) {
1806 panic("kw_turnstile still non-null upon release");
1807 }
1808
1809 PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_START,
1810 kwq->kw_addr, kwq->kw_type, qfreenow, 0);
1811 PTHREAD_TRACE(psynch_mutex_kwqdeallocate | DBG_FUNC_END,
1812 kwq->kw_addr, kwq->kw_lword, kwq->kw_uword, kwq->kw_sword);
1813
1814 if (qfreenow == 0) {
1815 microuptime(&kwq->kw_ts);
1816 LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
1817 kwq->kw_pflags |= KSYN_WQ_FLIST;
1818 if (psynch_cleanupset == 0) {
1819 struct timeval t;
1820 microuptime(&t);
1821 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1822 deadline = tvtoabstime(&t);
1823 thread_call_enter_delayed(psynch_thcall, deadline);
1824 psynch_cleanupset = 1;
1825 }
1826 } else {
1827 kwq->kw_pflags &= ~KSYN_WQ_INHASH;
1828 LIST_REMOVE(kwq, kw_hash);
1829 free_elem = kwq;
1830 }
1831 }
1832 }
1833 pthread_list_unlock();
1834 if (free_elem != NULL) {
214d78a2 1835 _kwq_destroy(free_elem);
1836 }
1837}
1838
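/*
 * Sketch of the deferred-free deadline computed above (names are the file's
 * own; the helper itself is hypothetical): a kwq parked on pth_free_list is
 * only reclaimed once it has sat idle for KSYN_CLEANUP_DEADLINE seconds.
 */
#if 0
static uint64_t
cleanup_deadline_sketch(void)
{
	struct timeval t;
	microuptime(&t);
	t.tv_sec += KSYN_CLEANUP_DEADLINE;	/* seconds from now */
	return tvtoabstime(&t);			/* absolute time for thread_call */
}
#endif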
1839 /* responsible for freeing the waitqueues */
1840void
1841psynch_wq_cleanup(__unused void *param, __unused void * param1)
1842{
214d78a2 1843 ksyn_wait_queue_t kwq, tmp;
1844 struct timeval t;
1845 int reschedule = 0;
1846 uint64_t deadline = 0;
1847 LIST_HEAD(, ksyn_wait_queue) freelist;
1848 LIST_INIT(&freelist);
1849
1850 pthread_list_lock();
1851
1852 microuptime(&t);
1853
1854 LIST_FOREACH(kwq, &pth_free_list, kw_list) {
214d78a2 1855 if (_kwq_is_used(kwq) || kwq->kw_iocount != 0) {
1856 // still in use
1857 continue;
1858 }
1859 __darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec;
1860 if (diff < 0)
1861 diff *= -1;
1862 if (diff >= KSYN_CLEANUP_DEADLINE) {
1863 kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
1864 LIST_REMOVE(kwq, kw_hash);
1865 LIST_REMOVE(kwq, kw_list);
1866 LIST_INSERT_HEAD(&freelist, kwq, kw_list);
1867 } else {
1868 reschedule = 1;
1869 }
1870
1871 }
1872 if (reschedule != 0) {
1873 t.tv_sec += KSYN_CLEANUP_DEADLINE;
1874 deadline = tvtoabstime(&t);
1875 thread_call_enter_delayed(psynch_thcall, deadline);
1876 psynch_cleanupset = 1;
1877 } else {
1878 psynch_cleanupset = 0;
1879 }
1880 pthread_list_unlock();
1881
1882 LIST_FOREACH_SAFE(kwq, &freelist, kw_list, tmp) {
1883 _kwq_destroy(kwq);
1884 }
1885}
1886
1887static int
1888_wait_result_to_errno(wait_result_t result)
1889{
1890 int res = 0;
1891 switch (result) {
1892 case THREAD_TIMED_OUT:
1893 res = ETIMEDOUT;
1894 break;
1895 case THREAD_INTERRUPTED:
1896 res = EINTR;
1897 break;
1898 }
1899 return res;
1900}
1901
1902int
1903ksyn_wait(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi, uint32_t lockseq,
1904 int fit, uint64_t abstime, uint16_t kwe_flags,
1905 thread_continue_t continuation, block_hint_t block_hint)
f1a1da6c 1906{
1907 thread_t th = current_thread();
1908 uthread_t uth = pthread_kern->get_bsdthread_info(th);
1909 struct turnstile **tstore = NULL;
1910 int res;
1911
1912 assert(continuation != THREAD_CONTINUE_NULL);
1913
1914 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
1915 bzero(kwe, sizeof(*kwe));
1916 kwe->kwe_count = 1;
1917 kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
1918 kwe->kwe_state = KWE_THREAD_INWAIT;
1919 kwe->kwe_uth = uth;
1920 kwe->kwe_thread = th;
1921 kwe->kwe_flags = kwe_flags;
1922
1923 res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
1924 if (res != 0) {
1925 //panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
1926 ksyn_wqunlock(kwq);
1927 return res;
1928 }
1929
1930 PTHREAD_TRACE(psynch_mutex_kwqwait, kwq->kw_addr, kwq->kw_inqueue,
1931 kwq->kw_prepost.count, kwq->kw_intr.count);
1932
1933 if (_kwq_use_turnstile(kwq)) {
1934 // pthread mutexes and rwlocks both (at least sometimes) know their
1935 // owner and can use turnstiles. Otherwise, we pass NULL as the
1936 // tstore to the shims so they wait on the global waitq.
1937 tstore = &kwq->kw_turnstile;
1938 }
1939
1940 pthread_kern->psynch_wait_prepare((uintptr_t)kwq, tstore, kwq->kw_owner,
1941 block_hint, abstime);
1942
f1a1da6c 1943 ksyn_wqunlock(kwq);
1944
1945 if (tstore) {
1946 pthread_kern->psynch_wait_update_complete(kwq->kw_turnstile);
1947 }
1948
1949 thread_block_parameter(continuation, kwq);
1950
1951 // NOT REACHED
1952 panic("ksyn_wait continuation returned");
1953 __builtin_unreachable();
1954}
1955
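/*
 * A note on the continuation contract, as a sketch (hypothetical
 * continuation, not from this file): ksyn_wait() never returns once it
 * blocks. The wakeup path runs the continuation instead, which translates
 * the wait result and picks up the kwe_psynchretval posted by ksyn_signal().
 */
#if 0
static void
psynch_continue_sketch(void *parameter, wait_result_t result)
{
	ksyn_wait_queue_t kwq = parameter;
	int error = _wait_result_to_errno(result);
	/* ... read kwe->kwe_psynchretval, drop the kwq reference, and
	 *     return to user space; control never goes back to the frame
	 *     that called ksyn_wait() ... */
	(void)kwq;
	(void)error;
}
#endif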
1956kern_return_t
1957ksyn_signal(ksyn_wait_queue_t kwq, kwq_queue_type_t kqi,
1958 ksyn_waitq_element_t kwe, uint32_t updateval)
1959{
1960 kern_return_t ret;
214d78a2 1961 struct turnstile **tstore = NULL;
1962
1963 // If no wait element was specified, wake the first.
1964 if (!kwe) {
1965 kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
1966 if (!kwe) {
1967 			panic("ksyn_signal: signaling empty queue");
1968 }
1969 }
1970
1971 if (kwe->kwe_state != KWE_THREAD_INWAIT) {
1972 		panic("ksyn_signal: signaling a non-waiting element");
1973 }
1974
1975 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1976 kwe->kwe_psynchretval = updateval;
1977
1978 if (_kwq_use_turnstile(kwq)) {
1979 tstore = &kwq->kw_turnstile;
1980 }
1981
1982 ret = pthread_kern->psynch_wait_wakeup(kwq, kwe, tstore);
1983
1984 if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
1985 		panic("ksyn_signal: failed waking up thread %x\n", ret);
1986 }
1987 return ret;
1988}
1989
1990int
1991ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
1992{
1993 kern_return_t ret;
1994 vm_page_info_basic_data_t info;
1995 mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
1996 ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr,
1997 VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
1998 if (ret != KERN_SUCCESS) {
1999 return EINVAL;
2000 }
2001
2002 if (objectp != NULL) {
2003 *objectp = (uint64_t)info.object_id;
2004 }
2005 if (offsetp != NULL) {
2006 *offsetp = (uint64_t)info.offset;
2007 }
2008
2009 return(0);
2010}
2011
2012
2013 /* find the lowest of kw_fr and kw_fwr */
2014int
2015kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen,
2016 int *typep, uint32_t lowest[])
2017{
2018 uint32_t kw_fr, kw_fwr, low;
2019 int type = 0, lowtype, typenum[2] = { 0 };
2020 uint32_t numbers[2] = { 0 };
2021 int count = 0, i;
2022
2023 if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) ||
2024 ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
2025 type |= PTH_RWSHFT_TYPE_READ;
2026 /* read entries are present */
2027 if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
2028 kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
2029 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) &&
2030 (is_seqlower(premgen, kw_fr) != 0))
2031 kw_fr = premgen;
2032 } else
2033 kw_fr = premgen;
2034
2035 lowest[KSYN_QUEUE_READ] = kw_fr;
2036 		numbers[count] = kw_fr;
2037 typenum[count] = PTH_RW_TYPE_READ;
2038 count++;
2039 } else
2040 lowest[KSYN_QUEUE_READ] = 0;
2041
2042 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) ||
2043 ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
2044 type |= PTH_RWSHFT_TYPE_WRITE;
2045 		/* write entries are present */
2046 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
2047 kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_firstnum;
2048 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) &&
2049 (is_seqlower(premgen, kw_fwr) != 0))
2050 kw_fwr = premgen;
2051 } else
2052 kw_fwr = premgen;
2053
214d78a2 2054 lowest[KSYN_QUEUE_WRITE] = kw_fwr;
2055 		numbers[count] = kw_fwr;
2056 typenum[count] = PTH_RW_TYPE_WRITE;
2057 count++;
2058 } else
214d78a2 2059 lowest[KSYN_QUEUE_WRITE] = 0;
2060
2061#if __TESTPANICS__
2062 if (count == 0)
2063 panic("nothing in the queue???\n");
2064#endif /* __TESTPANICS__ */
2065
2066 low = numbers[0];
2067 lowtype = typenum[0];
2068 if (count > 1) {
2069 		for (i = 1; i < count; i++) {
2070 			if (is_seqlower(numbers[i], low) != 0) {
2071 low = numbers[i];
2072 lowtype = typenum[i];
2073 }
2074 }
2075 }
2076 type |= lowtype;
2077
2078 if (typep != 0)
2079 *typep = type;
2080 return(0);
2081}
2082
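/*
 * Note: the is_seqlower()/is_seqhigher() comparisons used above are
 * wrap-safe, not numeric. A minimal sketch of the idea, assuming a plain
 * 32-bit sequence space (the real helpers live in synch_internal.h and work
 * within the PTHRW count space; this is only an illustration):
 */
#if 0
static int
seq_is_lower_sketch(uint32_t x, uint32_t y)
{
	/* x is "lower" when the forward distance from x to y is short */
	return (x != y) && ((y - x) < 0x80000000u);
}
#endif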
2083 /* wake up readers up to the writer limit */
2084 int
2085ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders,
2086 uint32_t updatebits, int *wokenp)
2087{
2088 ksyn_queue_t kq;
2089 int failedwakeup = 0;
2090 int numwoken = 0;
2091 kern_return_t kret = KERN_SUCCESS;
2092 uint32_t lbits = 0;
2093
2094 lbits = updatebits;
2095
2096 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2097 while ((kq->ksynq_count != 0) &&
2098 (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
2099 kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
2100 if (kret == KERN_NOT_WAITING) {
2101 failedwakeup++;
2102 }
2103 numwoken++;
2104 }
2105
2106 if (wokenp != NULL)
2107 *wokenp = numwoken;
2108 return(failedwakeup);
2109}
2110
2111
2112/*
2113 * This handles the unlock grants for next set on rw_unlock() or on arrival
2114 * of all preposted waiters.
2115 */
f1a1da6c 2116int
2117kwq_handle_unlock(ksyn_wait_queue_t kwq, __unused uint32_t mgen, uint32_t rw_wc,
2118 uint32_t *updatep, int flags, int *blockp, uint32_t premgen)
2119{
2120 uint32_t low_writer, limitrdnum;
2121 	int rwtype, error = 0;
214d78a2 2122 	int allreaders, nfailed;
2123 	uint32_t updatebits = 0, numneeded = 0;
2124 int prepost = flags & KW_UNLOCK_PREPOST;
2125 thread_t preth = THREAD_NULL;
2126 ksyn_waitq_element_t kwe;
2127 uthread_t uth;
2128 thread_t th;
2129 int woken = 0;
2130 int block = 1;
2131 	uint32_t lowest[KSYN_QUEUE_MAX]; /* no need for upgrade as it is handled separately */
2132 kern_return_t kret = KERN_SUCCESS;
2133 ksyn_queue_t kq;
2134 int curthreturns = 0;
2135
2136 if (prepost != 0) {
2137 preth = current_thread();
2138 }
2139
2140 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2141 kwq->kw_lastseqword = rw_wc;
2142 kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
214d78a2 2143 kwq->kw_kflags &= ~KSYN_KWF_OVERLAP_GUARD;
2144
2145 error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
2146#if __TESTPANICS__
2147 if (error != 0)
2148 		panic("rwunlock: failed to slot the next round of threads");
2149#endif /* __TESTPANICS__ */
2150
214d78a2 2151 low_writer = lowest[KSYN_QUEUE_WRITE];
2152
2153 allreaders = 0;
2154 updatebits = 0;
2155
2156 switch (rwtype & PTH_RW_TYPE_MASK) {
2157 case PTH_RW_TYPE_READ: {
2158 // XXX
2159 /* what about the preflight which is LREAD or READ ?? */
2160 if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
2161 if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
2162 updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
2163 }
2164 }
2165 limitrdnum = 0;
2166 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2167 limitrdnum = low_writer;
2168 } else {
2169 allreaders = 1;
2170 }
2171
2172 numneeded = 0;
2173
2174 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2175 limitrdnum = low_writer;
2176 numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
2177 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
2178 curthreturns = 1;
2179 numneeded += 1;
2180 }
2181 } else {
2182 			// no writers at all;
2183 			// no waiters other than readers
214d78a2 2184 kwq->kw_kflags |= KSYN_KWF_OVERLAP_GUARD;
2185 numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
2186 if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
2187 curthreturns = 1;
2188 numneeded += 1;
2189 }
2190 }
2191
2192 updatebits += (numneeded << PTHRW_COUNT_SHIFT);
2193
2194 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2195
2196 if (curthreturns != 0) {
2197 block = 0;
2198 uth = current_uthread();
2199 kwe = pthread_kern->uthread_get_uukwe(uth);
2200 kwe->kwe_psynchretval = updatebits;
2201 }
2202
2203
2204 nfailed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders,
2205 updatebits, &woken);
2206 if (nfailed != 0) {
2207 _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_READ, nfailed,
2208 limitrdnum, updatebits);
2209 }
2210
2211 error = 0;
2212
2213 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) &&
2214 ((updatebits & PTH_RWL_WBIT) == 0)) {
f1a1da6c 2215 panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
214d78a2 2216 }
2217 }
2218 break;
2219
2220 case PTH_RW_TYPE_WRITE: {
2221
2222 		/* only one thread is going to be granted */
2223 updatebits |= (PTHRW_INC);
2224 updatebits |= PTH_RWL_KBIT| PTH_RWL_EBIT;
2225
2226 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
2227 block = 0;
214d78a2 2228 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count != 0) {
2229 updatebits |= PTH_RWL_WBIT;
2230 }
2231 th = preth;
2232 uth = pthread_kern->get_bsdthread_info(th);
2233 kwe = pthread_kern->uthread_get_uukwe(uth);
2234 kwe->kwe_psynchretval = updatebits;
2235 } else {
2236 			/* we are not granting the write lock to the preposting thread */
2237 			/* if other writers are present, or the preposting writer remains queued, the W bit must be set */
214d78a2 2238 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITE].ksynq_count > 1 ||
2239 (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
2240 updatebits |= PTH_RWL_WBIT;
2241 }
2242 /* setup next in the queue */
214d78a2 2243 kret = ksyn_signal(kwq, KSYN_QUEUE_WRITE, NULL, updatebits);
f1a1da6c 2244 if (kret == KERN_NOT_WAITING) {
2245 _kwq_mark_interruped_wakeup(kwq, KWQ_INTR_WRITE, 1,
2246 low_writer, updatebits);
2247 }
2248 error = 0;
2249 }
2250 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2251 if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) !=
2252 (PTH_RWL_KBIT | PTH_RWL_EBIT)) {
f1a1da6c 2253 panic("kwq_handle_unlock: writer lock granted but no ke set %x\n", updatebits);
214d78a2 2254 }
2255 }
2256 break;
2257
2258 default:
2259 panic("rwunlock: invalid type for lock grants");
2260
2261 	}
2262
2263 if (updatep != NULL)
2264 *updatep = updatebits;
2265 if (blockp != NULL)
2266 *blockp = block;
2267 return(error);
2268}
2269
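/*
 * Worked example (assumed values, illustration only): granting three readers
 * while a writer stays queued composes updatebits from the reader count in
 * the high bits plus the W and K flag bits, as in the PTH_RW_TYPE_READ case
 * above.
 */
#if 0
static uint32_t
read_grant_bits_sketch(void)
{
	uint32_t numneeded = 3;
	uint32_t updatebits = (numneeded << PTHRW_COUNT_SHIFT);
	updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);	/* writer still waiting */
	return updatebits;
}
#endif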
2270/************* Indiv queue support routines ************************/
2271void
2272ksyn_queue_init(ksyn_queue_t kq)
2273{
2274 TAILQ_INIT(&kq->ksynq_kwelist);
2275 kq->ksynq_count = 0;
2276 kq->ksynq_firstnum = 0;
2277 kq->ksynq_lastnum = 0;
2278}
2279
2280int
2281ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe,
2282 uint32_t mgen, int fit)
2283{
2284 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2285 uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
2286 int res = 0;
2287
2288 if (kwe->kwe_kwqqueue != NULL) {
2289 panic("adding enqueued item to another queue");
2290 }
2291
2292 if (kq->ksynq_count == 0) {
2293 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2294 kq->ksynq_firstnum = lockseq;
2295 kq->ksynq_lastnum = lockseq;
2296 } else if (fit == FIRSTFIT) {
2297 /* TBD: if retry bit is set for mutex, add it to the head */
2298 /* firstfit, arriving order */
2299 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2300 if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2301 kq->ksynq_firstnum = lockseq;
2302 }
2303 if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
2304 kq->ksynq_lastnum = lockseq;
2305 }
2306 } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
2307 		/* During prepost, when a thread is being cancelled, we could have
2308 		 * two entries with the same seq */
2309 res = EBUSY;
2310 if (kwe->kwe_state == KWE_THREAD_PREPOST) {
2311 ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
2312 if (tmp != NULL && tmp->kwe_uth != NULL &&
2313 pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
2314 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2315 res = 0;
2316 }
2317 }
2318 } else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
2319 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2320 kq->ksynq_lastnum = lockseq;
2321 } else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2322 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2323 kq->ksynq_firstnum = lockseq;
2324 } else {
2325 ksyn_waitq_element_t q_kwe, r_kwe;
2326
2327 res = ESRCH;
2328 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2329 if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
2330 TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
2331 res = 0;
2332 break;
2333 }
2334 }
2335 }
2336
2337 if (res == 0) {
2338 kwe->kwe_kwqqueue = kwq;
2339 kq->ksynq_count++;
2340 kwq->kw_inqueue++;
2341 update_low_high(kwq, lockseq);
2342 }
2343 return res;
2344}
2345
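/*
 * Sketch (hypothetical wrapper, not from this file): FIRSTFIT appends in
 * arrival order, as the mutex path wants; passing fit == 0 keeps the queue
 * sorted by lockseq, which the rwlock and cvar paths rely on.
 */
#if 0
static int
queue_insert_mode_sketch(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe,
		uint32_t mgen, int firstfit)
{
	return ksyn_queue_insert(kwq, KSYN_QUEUE_WRITE, kwe, mgen,
			firstfit ? FIRSTFIT : 0);
}
#endif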
2346void
2347ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq,
2348 ksyn_waitq_element_t kwe)
2349{
2350 if (kq->ksynq_count == 0) {
2351 panic("removing item from empty queue");
2352 }
2353
2354 if (kwe->kwe_kwqqueue != kwq) {
2355 panic("removing item from wrong queue");
2356 }
2357
2358 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
2359 kwe->kwe_list.tqe_next = NULL;
2360 kwe->kwe_list.tqe_prev = NULL;
2361 kwe->kwe_kwqqueue = NULL;
2362
2363 if (--kq->ksynq_count > 0) {
2364 ksyn_waitq_element_t tmp;
2365 tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
2366 kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2367 tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
2368 kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2369 } else {
2370 kq->ksynq_firstnum = 0;
2371 kq->ksynq_lastnum = 0;
2372 }
2373
2374 if (--kwq->kw_inqueue > 0) {
2375 uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
2376 if (kwq->kw_lowseq == curseq) {
2377 kwq->kw_lowseq = find_nextlowseq(kwq);
2378 }
2379 if (kwq->kw_highseq == curseq) {
2380 kwq->kw_highseq = find_nexthighseq(kwq);
2381 }
2382 } else {
2383 kwq->kw_lowseq = 0;
2384 kwq->kw_highseq = 0;
2385 }
2386}
2387
2388ksyn_waitq_element_t
2389ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
2390 uint32_t seq)
2391{
2392 ksyn_waitq_element_t kwe;
2393
2394 // XXX: should stop searching when higher sequence number is seen
2395 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2396 if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
2397 return kwe;
2398 }
2399 }
2400 return NULL;
2401}
2402
2403/* find the thread at the target sequence (or a broadcast/prepost at or above) */
2404ksyn_waitq_element_t
2405ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
2406{
2407 ksyn_waitq_element_t result = NULL;
2408 ksyn_waitq_element_t kwe;
2409 uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
2410
2411 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2412 if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
2413 result = kwe;
2414
2415 // KWE_THREAD_INWAIT must be strictly equal
2416 if (kwe->kwe_state == KWE_THREAD_INWAIT &&
2417 (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
2418 result = NULL;
2419 }
2420 break;
2421 }
2422 }
2423 return result;
2424}
2425
2426 /* look for a thread at signalseq; failing that, the first eligible waiter up to uptoseq */
2427 ksyn_waitq_element_t
2428ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq,
2429 uint32_t uptoseq, uint32_t signalseq)
2430{
2431 ksyn_waitq_element_t result = NULL;
2432 ksyn_waitq_element_t q_kwe, r_kwe;
2433
2434 // XXX
2435 /* case where wrap in the tail of the queue exists */
2436 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2437 if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
2438 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2439 return result;
2440 }
2441 }
2442 		if (q_kwe->kwe_state == KWE_THREAD_PREPOST ||
2443 				q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
2444 /* match any prepost at our same uptoseq or any broadcast above */
2445 if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
2446 continue;
2447 }
2448 return q_kwe;
2449 } else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
2450 /*
2451 * Match any (non-cancelled) thread at or below our upto sequence -
2452 * but prefer an exact match to our signal sequence (if present) to
2453 * keep exact matches happening.
2454 */
2455 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2456 return result;
2457 }
2458 if (q_kwe->kwe_kwqqueue == kwq) {
2459 if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
2460 /* if equal or higher than our signal sequence, return this one */
2461 if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
2462 return q_kwe;
2463 }
2464
2465 /* otherwise, just remember this eligible thread and move on */
2466 if (result == NULL) {
2467 result = q_kwe;
2468 }
2469 }
2470 }
2471 } else {
2472 panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
2473 }
2474 }
2475 return result;
2476}
2477
2478void
2479ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
2480{
2481 ksyn_waitq_element_t kwe;
2482 uint32_t tseq = upto & PTHRW_COUNT_MASK;
2483 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2484 uint32_t freed = 0, signaled = 0;
2485
2486 PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_START, kwq->kw_addr,
2487 kqi, upto, all);
2488
2489 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
2490 if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
2491 break;
2492 }
2493 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2494 /*
2495 			 * This scenario is typically seen when the cvar has been
2496 			 * reinitialized and the new waiters are waiting. We can
2497 			 * return them as spurious waits so the cvar state gets
2498 			 * reset correctly.
2499 */
2500
2501 PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
2502 kwq->kw_inqueue, 1);
2503
2504 /* skip canceled ones */
2505 /* wake the rest */
2506 			/* set M bit to indicate to the waking CV to return the Inc val */
2507 (void)ksyn_signal(kwq, kqi, kwe,
2508 PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
2509 signaled++;
f1a1da6c 2510 } else {
2511 PTHREAD_TRACE(psynch_cvar_freeitems, kwq->kw_addr, kwe,
2512 kwq->kw_inqueue, 2);
f1a1da6c 2513 ksyn_queue_remove_item(kwq, kq, kwe);
214d78a2 2514 zfree(kwe_zone, kwe);
f1a1da6c 2515 kwq->kw_fakecount--;
214d78a2 2516 freed++;
2517 }
2518 }
2519
2520 PTHREAD_TRACE(psynch_cvar_freeitems | DBG_FUNC_END, kwq->kw_addr, freed,
2521 signaled, kwq->kw_inqueue);
2522}
2523
2524/*************************************************************************/
2525
2526void
2527update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
2528{
2529 if (kwq->kw_inqueue == 1) {
2530 kwq->kw_lowseq = lockseq;
2531 kwq->kw_highseq = lockseq;
2532 } else {
2533 if (is_seqlower(lockseq, kwq->kw_lowseq)) {
2534 kwq->kw_lowseq = lockseq;
2535 }
2536 if (is_seqhigher(lockseq, kwq->kw_highseq)) {
2537 kwq->kw_highseq = lockseq;
2538 }
2539 }
2540}
2541
2542uint32_t
2543find_nextlowseq(ksyn_wait_queue_t kwq)
2544{
2545 uint32_t lowest = 0;
2546 int first = 1;
2547 int i;
2548
2549 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2550 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2551 uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
2552 if (first || is_seqlower(current, lowest)) {
2553 lowest = current;
2554 first = 0;
2555 }
2556 }
2557 }
2558
2559 return lowest;
2560}
2561
2562uint32_t
2563find_nexthighseq(ksyn_wait_queue_t kwq)
2564{
2565 uint32_t highest = 0;
2566 int first = 1;
2567 int i;
2568
2569 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2570 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2571 uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
2572 if (first || is_seqhigher(current, highest)) {
2573 highest = current;
2574 first = 0;
2575 }
2576 }
2577 }
2578
2579 return highest;
2580}
2581
2582int
2583find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters,
2584 uint32_t *countp)
2585{
2586 int i;
2587 uint32_t count = 0;
2588
2589 	for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2590 count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
2591 if (count >= nwaiters) {
2592 break;
2593 }
2594 }
2595
2596 if (countp != NULL) {
2597 *countp = count;
2598 }
2599
2600 if (count == 0) {
2601 return 0;
2602 } else if (count >= nwaiters) {
2603 return 1;
2604 } else {
2605 return 0;
2606 }
2607}
2608
2609
2610uint32_t
2611ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
2612{
2613 uint32_t i = 0;
2614 ksyn_waitq_element_t kwe, newkwe;
2615
2616 if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
2617 return 0;
2618 }
2619 if (upto == kq->ksynq_firstnum) {
2620 return 1;
2621 }
2622 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2623 uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
2624 if (is_seqhigher(curval, upto)) {
2625 break;
2626 }
2627 ++i;
2628 if (upto == curval) {
2629 break;
2630 }
2631 }
2632 return i;
2633}
2634
2635/* handles the cond broadcast of cvar and returns number of woken threads and bits for syscall return */
2636void
2637ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
2638{
2639 ksyn_waitq_element_t kwe, newkwe;
2640 uint32_t updatebits = 0;
214d78a2 2641 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITE];
2642
2643 struct ksyn_queue kfreeq;
2644 ksyn_queue_init(&kfreeq);
2645
2646 PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_START, ckwq->kw_addr, upto,
2647 ckwq->kw_inqueue, 0);
2648
2649retry:
2650 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2651 if (is_seqhigher(kwe->kwe_lockseq, upto)) {
2652 // outside our range
2653 break;
2654 }
2655
2656 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2657 // Wake only non-canceled threads waiting on this CV.
2658 if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
2659 PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe, 0, 1);
2660 (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITE, kwe, PTH_RWL_MTX_WAIT);
2661 updatebits += PTHRW_INC;
2662 }
2663 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
2664 kwe->kwe_state == KWE_THREAD_PREPOST) {
2665 PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, kwe,
2666 kwe->kwe_state, 2);
2667 ksyn_queue_remove_item(ckwq, kq, kwe);
2668 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2669 ckwq->kw_fakecount--;
2670 } else {
2671 panic("unknown kwe state\n");
2672 }
2673 }
2674
2675 /* Need to enter a broadcast in the queue (if not already at L == S) */
2676
2677 if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
2678 PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, ckwq->kw_lword,
2679 ckwq->kw_sword, 3);
2680
2681 newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
2682 if (newkwe == NULL) {
2683 ksyn_wqunlock(ckwq);
214d78a2 2684 newkwe = (ksyn_waitq_element_t)zalloc(kwe_zone);
2685 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2686 ksyn_wqlock(ckwq);
2687 goto retry;
2688 } else {
2689 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2690 ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
214d78a2 2691 PTHREAD_TRACE(psynch_cvar_broadcast, ckwq->kw_addr, newkwe, 0, 4);
2692 }
2693 }
2694
2695 // free up any remaining things stumbled across above
2696 while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
2697 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
214d78a2 2698 zfree(kwe_zone, kwe);
f1a1da6c 2699 }
2700
2701 PTHREAD_TRACE(psynch_cvar_broadcast | DBG_FUNC_END, ckwq->kw_addr,
2702 updatebits, 0, 0);
2703
2704 if (updatep != NULL) {
214d78a2 2705 *updatep |= updatebits;
2706 }
2707}
2708
2709void
2710ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
2711{
2712 if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
2713 if (ckwq->kw_inqueue != 0) {
2714 /* FREE THE QUEUE */
214d78a2 2715 ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITE, ckwq->kw_lword, 0);
f1a1da6c
A
2716#if __TESTPANICS__
2717 if (ckwq->kw_inqueue != 0)
2718 panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
2719#endif /* __TESTPANICS__ */
2720 }
2721 ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
2722 ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
2723 *updatebits |= PTH_RWS_CV_CBIT;
2724 } else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
2725 // only fake entries are present in the queue
2726 *updatebits |= PTH_RWS_CV_PBIT;
2727 }
2728}
2729
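/*
 * Sketch of the L == S test above (names are the file's own; the helper is
 * hypothetical): once the lock word's count catches up with the signal
 * word's count, every signal has been consumed and the cvar state can be
 * zeroed.
 */
#if 0
static int
cv_all_signals_consumed_sketch(uint32_t lword, uint32_t sword)
{
	/* compare only the count portion; flag bits are masked off */
	return (lword & PTHRW_COUNT_MASK) == (sword & PTHRW_COUNT_MASK);
}
#endif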
2730void
2731psynch_zoneinit(void)
2732{
2733 kwq_zone = zinit(sizeof(struct ksyn_wait_queue),
2734 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
2735 kwe_zone = zinit(sizeof(struct ksyn_waitq_element),
2736 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
f1a1da6c 2737}
2738
2739void *
2740_pthread_get_thread_kwq(thread_t thread)
2741{
2742 assert(thread);
2743 struct uthread * uthread = pthread_kern->get_bsdthread_info(thread);
2744 assert(uthread);
2745 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uthread);
2746 assert(kwe);
2747 ksyn_wait_queue_t kwq = kwe->kwe_kwqqueue;
2748 return kwq;
2749}
2750
2751/* This function is used by stackshot to determine why a thread is blocked, and report
2752 * who owns the object that the thread is blocked on. It should *only* be called if the
2753 * `block_hint' field in the relevant thread's struct is populated with something related
2754 * to pthread sync objects.
2755 */
2756void
2757_pthread_find_owner(thread_t thread,
2758 struct stackshot_thread_waitinfo * waitinfo)
2759{
2760 ksyn_wait_queue_t kwq = _pthread_get_thread_kwq(thread);
2761 switch (waitinfo->wait_type) {
2762 case kThreadWaitPThreadMutex:
2763 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_MTX);
214d78a2 2764 waitinfo->owner = thread_tid(kwq->kw_owner);
2765 waitinfo->context = kwq->kw_addr;
2766 break;
2767 /* Owner of rwlock not stored in kernel space due to races. Punt
2768 * and hope that the userspace address is helpful enough. */
2769 case kThreadWaitPThreadRWLockRead:
2770 case kThreadWaitPThreadRWLockWrite:
2771 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK);
2772 waitinfo->owner = 0;
2773 waitinfo->context = kwq->kw_addr;
2774 break;
2775 /* Condvars don't have owners, so just give the userspace address. */
2776 case kThreadWaitPThreadCondVar:
2777 assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);
2778 waitinfo->owner = 0;
2779 waitinfo->context = kwq->kw_addr;
2780 break;
2781 case kThreadWaitNone:
2782 default:
2783 waitinfo->owner = 0;
2784 waitinfo->context = 0;
2785 break;
2786 }
2787}