/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* Copyright (c) 1995-2005 Apple Computer, Inc. All Rights Reserved */
/*
 * pthread_support.c
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/resourcevar.h>
//#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/systm.h>
#include <sys/timeb.h>
#include <sys/times.h>
#include <sys/time.h>
#include <sys/acct.h>
#include <sys/kernel.h>
#include <sys/wait.h>
#include <sys/signalvar.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/lock.h>
#include <sys/kdebug.h>
//#include <sys/sysproto.h>
//#include <sys/pthread_internal.h>
#include <sys/vm.h>
#include <sys/user.h>

#include <mach/mach_types.h>
#include <mach/vm_prot.h>
#include <mach/semaphore.h>
#include <mach/sync_policy.h>
#include <mach/task.h>
#include <kern/kern_types.h>
#include <kern/task.h>
#include <kern/clock.h>
#include <mach/kern_return.h>
#include <kern/thread.h>
#include <kern/sched_prim.h>
#include <kern/thread_call.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/processor.h>
#include <kern/wait_queue.h>
//#include <kern/mach_param.h>
#include <mach/mach_vm.h>
#include <mach/mach_param.h>
#include <mach/thread_policy.h>
#include <mach/message.h>
#include <mach/port.h>
//#include <vm/vm_protos.h>
#include <vm/vm_map.h>
#include <mach/vm_region.h>

#include <libkern/OSAtomic.h>

#include <pexpert/pexpert.h>
#include <sys/pthread_shims.h>

#include "kern_internal.h"
#include "synch_internal.h"
#include "kern_trace.h"

typedef struct uthread *uthread_t;

//#define __FAILEDUSERTEST__(s) do { panic(s); } while (0)
#define __FAILEDUSERTEST__(s) do { printf("PSYNCH: pid[%d]: %s\n", proc_pid(current_proc()), s); } while (0)

#define ECVCERORR	256
#define ECVPERORR	512
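/*
 * These values are OR'd into the cvwait error return (see psynch_cvcontinue)
 * so the C or P bit fixups are carried back alongside the errno value.
 */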

lck_mtx_t *pthread_list_mlock;

#define PTH_HASHSIZE 100

static LIST_HEAD(pthhashhead, ksyn_wait_queue) *pth_glob_hashtbl;
static unsigned long pthhash;

static LIST_HEAD(, ksyn_wait_queue) pth_free_list;

static zone_t kwq_zone;	/* zone for allocation of ksyn_queue */
static zone_t kwe_zone;	/* zone for allocation of ksyn_waitq_element */

#define SEQFIT 0
#define FIRSTFIT 1

struct ksyn_queue {
	TAILQ_HEAD(ksynq_kwelist_head, ksyn_waitq_element) ksynq_kwelist;
	uint32_t	ksynq_count;	/* number of entries in queue */
	uint32_t	ksynq_firstnum;	/* lowest seq in queue */
	uint32_t	ksynq_lastnum;	/* highest seq in queue */
};
typedef struct ksyn_queue *ksyn_queue_t;

enum {
	KSYN_QUEUE_READ = 0,
	KSYN_QUEUE_WRITER,
	KSYN_QUEUE_MAX,
};
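
/* index values for kw_ksynqueues[] below: one queue for readers, one for writers */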

struct ksyn_wait_queue {
	LIST_ENTRY(ksyn_wait_queue) kw_hash;
	LIST_ENTRY(ksyn_wait_queue) kw_list;
	user_addr_t kw_addr;
	uint64_t kw_owner;
	uint64_t kw_object;		/* object backing in shared mode */
	uint64_t kw_offset;		/* offset inside the object in shared mode */
	int	kw_pflags;		/* flags under listlock protection */
	struct timeval kw_ts;		/* timeval needed for upkeep before free */
	int	kw_iocount;		/* inuse reference */
	int	kw_dropcount;		/* current users unlocking... */

	int	kw_type;		/* queue type like mutex, cvar, etc */
	uint32_t kw_inqueue;		/* num of waiters held */
	uint32_t kw_fakecount;		/* number of error/prepost fakes */
	uint32_t kw_highseq;		/* highest seq in the queue */
	uint32_t kw_lowseq;		/* lowest seq in the queue */
	uint32_t kw_lword;		/* L word value from userland */
	uint32_t kw_uword;		/* U word value from userland */
	uint32_t kw_sword;		/* S word value from userland */
	uint32_t kw_lastunlockseq;	/* the last seq that unlocked */
	/* for a CV, used as the seq the kernel has seen so far */
#define kw_cvkernelseq kw_lastunlockseq
	uint32_t kw_lastseqword;	/* the last seq that unlocked */
	/* for mutex and cvar we need to track I bit values */
	uint32_t kw_nextseqword;	/* the last seq that unlocked; with num of waiters */
	uint32_t kw_overlapwatch;	/* chance for overlaps */
	uint32_t kw_pre_rwwc;		/* prepost count */
	uint32_t kw_pre_lockseq;	/* prepost target seq */
	uint32_t kw_pre_sseq;		/* prepost target sword, in cvar used for mutexowned */
	uint32_t kw_pre_intrcount;	/* prepost of missed wakeup due to intrs */
	uint32_t kw_pre_intrseq;	/* prepost of missed wakeup limit seq */
	uint32_t kw_pre_intrretbits;	/* return bits value for missed wakeup threads */
	uint32_t kw_pre_intrtype;	/* type of failed wakeups */

	int	kw_kflags;
	int	kw_qos_override;	/* QoS of max waiter during contention period */
	struct ksyn_queue kw_ksynqueues[KSYN_QUEUE_MAX];	/* queues to hold threads */
	lck_mtx_t kw_lock;		/* mutex lock protecting this structure */
};
typedef struct ksyn_wait_queue *ksyn_wait_queue_t;

#define TID_ZERO (uint64_t)0

/* bits needed in handling the rwlock unlock */
#define PTH_RW_TYPE_READ	0x01
#define PTH_RW_TYPE_WRITE	0x04
#define PTH_RW_TYPE_MASK	0xff
#define PTH_RW_TYPE_SHIFT	8

#define PTH_RWSHFT_TYPE_READ	0x0100
#define PTH_RWSHFT_TYPE_WRITE	0x0400
#define PTH_RWSHFT_TYPE_MASK	0xff00
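
/*
 * The shifted forms are the type bits moved up by PTH_RW_TYPE_SHIFT,
 * e.g. (PTH_RW_TYPE_READ << PTH_RW_TYPE_SHIFT) == PTH_RWSHFT_TYPE_READ
 * (0x01 << 8 == 0x0100), and likewise for WRITE and the mask.
 */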

/*
 * Mutex pshared attributes
 */
#define PTHREAD_PROCESS_SHARED		_PTHREAD_MTX_OPT_PSHARED
#define PTHREAD_PROCESS_PRIVATE		0x20
#define PTHREAD_PSHARED_FLAGS_MASK	0x30

/*
 * Mutex policy attributes
 */
#define _PTHREAD_MUTEX_POLICY_NONE		0
#define _PTHREAD_MUTEX_POLICY_FAIRSHARE		0x040	/* 1 */
#define _PTHREAD_MUTEX_POLICY_FIRSTFIT		0x080	/* 2 */
#define _PTHREAD_MUTEX_POLICY_REALTIME		0x0c0	/* 3 */
#define _PTHREAD_MUTEX_POLICY_ADAPTIVE		0x100	/* 4 */
#define _PTHREAD_MUTEX_POLICY_PRIPROTECT	0x140	/* 5 */
#define _PTHREAD_MUTEX_POLICY_PRIINHERIT	0x180	/* 6 */
#define PTHREAD_POLICY_FLAGS_MASK		0x1c0
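
/*
 * Callers extract the policy by masking the flags word, e.g. as done in
 * _psynch_mutexdrop_internal():
 *
 *	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
 */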

/* pflags */
#define KSYN_WQ_INHASH	2
#define KSYN_WQ_SHARED	4
#define KSYN_WQ_WAITING	8	/* threads waiting for this wq to be available */
#define KSYN_WQ_FLIST	0x10	/* in free list to be freed after a short delay */

/* kflags */
#define KSYN_KWF_INITCLEARED	1	/* the init status found and preposts cleared */
#define KSYN_KWF_ZEROEDOUT	2	/* the lword, etc are inited to 0 */
#define KSYN_KWF_QOS_APPLIED	4	/* QoS override applied to owner */

#define KSYN_CLEANUP_DEADLINE 10
static int psynch_cleanupset;
thread_call_t psynch_thcall;

#define KSYN_WQTYPE_INWAIT	0x1000
#define KSYN_WQTYPE_INDROP	0x2000
#define KSYN_WQTYPE_MTX		0x01
#define KSYN_WQTYPE_CVAR	0x02
#define KSYN_WQTYPE_RWLOCK	0x04
#define KSYN_WQTYPE_SEMA	0x08
#define KSYN_WQTYPE_MASK	0xff

#define KSYN_WQTYPE_MUTEXDROP	(KSYN_WQTYPE_INDROP | KSYN_WQTYPE_MTX)

#define KW_UNLOCK_PREPOST		0x01
#define KW_UNLOCK_PREPOST_READLOCK	0x08
#define KW_UNLOCK_PREPOST_WRLOCK	0x20

static void
CLEAR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_lockseq = 0;
	kwq->kw_pre_sseq = PTHRW_RWS_INIT;
	kwq->kw_pre_rwwc = 0;
}

static void
CLEAR_INTR_PREPOST_BITS(ksyn_wait_queue_t kwq)
{
	kwq->kw_pre_intrcount = 0;
	kwq->kw_pre_intrseq = 0;
	kwq->kw_pre_intrretbits = 0;
	kwq->kw_pre_intrtype = 0;
}
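
/*
 * The kw_pre_intr* fields record a wakeup that was granted while the chosen
 * waiter had already been interrupted out of its wait; a later waiter
 * consumes it via _ksyn_handle_missed_wakeups().
 */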

static void
CLEAR_REINIT_BITS(ksyn_wait_queue_t kwq)
{
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) {
		if (kwq->kw_inqueue != 0 && kwq->kw_inqueue != kwq->kw_fakecount) {
			panic("CV: entries in queue during reinit %d:%d\n", kwq->kw_inqueue, kwq->kw_fakecount);
		}
	}
	if ((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_RWLOCK) {
		kwq->kw_nextseqword = PTHRW_RWS_INIT;
		kwq->kw_overlapwatch = 0;
	}
	CLEAR_PREPOST_BITS(kwq);
	kwq->kw_lastunlockseq = PTHRW_RWL_INIT;
	kwq->kw_lastseqword = PTHRW_RWS_INIT;
	CLEAR_INTR_PREPOST_BITS(kwq);
	kwq->kw_lword = 0;
	kwq->kw_uword = 0;
	kwq->kw_sword = PTHRW_RWS_INIT;
}

static int ksyn_wq_hash_lookup(user_addr_t uaddr, proc_t p, int flags, ksyn_wait_queue_t *kwq, struct pthhashhead **hashptr, uint64_t *object, uint64_t *offset);
static int ksyn_wqfind(user_addr_t mutex, uint32_t mgen, uint32_t ugen, uint32_t rw_wc, int flags, int wqtype, ksyn_wait_queue_t *wq);
static void ksyn_wqrelease(ksyn_wait_queue_t mkwq, int qfreenow, int wqtype);
static int ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp);

static int _wait_result_to_errno(wait_result_t result);

static int ksyn_wait(ksyn_wait_queue_t, int, uint32_t, int, uint64_t, thread_continue_t);
static kern_return_t ksyn_signal(ksyn_wait_queue_t, int, ksyn_waitq_element_t, uint32_t);
static void ksyn_freeallkwe(ksyn_queue_t kq);

static kern_return_t ksyn_mtxsignal(ksyn_wait_queue_t, ksyn_waitq_element_t kwe, uint32_t);
static void ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t, uint64_t tid, boolean_t prepost);
static void ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t, ksyn_waitq_element_t);
static void ksyn_mtx_drop_qos_override(ksyn_wait_queue_t);

static int kwq_handle_unlock(ksyn_wait_queue_t, uint32_t mgen, uint32_t rw_wc, uint32_t *updatep, int flags, int *blockp, uint32_t premgen);

static void ksyn_queue_init(ksyn_queue_t kq);
static int ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int firstfit);
static void ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe);
static void ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all);

static void update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq);
static uint32_t find_nextlowseq(ksyn_wait_queue_t kwq);
static uint32_t find_nexthighseq(ksyn_wait_queue_t kwq);
static int find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp);

static uint32_t ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto);

static ksyn_waitq_element_t ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen);
static void ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep);
static void ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatep);
static ksyn_waitq_element_t ksyn_queue_find_signalseq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t toseq, uint32_t lockseq);

static void psynch_cvcontinue(void *, wait_result_t);
static void psynch_mtxcontinue(void *, wait_result_t);

static int ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp);
static int kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *type, uint32_t lowest[]);
static ksyn_waitq_element_t ksyn_queue_find_seq(ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq);

static void
UPDATE_CVKWQ(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, uint32_t rw_wc)
{
	int sinit = ((rw_wc & PTH_RWS_CV_CBIT) != 0);

	// assert((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR);

	if ((kwq->kw_kflags & KSYN_KWF_ZEROEDOUT) != 0) {
		/* the values of L, U and S are cleared out due to L==S in previous transition */
		kwq->kw_lword = mgen;
		kwq->kw_uword = ugen;
		kwq->kw_sword = rw_wc;
		kwq->kw_kflags &= ~KSYN_KWF_ZEROEDOUT;
	} else {
		if (is_seqhigher(mgen, kwq->kw_lword)) {
			kwq->kw_lword = mgen;
		}
		if (is_seqhigher(ugen, kwq->kw_uword)) {
			kwq->kw_uword = ugen;
		}
		if (sinit && is_seqhigher(rw_wc, kwq->kw_sword)) {
			kwq->kw_sword = rw_wc;
		}
	}
	if (sinit && is_seqlower(kwq->kw_cvkernelseq, rw_wc)) {
		kwq->kw_cvkernelseq = (rw_wc & PTHRW_COUNT_MASK);
	}
}

static void
pthread_list_lock(void)
{
	lck_mtx_lock(pthread_list_mlock);
}

static void
pthread_list_unlock(void)
{
	lck_mtx_unlock(pthread_list_mlock);
}

static void
ksyn_wqlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_lock(&kwq->kw_lock);
}

static void
ksyn_wqunlock(ksyn_wait_queue_t kwq)
{
	lck_mtx_unlock(&kwq->kw_lock);
}

/* Routine to drop the mutex unlocks; used both for the mutexunlock system call and for drops during cond wait. */
static uint32_t
_psynch_mutexdrop_internal(ksyn_wait_queue_t kwq, uint32_t mgen, uint32_t ugen, int flags)
{
	kern_return_t ret;
	uint32_t returnbits = 0;
	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t nextgen = (ugen + PTHRW_INC);

	ksyn_wqlock(kwq);
	kwq->kw_lastunlockseq = (ugen & PTHRW_COUNT_MASK);
	uint32_t updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_EBIT | PTH_RWL_KBIT);
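	// updatebits seeds the lock word handed to the next owner: the count of
	// the highest queued sequence plus the EBIT and KBIT flags.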

redrive:
	if (firstfit) {
		if (kwq->kw_inqueue == 0) {
			// not set, or the new lock sequence is higher
			if (kwq->kw_pre_rwwc == 0 || is_seqhigher(mgen, kwq->kw_pre_lockseq)) {
				kwq->kw_pre_lockseq = (mgen & PTHRW_COUNT_MASK);
			}
			kwq->kw_pre_rwwc = 1;
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			// indicate prepost content in kernel
			returnbits = mgen | PTH_RWL_PBIT;
		} else {
			// signal first waiter
			ret = ksyn_mtxsignal(kwq, NULL, updatebits);
			if (ret == KERN_NOT_WAITING) {
				goto redrive;
			}
		}
	} else {
		int prepost = 0;
		if (kwq->kw_inqueue == 0) {
			// No waiters in the queue.
			prepost = 1;
		} else {
			uint32_t low_writer = (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum & PTHRW_COUNT_MASK);
			if (low_writer == nextgen) {
				/* next seq to be granted found */
				/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
				ret = ksyn_mtxsignal(kwq, NULL, updatebits | PTH_RWL_MTX_WAIT);
				if (ret == KERN_NOT_WAITING) {
					/* interrupt post */
					kwq->kw_pre_intrcount = 1;
					kwq->kw_pre_intrseq = nextgen;
					kwq->kw_pre_intrretbits = updatebits;
					kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
				}
			} else if (is_seqhigher(low_writer, nextgen)) {
				prepost = 1;
			} else {
				//__FAILEDUSERTEST__("psynch_mutexdrop_internal: FS mutex unlock sequence higher than the lowest one in queue\n");
				ksyn_waitq_element_t kwe;
				kwe = ksyn_queue_find_seq(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], nextgen);
				if (kwe != NULL) {
					/* next seq to be granted found */
					/* since the grant could be cv, make sure mutex wait is set in case the thread interrupted out */
					ret = ksyn_mtxsignal(kwq, kwe, updatebits | PTH_RWL_MTX_WAIT);
					if (ret == KERN_NOT_WAITING) {
						goto redrive;
					}
				} else {
					prepost = 1;
				}
			}
		}
		if (prepost) {
			ksyn_mtx_drop_qos_override(kwq);
			kwq->kw_owner = 0;
			if (++kwq->kw_pre_rwwc > 1) {
				__FAILEDUSERTEST__("_psynch_mutexdrop_internal: multiple preposts\n");
			} else {
				kwq->kw_pre_lockseq = (nextgen & PTHRW_COUNT_MASK);
			}
		}
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, KSYN_WQTYPE_MUTEXDROP);
	return returnbits;
}

static int
_ksyn_check_init(ksyn_wait_queue_t kwq, uint32_t lgenval)
{
	int res = (lgenval & PTHRW_RWL_INIT) != 0;
	if (res) {
		if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) == 0) {
			/* first to notice the reset of the lock, clear preposts */
			CLEAR_REINIT_BITS(kwq);
			kwq->kw_kflags |= KSYN_KWF_INITCLEARED;
		}
	}
	return res;
}

static int
_ksyn_handle_missed_wakeups(ksyn_wait_queue_t kwq,
			    uint32_t type,
			    uint32_t lockseq,
			    uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_intrcount != 0 &&
	    kwq->kw_pre_intrtype == type &&
	    is_seqlower_eq(lockseq, kwq->kw_pre_intrseq)) {
		kwq->kw_pre_intrcount--;
		*retval = kwq->kw_pre_intrretbits;
		if (kwq->kw_pre_intrcount == 0) {
			CLEAR_INTR_PREPOST_BITS(kwq);
		}
		res = 1;
	}
	return res;
}

static int
_ksyn_handle_overlap(ksyn_wait_queue_t kwq,
		     uint32_t lgenval,
		     uint32_t rw_wc,
		     uint32_t *retval)
{
	int res = 0;

	// check for overlap and no pending W bit (indicates writers)
	if (kwq->kw_overlapwatch != 0 &&
	    (rw_wc & PTHRW_RWS_SAVEMASK) == 0 &&
	    (lgenval & PTH_RWL_WBIT) == 0) {
		/* overlap is set, so no need to check for valid state for overlap */

		if (is_seqlower_eq(rw_wc, kwq->kw_nextseqword) || is_seqhigher_eq(kwq->kw_lastseqword, rw_wc)) {
			/* increase the next expected seq by one */
			kwq->kw_nextseqword += PTHRW_INC;
			/* return a count of one plus the bits from the next seq, with the M bit added */
			*retval = PTHRW_INC | ((kwq->kw_nextseqword & PTHRW_BIT_MASK) | PTH_RWL_MBIT);
			res = 1;
		}
	}
	return res;
}

static int
_ksyn_handle_prepost(ksyn_wait_queue_t kwq,
		     uint32_t type,
		     uint32_t lockseq,
		     uint32_t *retval)
{
	int res = 0;
	if (kwq->kw_pre_rwwc != 0 && is_seqlower_eq(lockseq, kwq->kw_pre_lockseq)) {
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			uint32_t preseq = kwq->kw_pre_lockseq;
			uint32_t prerw_wc = kwq->kw_pre_sseq;
			CLEAR_PREPOST_BITS(kwq);
			if ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0) {
				kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
			}

			int error, block;
			uint32_t updatebits;
			error = kwq_handle_unlock(kwq, preseq, prerw_wc, &updatebits, (type | KW_UNLOCK_PREPOST), &block, lockseq);
			if (error != 0) {
				panic("kwq_handle_unlock failed %d\n", error);
			}

			if (block == 0) {
				*retval = updatebits;
				res = 1;
			}
		}
	}
	return res;
}
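
/*
 * The three helpers above are the rwlock fast paths; __psynch_rw_lock() tries
 * them in order: missed wakeups, then read overlap, then preposts.
 */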

/* Helpers for QoS override management. Only applies to mutexes. */
static void
ksyn_mtx_update_owner_qos_override(ksyn_wait_queue_t kwq, uint64_t tid, boolean_t prepost)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;
		int waiter_qos = pthread_kern->proc_usynch_get_requested_thread_qos(current_uthread());

		kwq->kw_qos_override = MAX(waiter_qos, kwq->kw_qos_override);

		if (prepost && kwq->kw_inqueue == 0) {
			// if there are no more waiters in the queue after the new (prepost-receiving) owner, we do not set an
			// override, because the receiving owner may not re-enter the kernel to signal someone else if it is
			// the last one to unlock. If other waiters end up entering the kernel, they will boost the owner
			tid = 0;
		}

		if (tid != 0) {
			if ((tid == kwq->kw_owner) && (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED)) {
				// hint continues to be accurate, and a boost was already applied
				pthread_kern->proc_usynch_thread_qos_add_override(NULL, tid, kwq->kw_qos_override, FALSE);
			} else {
				// either the hint did not match the previous owner, or the hint was accurate but
				// the mutex was not contended enough for a boost previously
				boolean_t boostsucceeded;

				boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override(NULL, tid, kwq->kw_qos_override, TRUE);

				if (boostsucceeded) {
					kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
				}

				if (wasboosted && (tid != kwq->kw_owner) && (kwq->kw_owner != 0)) {
					// the hint did not match the previous owner, so drop overrides
					PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
					pthread_kern->proc_usynch_thread_qos_remove_override(NULL, kwq->kw_owner);
				}
			}
		} else {
			// new hint tells us that we don't know the owner, so drop any existing overrides
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

			if (wasboosted && (kwq->kw_owner != 0)) {
				// the hint did not match the previous owner, so drop overrides
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override(NULL, kwq->kw_owner);
			}
		}
	}
}

static void
ksyn_mtx_transfer_qos_override(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		if (kwq->kw_inqueue > 1) {
			boolean_t boostsucceeded;

			// More than one waiter, so resource will still be contended after handing off ownership
			boostsucceeded = pthread_kern->proc_usynch_thread_qos_add_override(kwe->kwe_uth, 0, kwq->kw_qos_override, TRUE);

			if (boostsucceeded) {
				kwq->kw_kflags |= KSYN_KWF_QOS_APPLIED;
			}
		} else {
			// kw_inqueue == 1 to get to this point, which means there will be no contention after this point
			kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
			kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
		}

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override(NULL, kwq->kw_owner);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override(current_uthread(), 0);
			}
		}
	}
}

static void
ksyn_mtx_drop_qos_override(ksyn_wait_queue_t kwq)
{
	if (!(kwq->kw_pflags & KSYN_WQ_SHARED)) {
		boolean_t wasboosted = (kwq->kw_kflags & KSYN_KWF_QOS_APPLIED) ? TRUE : FALSE;

		// assume nobody else in queue if this routine was called
		kwq->kw_kflags &= ~KSYN_KWF_QOS_APPLIED;
		kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;

		// Remove the override that was applied to kw_owner. There may have been a race,
		// in which case it may not match the current thread
		if (wasboosted) {
			if (kwq->kw_owner == 0) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, 0, 0, 0, 0, 0);
			} else if (thread_tid(current_thread()) != kwq->kw_owner) {
				PTHREAD_TRACE(TRACE_psynch_ksyn_incorrect_owner, kwq->kw_owner, 0, 0, 0, 0);
				pthread_kern->proc_usynch_thread_qos_remove_override(NULL, kwq->kw_owner);
			} else {
				pthread_kern->proc_usynch_thread_qos_remove_override(current_uthread(), 0);
			}
		}
	}
}

/*
 * psynch_mutexwait: This system call is used for contended psynch mutexes to block.
 */

int
_psynch_mutexwait(__unused proc_t p,
		  user_addr_t mutex,
		  uint32_t mgen,
		  uint32_t ugen,
		  uint64_t tid,
		  uint32_t flags,
		  uint32_t *retval)
{
	ksyn_wait_queue_t kwq;
	int error = 0;
	int ins_flags;

	int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
	uint32_t updatebits = 0;

	uint32_t lockseq = (mgen & PTHRW_COUNT_MASK);

	if (firstfit == 0) {
		ins_flags = SEQFIT;
	} else {
		/* first fit */
		ins_flags = FIRSTFIT;
	}

	error = ksyn_wqfind(mutex, mgen, ugen, 0, flags, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	// mutexwait passes in an owner hint at the time userspace contended for the mutex, however, the
	// owner tid in the userspace data structure may be unset or SWITCHING (-1), or it may correspond
	// to a stale snapshot after the lock has subsequently been unlocked by another thread.
	if (tid == 0) {
		// contender came in before owner could write TID
		tid = 0;
	} else if (kwq->kw_lastunlockseq != PTHRW_RWL_INIT && is_seqlower(ugen, kwq->kw_lastunlockseq)) {
		// owner is stale, someone has come in and unlocked since this contender read the TID, so
		// assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else if (tid == PTHREAD_MTX_TID_SWITCHING) {
		// userspace didn't know the owner because it was being unlocked, but that unlocker hasn't
		// reached the kernel yet. So assume what is known in the kernel is accurate
		tid = kwq->kw_owner;
	} else {
		// hint is being passed in for a specific thread, and we have no reason not to trust
		// it (like the kernel unlock sequence being higher)
	}

	if (_ksyn_handle_missed_wakeups(kwq, PTH_RW_TYPE_WRITE, lockseq, retval)) {
		ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
		kwq->kw_owner = thread_tid(current_thread());

		ksyn_wqunlock(kwq);
		goto out;
	}

	if ((kwq->kw_pre_rwwc != 0) && ((ins_flags == FIRSTFIT) || ((lockseq & PTHRW_COUNT_MASK) == (kwq->kw_pre_lockseq & PTHRW_COUNT_MASK)))) {
		/* got preposted lock */
		kwq->kw_pre_rwwc--;
		if (kwq->kw_pre_rwwc == 0) {
			CLEAR_PREPOST_BITS(kwq);
			if (kwq->kw_inqueue == 0) {
				updatebits = lockseq | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			} else {
				updatebits = (kwq->kw_highseq & PTHRW_COUNT_MASK) | (PTH_RWL_KBIT | PTH_RWL_EBIT);
			}
			updatebits &= ~PTH_RWL_MTX_WAIT;

			if (updatebits == 0) {
				__FAILEDUSERTEST__("psynch_mutexwait(prepost): returning 0 lseq in mutexwait with no EBIT\n");
			}

			ksyn_mtx_update_owner_qos_override(kwq, thread_tid(current_thread()), TRUE);
			kwq->kw_owner = thread_tid(current_thread());

			ksyn_wqunlock(kwq);
			*retval = updatebits;
			goto out;
		} else {
			__FAILEDUSERTEST__("psynch_mutexwait: more than one prepost\n");
			kwq->kw_pre_lockseq += PTHRW_INC; /* look for next one */
			ksyn_wqunlock(kwq);
			error = EINVAL;
			goto out;
		}
	}

	ksyn_mtx_update_owner_qos_override(kwq, tid, FALSE);
	kwq->kw_owner = tid;

	error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, mgen, ins_flags, 0, psynch_mtxcontinue);
	// ksyn_wait drops the wait queue lock
out:
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	return error;
}

void
psynch_mtxcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t kwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(kwq);
		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		ksyn_wqunlock(kwq);
	} else {
		uint32_t updatebits = kwe->kwe_psynchretval & ~PTH_RWL_MTX_WAIT;
		pthread_kern->uthread_set_returnval(uth, updatebits);

		if (updatebits == 0) {
			__FAILEDUSERTEST__("psynch_mutexwait: returning 0 lseq in mutexwait with no EBIT\n");
		}
	}
	ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_MTX));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_mutexdrop: This system call is used for unlock postings on contended psynch mutexes.
 */
int
_psynch_mutexdrop(__unused proc_t p,
		  user_addr_t mutex,
		  uint32_t mgen,
		  uint32_t ugen,
		  uint64_t tid __unused,
		  uint32_t flags,
		  uint32_t *retval)
{
	int res;
	ksyn_wait_queue_t kwq;

	res = ksyn_wqfind(mutex, mgen, ugen, 0, flags, KSYN_WQTYPE_MUTEXDROP, &kwq);
	if (res == 0) {
		uint32_t updateval = _psynch_mutexdrop_internal(kwq, mgen, ugen, flags);
		/* drops the kwq reference */
		if (retval) {
			*retval = updateval;
		}
	}

	return res;
}

static kern_return_t
ksyn_mtxsignal(ksyn_wait_queue_t kwq, ksyn_waitq_element_t kwe, uint32_t updateval)
{
	kern_return_t ret;

	if (!kwe) {
		kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_kwelist);
		if (!kwe) {
			panic("ksyn_mtxsignal: panic signaling empty queue");
		}
	}

	ksyn_mtx_transfer_qos_override(kwq, kwe);
	kwq->kw_owner = kwe->kwe_tid;

	ret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, kwe, updateval);

	// if waking the new owner failed, remove any overrides
	if (ret != KERN_SUCCESS) {
		ksyn_mtx_drop_qos_override(kwq);
		kwq->kw_owner = 0;
	}

	return ret;
}

static void
ksyn_prepost(ksyn_wait_queue_t kwq,
	     ksyn_waitq_element_t kwe,
	     uint32_t state,
	     uint32_t lockseq)
{
	bzero(kwe, sizeof(*kwe));
	kwe->kwe_state = state;
	kwe->kwe_lockseq = lockseq;
	kwe->kwe_count = 1;

	(void)ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, lockseq, SEQFIT);
	kwq->kw_fakecount++;
}

static void
ksyn_cvsignal(ksyn_wait_queue_t ckwq,
	      thread_t th,
	      uint32_t uptoseq,
	      uint32_t signalseq,
	      uint32_t *updatebits,
	      int *broadcast,
	      ksyn_waitq_element_t *nkwep)
{
	ksyn_waitq_element_t kwe = NULL;
	ksyn_waitq_element_t nkwe = NULL;
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];

	uptoseq &= PTHRW_COUNT_MASK;

	// Find the specified thread to wake.
	if (th != THREAD_NULL) {
		uthread_t uth = pthread_kern->get_bsdthread_info(th);
		kwe = pthread_kern->uthread_get_uukwe(uth);
		if (kwe->kwe_kwqqueue != ckwq ||
		    is_seqhigher(kwe->kwe_lockseq, uptoseq)) {
			// Unless it's no longer waiting on this CV...
			kwe = NULL;
			// ...in which case we post a broadcast instead.
			*broadcast = 1;
			return;
		}
	}

	// If no thread was specified, find any thread to wake (with the right
	// sequence number).
	while (th == THREAD_NULL) {
		if (kwe == NULL) {
			kwe = ksyn_queue_find_signalseq(ckwq, kq, uptoseq, signalseq);
		}
		if (kwe == NULL && nkwe == NULL) {
			// No eligible entries; need to allocate a new
			// entry to prepost. Loop to rescan after
			// reacquiring the lock after allocation in
			// case anything new shows up.
			ksyn_wqunlock(ckwq);
			nkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
			ksyn_wqlock(ckwq);
		} else {
			break;
		}
	}

	if (kwe != NULL) {
		// If we found a thread to wake...
		if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			if (is_seqlower(kwe->kwe_lockseq, signalseq)) {
				/*
				 * A valid thread in our range, but lower than our signal.
				 * Matching it may leave our match with nobody to wake it if/when
				 * it arrives (the signal originally meant for this thread might
				 * not successfully wake it).
				 *
				 * Convert to broadcast - may cause some spurious wakeups
				 * (allowed by spec), but avoids starvation (better choice).
				 */
				*broadcast = 1;
			} else {
				(void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
				*updatebits += PTHRW_INC;
			}
		} else if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			// Merge with existing prepost at same uptoseq.
			kwe->kwe_count += 1;
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// Existing broadcasts subsume this signal.
		} else {
			panic("unknown kwe state\n");
		}
		if (nkwe) {
			/*
			 * If we allocated a new kwe above but then found a different kwe to
			 * use then we need to deallocate the spare one.
			 */
			pthread_kern->zfree(kwe_zone, nkwe);
			nkwe = NULL;
		}
	} else if (nkwe != NULL) {
		// ...otherwise, insert the newly allocated prepost.
		ksyn_prepost(ckwq, nkwe, KWE_THREAD_PREPOST, uptoseq);
		nkwe = NULL;
	} else {
		panic("failed to allocate kwe\n");
	}

	*nkwep = nkwe;
}

static int
__psynch_cvsignal(user_addr_t cv,
		  uint32_t cgen,
		  uint32_t cugen,
		  uint32_t csgen,
		  uint32_t flags,
		  int broadcast,
		  mach_port_name_t threadport,
		  uint32_t *retval)
{
	int error = 0;
	thread_t th = THREAD_NULL;
	ksyn_wait_queue_t kwq;

	uint32_t uptoseq = cgen & PTHRW_COUNT_MASK;
	uint32_t fromseq = (cugen & PTHRW_COUNT_MASK) + PTHRW_INC;

	// validate sane L, U, and S values
	if ((threadport == 0 && is_seqhigher(fromseq, uptoseq)) || is_seqhigher(csgen, uptoseq)) {
		__FAILEDUSERTEST__("cvbroad: invalid L, U and S values\n");
		return EINVAL;
	}

	if (threadport != 0) {
		th = port_name_to_thread((mach_port_name_t)threadport);
		if (th == THREAD_NULL) {
			return ESRCH;
		}
	}

	error = ksyn_wqfind(cv, cgen, cugen, csgen, flags, (KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INDROP), &kwq);
	if (error == 0) {
		uint32_t updatebits = 0;
		ksyn_waitq_element_t nkwe = NULL;

		ksyn_wqlock(kwq);

		// update L, U and S...
		UPDATE_CVKWQ(kwq, cgen, cugen, csgen);

		if (!broadcast) {
			// No need to signal if the CV is already balanced.
			if (diff_genseq(kwq->kw_lword, kwq->kw_sword)) {
				ksyn_cvsignal(kwq, th, uptoseq, fromseq, &updatebits, &broadcast, &nkwe);
			}
		}

		if (broadcast) {
			ksyn_handle_cvbroad(kwq, uptoseq, &updatebits);
		}

		kwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
		// set C or P bits and free if needed
		ksyn_cvupdate_fixup(kwq, &updatebits);
		*retval = updatebits;

		ksyn_wqunlock(kwq);

		if (nkwe != NULL) {
			pthread_kern->zfree(kwe_zone, nkwe);
		}

		ksyn_wqrelease(kwq, 1, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_CVAR));
	}

	if (th != NULL) {
		thread_deallocate(th);
	}

	return error;
}

/*
 * psynch_cvbroad: This system call is used for broadcast posting on blocked waiters of psynch cvars.
 */
int
_psynch_cvbroad(__unused proc_t p,
		user_addr_t cv,
		uint64_t cvlsgen,
		uint64_t cvudgen,
		uint32_t flags,
		__unused user_addr_t mutex,
		__unused uint64_t mugen,
		__unused uint64_t tid,
		uint32_t *retval)
{
	uint32_t diffgen = cvudgen & 0xffffffff;
	uint32_t count = diffgen >> PTHRW_COUNT_SHIFT;
	if (count > pthread_kern->get_task_threadmax()) {
		__FAILEDUSERTEST__("cvbroad: difference greater than maximum possible thread count\n");
		return EBUSY;
	}

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t cugen = (cvudgen >> 32) & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cugen, csgen, flags, 1, 0, retval);
}

/*
 * psynch_cvsignal: This system call is used for signalling the blocked waiters of psynch cvars.
 */
int
_psynch_cvsignal(__unused proc_t p,
		 user_addr_t cv,
		 uint64_t cvlsgen,
		 uint32_t cvugen,
		 int threadport,
		 __unused user_addr_t mutex,
		 __unused uint64_t mugen,
		 __unused uint64_t tid,
		 uint32_t flags,
		 uint32_t *retval)
{
	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;

	return __psynch_cvsignal(cv, cgen, cvugen, csgen, flags, 0, threadport, retval);
}

/*
 * psynch_cvwait: This system call is used for psynch cvar waiters to block in kernel.
 */
int
_psynch_cvwait(__unused proc_t p,
	       user_addr_t cv,
	       uint64_t cvlsgen,
	       uint32_t cvugen,
	       user_addr_t mutex,
	       uint64_t mugen,
	       uint32_t flags,
	       int64_t sec,
	       uint32_t nsec,
	       uint32_t *retval)
{
	int error = 0;
	uint32_t updatebits = 0;
	ksyn_wait_queue_t ckwq = NULL;
	ksyn_waitq_element_t kwe, nkwe = NULL;

	/* for conformance reasons */
	pthread_kern->__pthread_testcancel(0);

	uint32_t csgen = (cvlsgen >> 32) & 0xffffffff;
	uint32_t cgen = cvlsgen & 0xffffffff;
	uint32_t ugen = (mugen >> 32) & 0xffffffff;
	uint32_t mgen = mugen & 0xffffffff;

	uint32_t lockseq = (cgen & PTHRW_COUNT_MASK);

	/*
	 * In cvwait the U word can be out of range, as the cv could be used
	 * only for timeouts. However the S word needs to be within bounds and
	 * is validated at user level as well.
	 */
	if (is_seqhigher_eq(csgen, lockseq) != 0) {
		__FAILEDUSERTEST__("psynch_cvwait: invalid sequence numbers\n");
		return EINVAL;
	}

	error = ksyn_wqfind(cv, cgen, cvugen, csgen, flags, KSYN_WQTYPE_CVAR | KSYN_WQTYPE_INWAIT, &ckwq);
	if (error != 0) {
		return error;
	}

	if (mutex != 0) {
		error = _psynch_mutexdrop(NULL, mutex, mgen, ugen, 0, flags, NULL);
		if (error != 0) {
			goto out;
		}
	}

	ksyn_wqlock(ckwq);

	// update L, U and S...
	UPDATE_CVKWQ(ckwq, cgen, cvugen, csgen);

	/* Look for the sequence for prepost (or a conflicting thread) */
	ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
	kwe = ksyn_queue_find_cvpreposeq(kq, lockseq);
	if (kwe != NULL) {
		if (kwe->kwe_state == KWE_THREAD_PREPOST) {
			if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == lockseq) {
				/* we can safely consume a reference, so do so */
				if (--kwe->kwe_count == 0) {
					ksyn_queue_remove_item(ckwq, kq, kwe);
					ckwq->kw_fakecount--;
					nkwe = kwe;
				}
			} else {
				/*
				 * consuming a prepost higher than our lock sequence is valid, but
				 * can leave the higher thread without a match. Convert the entry
				 * to a broadcast to compensate for this.
				 */
				ksyn_handle_cvbroad(ckwq, kwe->kwe_lockseq, &updatebits);
#if __TESTPANICS__
				if (updatebits != 0)
					panic("psynch_cvwait: convert pre-post to broadcast: woke up %d threads that shouldn't be there\n", updatebits);
#endif /* __TESTPANICS__ */
			}
		} else if (kwe->kwe_state == KWE_THREAD_BROADCAST) {
			// XXX
			// Nothing to do.
		} else if (kwe->kwe_state == KWE_THREAD_INWAIT) {
			__FAILEDUSERTEST__("cvwait: thread entry with same sequence already present\n");
			error = EBUSY;
		} else {
			panic("psynch_cvwait: unexpected wait queue element type\n");
		}

		if (error == 0) {
			updatebits = PTHRW_INC;
			ckwq->kw_sword += PTHRW_INC;

			/* set C or P bits and free if needed */
			ksyn_cvupdate_fixup(ckwq, &updatebits);
			*retval = updatebits;
		}
	} else {
		uint64_t abstime = 0;

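		// Only the low 30 bits of nsec carry the nanosecond count; the
		// value is masked before the deadline conversion below.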
		if (sec != 0 || (nsec & 0x3fffffff) != 0) {
			struct timespec ts;
			ts.tv_sec = (__darwin_time_t)sec;
			ts.tv_nsec = (nsec & 0x3fffffff);
			nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec, &abstime);
			clock_absolutetime_interval_to_deadline(abstime, &abstime);
		}

		error = ksyn_wait(ckwq, KSYN_QUEUE_WRITER, cgen, SEQFIT, abstime, psynch_cvcontinue);
		// ksyn_wait drops the wait queue lock
	}

	ksyn_wqunlock(ckwq);

	if (nkwe != NULL) {
		pthread_kern->zfree(kwe_zone, nkwe);
	}
out:
	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	return error;
}

void
psynch_cvcontinue(void *parameter, wait_result_t result)
{
	uthread_t uth = current_uthread();
	ksyn_wait_queue_t ckwq = parameter;
	ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);

	int error = _wait_result_to_errno(result);
	if (error != 0) {
		ksyn_wqlock(ckwq);
		/* just in case it got woken up as we were granting */
		pthread_kern->uthread_set_returnval(uth, kwe->kwe_psynchretval);

		if (kwe->kwe_kwqqueue) {
			ksyn_queue_remove_item(ckwq, &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER], kwe);
		}
		if ((kwe->kwe_psynchretval & PTH_RWL_MTX_WAIT) != 0) {
			/* the condition var was granted.
			 * reset the error so that the thread returns back.
			 */
			error = 0;
			/* no need to set any bits just return as cvsig/broad covers this */
		} else {
			ckwq->kw_sword += PTHRW_INC;

			/* set C and P bits, in the local error */
			if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
				error |= ECVCERORR;
				if (ckwq->kw_inqueue != 0) {
					ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 1);
				}
				ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
				ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
			} else {
				/* is everything in the queue a fake entry? */
				if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
					error |= ECVPERORR;
				}
			}
		}
		ksyn_wqunlock(ckwq);
	} else {
		int val = 0;
		// PTH_RWL_MTX_WAIT is removed
		if ((kwe->kwe_psynchretval & PTH_RWS_CV_MBIT) != 0) {
			val = PTHRW_INC | PTH_RWS_CV_CBIT;
		}
		pthread_kern->uthread_set_returnval(uth, val);
	}

	ksyn_wqrelease(ckwq, 1, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_CVAR));
	pthread_kern->unix_syscall_return(error);
}

/*
 * psynch_cvclrprepost: This system call clears pending prepost if present.
 */
int
_psynch_cvclrprepost(__unused proc_t p,
		     user_addr_t cv,
		     uint32_t cvgen,
		     uint32_t cvugen,
		     uint32_t cvsgen,
		     __unused uint32_t prepocnt,
		     uint32_t preposeq,
		     uint32_t flags,
		     int *retval)
{
	int error = 0;
	int mutex = (flags & _PTHREAD_MTX_OPT_MUTEX);
	int wqtype = (mutex ? KSYN_WQTYPE_MTX : KSYN_WQTYPE_CVAR) | KSYN_WQTYPE_INDROP;
	ksyn_wait_queue_t kwq = NULL;

	*retval = 0;

	error = ksyn_wqfind(cv, cvgen, cvugen, mutex ? 0 : cvsgen, flags, wqtype, &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);

	if (mutex) {
		int firstfit = (flags & PTHREAD_POLICY_FLAGS_MASK) == _PTHREAD_MUTEX_POLICY_FIRSTFIT;
		if (firstfit && kwq->kw_pre_rwwc != 0) {
			if (is_seqlower_eq(kwq->kw_pre_lockseq, cvgen)) {
				// clear prepost
				kwq->kw_pre_rwwc = 0;
				kwq->kw_pre_lockseq = 0;
			}
		}
	} else {
		ksyn_queue_free_items(kwq, KSYN_QUEUE_WRITER, preposeq, 0);
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 1, wqtype);
	return error;
}

/* ***************** pthread_rwlock ************************ */

static int
__psynch_rw_lock(int type,
		 user_addr_t rwlock,
		 uint32_t lgenval,
		 uint32_t ugenval,
		 uint32_t rw_wc,
		 int flags,
		 uint32_t *retval)
{
	int prepost_type, kqi;

	if (type == PTH_RW_TYPE_READ) {
		prepost_type = KW_UNLOCK_PREPOST_READLOCK;
		kqi = KSYN_QUEUE_READ;
	} else {
		prepost_type = KW_UNLOCK_PREPOST_WRLOCK;
		kqi = KSYN_QUEUE_WRITER;
	}

	uint32_t lockseq = lgenval & PTHRW_COUNT_MASK;

	int error;
	ksyn_wait_queue_t kwq;
	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error == 0) {
		ksyn_wqlock(kwq);
		_ksyn_check_init(kwq, lgenval);
		if (_ksyn_handle_missed_wakeups(kwq, type, lockseq, retval) ||
		    // handle overlap first as they are not counted against pre_rwwc
		    (type == PTH_RW_TYPE_READ && _ksyn_handle_overlap(kwq, lgenval, rw_wc, retval)) ||
		    _ksyn_handle_prepost(kwq, prepost_type, lockseq, retval)) {
			ksyn_wqunlock(kwq);
		} else {
			error = ksyn_wait(kwq, kqi, lgenval, SEQFIT, 0, THREAD_CONTINUE_NULL);
			// ksyn_wait drops the wait queue lock
			if (error == 0) {
				uthread_t uth = current_uthread();
				ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
				*retval = kwe->kwe_psynchretval;
			}
		}
		ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INWAIT | KSYN_WQTYPE_RWLOCK));
	}
	return error;
}

/*
 * psynch_rw_rdlock: This system call is used for psynch rwlock readers to block.
 */
int
_psynch_rw_rdlock(__unused proc_t p,
		  user_addr_t rwlock,
		  uint32_t lgenval,
		  uint32_t ugenval,
		  uint32_t rw_wc,
		  int flags,
		  uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_READ, rwlock, lgenval, ugenval, rw_wc, flags, retval);
}

/*
 * psynch_rw_longrdlock: This system call was used for psynch rwlock long readers
 * to block; it is no longer supported and simply returns ESRCH.
 */
int
_psynch_rw_longrdlock(__unused proc_t p,
		      __unused user_addr_t rwlock,
		      __unused uint32_t lgenval,
		      __unused uint32_t ugenval,
		      __unused uint32_t rw_wc,
		      __unused int flags,
		      __unused uint32_t *retval)
{
	return ESRCH;
}

/*
 * psynch_rw_wrlock: This system call is used for psynch rwlock writers to block.
 */
int
_psynch_rw_wrlock(__unused proc_t p,
		  user_addr_t rwlock,
		  uint32_t lgenval,
		  uint32_t ugenval,
		  uint32_t rw_wc,
		  int flags,
		  uint32_t *retval)
{
	return __psynch_rw_lock(PTH_RW_TYPE_WRITE, rwlock, lgenval, ugenval, rw_wc, flags, retval);
}

/*
 * psynch_rw_yieldwrlock: This system call was used for psynch rwlock yielding
 * writers to block; it is no longer supported and simply returns ESRCH.
 */
int
_psynch_rw_yieldwrlock(__unused proc_t p,
		       __unused user_addr_t rwlock,
		       __unused uint32_t lgenval,
		       __unused uint32_t ugenval,
		       __unused uint32_t rw_wc,
		       __unused int flags,
		       __unused uint32_t *retval)
{
	return ESRCH;
}

/*
 * psynch_rw_unlock: This system call is used for unlock state postings. This will grant the appropriate
 * reader/writer variety lock.
 */
int
_psynch_rw_unlock(__unused proc_t p,
		  user_addr_t rwlock,
		  uint32_t lgenval,
		  uint32_t ugenval,
		  uint32_t rw_wc,
		  int flags,
		  uint32_t *retval)
{
	int error = 0;
	ksyn_wait_queue_t kwq;
	uint32_t updatebits = 0;
	int diff;
	uint32_t count = 0;
	uint32_t curgen = lgenval & PTHRW_COUNT_MASK;

	error = ksyn_wqfind(rwlock, lgenval, ugenval, rw_wc, flags, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK), &kwq);
	if (error != 0) {
		return error;
	}

	ksyn_wqlock(kwq);
	int isinit = _ksyn_check_init(kwq, lgenval);

	/* if the last unlock seq is set, ensure the current one is not lower than that, as it would be spurious */
	if ((kwq->kw_lastunlockseq != PTHRW_RWL_INIT) && (is_seqlower(ugenval, kwq->kw_lastunlockseq) != 0)) {
		error = 0;
		goto out;
	}

	/* If L-U != num of waiters, then it needs to be preposted or spurious */
	diff = find_diff(lgenval, ugenval);

	if (find_seq_till(kwq, curgen, diff, &count) == 0) {
		if ((count == 0) || (count < (uint32_t)diff))
			goto prepost;
	}

	/* no prepost and all threads are in place, reset the bit */
	if ((isinit != 0) && ((kwq->kw_kflags & KSYN_KWF_INITCLEARED) != 0)) {
		kwq->kw_kflags &= ~KSYN_KWF_INITCLEARED;
	}

	/* can handle unlock now */

	CLEAR_PREPOST_BITS(kwq);

	error = kwq_handle_unlock(kwq, lgenval, rw_wc, &updatebits, 0, NULL, 0);
#if __TESTPANICS__
	if (error != 0)
		panic("psynch_rw_unlock: kwq_handle_unlock failed %d\n", error);
#endif /* __TESTPANICS__ */
out:
	if (error == 0) {
		/* update bits?? */
		*retval = updatebits;
	}

	ksyn_wqunlock(kwq);
	ksyn_wqrelease(kwq, 0, (KSYN_WQTYPE_INDROP | KSYN_WQTYPE_RWLOCK));

	return error;

prepost:
	/* update if the new seq is higher than prev prepost, or first set */
	if (is_rws_setseq(kwq->kw_pre_sseq) ||
	    is_seqhigher_eq(rw_wc, kwq->kw_pre_sseq)) {
		kwq->kw_pre_rwwc = (diff - count);
		kwq->kw_pre_lockseq = curgen;
		kwq->kw_pre_sseq = rw_wc;
		updatebits = lgenval;	/* let this not do unlock handling */
	}
	error = 0;
	goto out;
}

/* ************************************************************************** */
void
pth_global_hashinit(void)
{
	pth_glob_hashtbl = hashinit(PTH_HASHSIZE * 4, M_PROC, &pthhash);
}

void
_pth_proc_hashinit(proc_t p)
{
	void *ptr = hashinit(PTH_HASHSIZE, M_PCB, &pthhash);
	if (ptr == NULL) {
		panic("pth_proc_hashinit: hash init returned 0\n");
	}

	pthread_kern->proc_set_pthhash(p, ptr);
}

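/*
 * Look up a wait queue in the hash: process-shared objects are keyed by the
 * backing VM object and offset in the global table, process-private ones by
 * user address in the per-process table. *out_kwq is set to NULL when no
 * match is found.
 */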
static int
ksyn_wq_hash_lookup(user_addr_t uaddr,
		    proc_t p,
		    int flags,
		    ksyn_wait_queue_t *out_kwq,
		    struct pthhashhead **out_hashptr,
		    uint64_t *out_object,
		    uint64_t *out_offset)
{
	int res = 0;
	ksyn_wait_queue_t kwq;
	uint64_t object = 0, offset = 0;
	struct pthhashhead *hashptr;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		hashptr = pth_glob_hashtbl;
		res = ksyn_findobj(uaddr, &object, &offset);
		if (res == 0) {
			LIST_FOREACH(kwq, &hashptr[object & pthhash], kw_hash) {
				if (kwq->kw_object == object && kwq->kw_offset == offset) {
					break;
				}
			}
		} else {
			kwq = NULL;
		}
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
		LIST_FOREACH(kwq, &hashptr[uaddr & pthhash], kw_hash) {
			if (kwq->kw_addr == uaddr) {
				break;
			}
		}
	}
	*out_kwq = kwq;
	*out_object = object;
	*out_offset = offset;
	*out_hashptr = hashptr;
	return res;
}

void
_pth_proc_hashdelete(proc_t p)
{
	struct pthhashhead *hashptr;
	ksyn_wait_queue_t kwq;
	unsigned long hashsize = pthhash + 1;
	unsigned long i;

	hashptr = pthread_kern->proc_get_pthhash(p);
	pthread_kern->proc_set_pthhash(p, NULL);
	if (hashptr == NULL) {
		return;
	}

	pthread_list_lock();
	for (i = 0; i < hashsize; i++) {
		while ((kwq = LIST_FIRST(&hashptr[i])) != NULL) {
			if ((kwq->kw_pflags & KSYN_WQ_INHASH) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
			}
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
				LIST_REMOVE(kwq, kw_list);
			}
			pthread_list_unlock();
			/* release fake entries if present for cvars */
			if (((kwq->kw_type & KSYN_WQTYPE_MASK) == KSYN_WQTYPE_CVAR) && (kwq->kw_inqueue != 0))
				ksyn_freeallkwe(&kwq->kw_ksynqueues[KSYN_QUEUE_WRITER]);
			lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
			pthread_kern->zfree(kwq_zone, kwq);
			pthread_list_lock();
		}
	}
	pthread_list_unlock();
	FREE(hashptr, M_PROC);
}

/* no lock held for this as the waitqueue is getting freed */
void
ksyn_freeallkwe(ksyn_queue_t kq)
{
	ksyn_waitq_element_t kwe;
	while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
		TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
		if (kwe->kwe_state != KWE_THREAD_INWAIT) {
			pthread_kern->zfree(kwe_zone, kwe);
		}
	}
}

/* Find the kernel wait queue; if not present, create one. Grants a reference. */
int
ksyn_wqfind(user_addr_t uaddr, uint32_t mgen, uint32_t ugen, uint32_t sgen, int flags, int wqtype, ksyn_wait_queue_t *kwqp)
{
	int res = 0;
	ksyn_wait_queue_t kwq = NULL;
	ksyn_wait_queue_t nkwq = NULL;
	struct pthhashhead *hashptr;
	proc_t p = current_proc();

	uint64_t object = 0, offset = 0;
	if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
		res = ksyn_findobj(uaddr, &object, &offset);
		hashptr = pth_glob_hashtbl;
	} else {
		hashptr = pthread_kern->proc_get_pthhash(p);
	}

	while (res == 0) {
		pthread_list_lock();
		res = ksyn_wq_hash_lookup(uaddr, current_proc(), flags, &kwq, &hashptr, &object, &offset);
		if (res != 0) {
			break;
		}
		if (kwq == NULL && nkwq == NULL) {
			// Drop the lock to allocate a new kwq and retry.
			pthread_list_unlock();

			nkwq = (ksyn_wait_queue_t)pthread_kern->zalloc(kwq_zone);
			bzero(nkwq, sizeof(struct ksyn_wait_queue));
			int i;
			for (i = 0; i < KSYN_QUEUE_MAX; i++) {
				ksyn_queue_init(&nkwq->kw_ksynqueues[i]);
			}
			lck_mtx_init(&nkwq->kw_lock, pthread_lck_grp, pthread_lck_attr);
			continue;
		} else if (kwq == NULL && nkwq != NULL) {
			// Still not found, add the new kwq to the hash.
			kwq = nkwq;
			nkwq = NULL; // Don't free.
			if ((flags & PTHREAD_PSHARED_FLAGS_MASK) == PTHREAD_PROCESS_SHARED) {
				kwq->kw_pflags |= KSYN_WQ_SHARED;
				LIST_INSERT_HEAD(&hashptr[object & pthhash], kwq, kw_hash);
			} else {
				LIST_INSERT_HEAD(&hashptr[uaddr & pthhash], kwq, kw_hash);
			}
			kwq->kw_pflags |= KSYN_WQ_INHASH;
		} else if (kwq != NULL) {
			// Found an existing kwq, use it.
			if ((kwq->kw_pflags & KSYN_WQ_FLIST) != 0) {
				LIST_REMOVE(kwq, kw_list);
				kwq->kw_pflags &= ~KSYN_WQ_FLIST;
			}
			if ((kwq->kw_type & KSYN_WQTYPE_MASK) != (wqtype & KSYN_WQTYPE_MASK)) {
				if (kwq->kw_inqueue == 0 && kwq->kw_pre_rwwc == 0 && kwq->kw_pre_intrcount == 0) {
					if (kwq->kw_iocount == 0) {
						kwq->kw_type = 0; // mark for reinitialization
					} else if (kwq->kw_iocount == 1 && kwq->kw_dropcount == kwq->kw_iocount) {
						/* if all users are unlockers then wait for it to finish */
						kwq->kw_pflags |= KSYN_WQ_WAITING;
						// Drop the lock and wait for the kwq to be free.
						(void)msleep(&kwq->kw_pflags, pthread_list_mlock, PDROP, "ksyn_wqfind", 0);
						continue;
					} else {
						__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
						res = EINVAL;
					}
				} else {
					__FAILEDUSERTEST__("address already known to kernel for another [busy] synchronizer type\n");
					res = EINVAL;
				}
			}
		}
		if (res == 0) {
			if (kwq->kw_type == 0) {
				kwq->kw_addr = uaddr;
				kwq->kw_object = object;
				kwq->kw_offset = offset;
				kwq->kw_type = (wqtype & KSYN_WQTYPE_MASK);
				CLEAR_REINIT_BITS(kwq);
				kwq->kw_lword = mgen;
				kwq->kw_uword = ugen;
				kwq->kw_sword = sgen;
				kwq->kw_owner = 0;
				kwq->kw_kflags = 0;
				kwq->kw_qos_override = THREAD_QOS_UNSPECIFIED;
			}
			kwq->kw_iocount++;
			if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
				kwq->kw_dropcount++;
			}
		}
		break;
	}
	pthread_list_unlock();
	if (kwqp != NULL) {
		*kwqp = kwq;
	}
	if (nkwq) {
		lck_mtx_destroy(&nkwq->kw_lock, pthread_lck_grp);
		pthread_kern->zfree(kwq_zone, nkwq);
	}
	return res;
}

/* The reference from find is dropped here. Starts the free process if needed. */
void
ksyn_wqrelease(ksyn_wait_queue_t kwq, int qfreenow, int wqtype)
{
	uint64_t deadline;
	ksyn_wait_queue_t free_elem = NULL;

	pthread_list_lock();
	if (wqtype == KSYN_WQTYPE_MUTEXDROP) {
		kwq->kw_dropcount--;
	}
	if (--kwq->kw_iocount == 0) {
		if ((kwq->kw_pflags & KSYN_WQ_WAITING) != 0) {
			/* someone is waiting for the waitqueue, wake them up */
			kwq->kw_pflags &= ~KSYN_WQ_WAITING;
			wakeup(&kwq->kw_pflags);
		}

		if (kwq->kw_pre_rwwc == 0 && kwq->kw_inqueue == 0 && kwq->kw_pre_intrcount == 0) {
			if (qfreenow == 0) {
				microuptime(&kwq->kw_ts);
				LIST_INSERT_HEAD(&pth_free_list, kwq, kw_list);
				kwq->kw_pflags |= KSYN_WQ_FLIST;
				if (psynch_cleanupset == 0) {
					struct timeval t;
					microuptime(&t);
					t.tv_sec += KSYN_CLEANUP_DEADLINE;
					deadline = tvtoabstime(&t);
					thread_call_enter_delayed(psynch_thcall, deadline);
					psynch_cleanupset = 1;
				}
			} else {
				kwq->kw_pflags &= ~KSYN_WQ_INHASH;
				LIST_REMOVE(kwq, kw_hash);
				free_elem = kwq;
			}
		}
	}
	pthread_list_unlock();
	if (free_elem != NULL) {
		lck_mtx_destroy(&free_elem->kw_lock, pthread_lck_grp);
		pthread_kern->zfree(kwq_zone, free_elem);
	}
}

/* responsible for freeing the waitqueues */
void
psynch_wq_cleanup(__unused void *param, __unused void *param1)
{
	ksyn_wait_queue_t kwq;
	struct timeval t;
	int reschedule = 0;
	uint64_t deadline = 0;
	LIST_HEAD(, ksyn_wait_queue) freelist;
	LIST_INIT(&freelist);

	pthread_list_lock();

	microuptime(&t);

	LIST_FOREACH(kwq, &pth_free_list, kw_list) {
		if (kwq->kw_iocount != 0 || kwq->kw_pre_rwwc != 0 || kwq->kw_inqueue != 0 || kwq->kw_pre_intrcount != 0) {
			// still in use
			continue;
		}
		__darwin_time_t diff = t.tv_sec - kwq->kw_ts.tv_sec;
		if (diff < 0)
			diff *= -1;
		if (diff >= KSYN_CLEANUP_DEADLINE) {
			kwq->kw_pflags &= ~(KSYN_WQ_FLIST | KSYN_WQ_INHASH);
			LIST_REMOVE(kwq, kw_hash);
			LIST_REMOVE(kwq, kw_list);
			LIST_INSERT_HEAD(&freelist, kwq, kw_list);
		} else {
			reschedule = 1;
		}
	}
	if (reschedule != 0) {
		t.tv_sec += KSYN_CLEANUP_DEADLINE;
		deadline = tvtoabstime(&t);
		thread_call_enter_delayed(psynch_thcall, deadline);
		psynch_cleanupset = 1;
	} else {
		psynch_cleanupset = 0;
	}
	pthread_list_unlock();

	while ((kwq = LIST_FIRST(&freelist)) != NULL) {
		LIST_REMOVE(kwq, kw_list);
		lck_mtx_destroy(&kwq->kw_lock, pthread_lck_grp);
		pthread_kern->zfree(kwq_zone, kwq);
	}
}

1790 static int
1791 _wait_result_to_errno(wait_result_t result)
1792 {
1793 int res = 0;
1794 switch (result) {
1795 case THREAD_TIMED_OUT:
1796 res = ETIMEDOUT;
1797 break;
1798 case THREAD_INTERRUPTED:
1799 res = EINTR;
1800 break;
1801 }
1802 return res;
1803 }
1804
1805 int
1806 ksyn_wait(ksyn_wait_queue_t kwq,
1807 int kqi,
1808 uint32_t lockseq,
1809 int fit,
1810 uint64_t abstime,
1811 thread_continue_t continuation)
1812 {
1813 int res;
1814
1815 thread_t th = current_thread();
1816 uthread_t uth = pthread_kern->get_bsdthread_info(th);
1817 ksyn_waitq_element_t kwe = pthread_kern->uthread_get_uukwe(uth);
1818 bzero(kwe, sizeof(*kwe));
1819 kwe->kwe_count = 1;
1820 kwe->kwe_lockseq = lockseq & PTHRW_COUNT_MASK;
1821 kwe->kwe_state = KWE_THREAD_INWAIT;
1822 kwe->kwe_uth = uth;
1823 kwe->kwe_tid = thread_tid(th);
1824
1825 res = ksyn_queue_insert(kwq, kqi, kwe, lockseq, fit);
1826 if (res != 0) {
1827 //panic("psynch_rw_wrlock: failed to enqueue\n"); // XXX
1828 ksyn_wqunlock(kwq);
1829 return res;
1830 }
1831
1832 assert_wait_deadline_with_leeway(&kwe->kwe_psynchretval, THREAD_ABORTSAFE, TIMEOUT_URGENCY_USER_NORMAL, abstime, 0);
1833 ksyn_wqunlock(kwq);
1834
1835 kern_return_t ret;
1836 if (continuation == THREAD_CONTINUE_NULL) {
1837 ret = thread_block(NULL);
1838 } else {
1839 ret = thread_block_parameter(continuation, kwq);
1840
1841 // If thread_block_parameter returns (interrupted) call the
1842 // continuation manually to clean up.
1843 continuation(kwq, ret);
1844
1845 // NOT REACHED
1846 panic("ksyn_wait continuation returned");
1847 }
1848
1849 res = _wait_result_to_errno(ret);
1850 if (res != 0) {
1851 ksyn_wqlock(kwq);
1852 if (kwe->kwe_kwqqueue) {
1853 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1854 }
1855 ksyn_wqunlock(kwq);
1856 }
1857 return res;
1858 }
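
/*
 * Illustrative sketch (not part of the build): ksyn_wait() consumes the
 * kwq lock, so callers enter with it held; a zero return means the
 * thread was woken by ksyn_signal() with kwe_psynchretval carrying the
 * update bits. Roughly:
 *
 * ksyn_wqlock(kwq);
 * error = ksyn_wait(kwq, KSYN_QUEUE_WRITER, lockseq, FIRSTFIT, abstime, THREAD_CONTINUE_NULL);
 * // the kwq lock is dropped on return; error is 0, ETIMEDOUT or EINTR
 */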
1859
1860 kern_return_t
1861 ksyn_signal(ksyn_wait_queue_t kwq,
1862 int kqi,
1863 ksyn_waitq_element_t kwe,
1864 uint32_t updateval)
1865 {
1866 kern_return_t ret;
1867
1868 // If no wait element was specified, wake the first.
1869 if (!kwe) {
1870 kwe = TAILQ_FIRST(&kwq->kw_ksynqueues[kqi].ksynq_kwelist);
1871 if (!kwe) {
1872 panic("ksyn_signal: signaling empty queue");
1873 }
1874 }
1875
1876 if (kwe->kwe_state != KWE_THREAD_INWAIT) {
1877 panic("ksyn_signal: signaling non-waiting element");
1878 }
1879
1880 ksyn_queue_remove_item(kwq, &kwq->kw_ksynqueues[kqi], kwe);
1881 kwe->kwe_psynchretval = updateval;
1882
1883 ret = thread_wakeup_one((caddr_t)&kwe->kwe_psynchretval);
1884 if (ret != KERN_SUCCESS && ret != KERN_NOT_WAITING) {
1885 panic("ksyn_signal: failed to wake up thread %x\n", ret);
1886 }
1887 return ret;
1888 }
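
/*
 * Illustrative sketch (not part of the build): wakers hold the kwq lock
 * and typically signal the head of a queue; KERN_NOT_WAITING means the
 * target had already been interrupted, so the wakeup must be recorded
 * as a prepost instead. Roughly:
 *
 * kern_return_t kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
 * if (kret == KERN_NOT_WAITING) {
 * // record an interrupted-waiter prepost, as kwq_handle_unlock() does below
 * }
 */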
1889
1890 int
1891 ksyn_findobj(user_addr_t uaddr, uint64_t *objectp, uint64_t *offsetp)
1892 {
1893 kern_return_t ret;
1894 vm_page_info_basic_data_t info;
1895 mach_msg_type_number_t count = VM_PAGE_INFO_BASIC_COUNT;
1896 ret = pthread_kern->vm_map_page_info(pthread_kern->current_map(), uaddr, VM_PAGE_INFO_BASIC, (vm_page_info_t)&info, &count);
1897 if (ret != KERN_SUCCESS) {
1898 return EINVAL;
1899 }
1900
1901 if (objectp != NULL) {
1902 *objectp = (uint64_t)info.object_id;
1903 }
1904 if (offsetp != NULL) {
1905 *offsetp = (uint64_t)info.offset;
1906 }
1907
1908 return(0);
1909 }
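
/*
 * Illustrative sketch (not part of the build): for process-shared
 * synchronizers, the (object, offset) pair returned here, rather than
 * the per-process mapping address, is what would identify the kwq in
 * the global hash. Roughly:
 *
 * uint64_t object = 0, offset = 0;
 * if (ksyn_findobj(uaddr, &object, &offset) == 0) {
 * // hash on the object/offset pair instead of uaddr
 * }
 */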
1910
1911
1912 /* lowest pending sequence across the read (kw_fr) and write (kw_fwr) queues */
1913 int
1914 kwq_find_rw_lowest(ksyn_wait_queue_t kwq, int flags, uint32_t premgen, int *typep, uint32_t lowest[])
1915 {
1916 uint32_t kw_fr, kw_fwr, low;
1917 int type = 0, lowtype, typenum[2] = { 0 };
1918 uint32_t numbers[2] = { 0 };
1919 int count = 0, i;
1920
1921
1922 if ((kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0)) {
1923 type |= PTH_RWSHFT_TYPE_READ;
1924 /* read entries are present */
1925 if (kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count != 0) {
1926 kw_fr = kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_firstnum;
1927 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, kw_fr) != 0))
1928 kw_fr = premgen;
1929 } else
1930 kw_fr = premgen;
1931
1932 lowest[KSYN_QUEUE_READ] = kw_fr;
1933 numbers[count] = kw_fr;
1934 typenum[count] = PTH_RW_TYPE_READ;
1935 count++;
1936 } else
1937 lowest[KSYN_QUEUE_READ] = 0;
1938
1939 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) || ((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0)) {
1940 type |= PTH_RWSHFT_TYPE_WRITE;
1941 /* write entries are present */
1942 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
1943 kw_fwr = kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_firstnum;
1944 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (is_seqlower(premgen, kw_fwr) != 0))
1945 kw_fwr = premgen;
1946 } else
1947 kw_fwr = premgen;
1948
1949 lowest[KSYN_QUEUE_WRITER] = kw_fwr;
1950 numbers[count] = kw_fwr;
1951 typenum[count] = PTH_RW_TYPE_WRITE;
1952 count++;
1953 } else
1954 lowest[KSYN_QUEUE_WRITER] = 0;
1955
1956 #if __TESTPANICS__
1957 if (count == 0)
1958 panic("nothing in the queue???\n");
1959 #endif /* __TESTPANICS__ */
1960
1961 low = numbers[0];
1962 lowtype = typenum[0];
1963 if (count > 1) {
1964 for (i = 1; i < count; i++) {
1965 if (is_seqlower(numbers[i], low) != 0) {
1966 low = numbers[i];
1967 lowtype = typenum[i];
1968 }
1969 }
1970 }
1971 type |= lowtype;
1972
1973 if (typep != NULL)
1974 *typep = type;
1975 return(0);
1976 }
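
/*
 * Worked example (illustrative): the comparisons above use
 * is_seqlower(), which is wraparound-aware, so a reader queued just
 * below the 32-bit wrap point still sorts lower than a writer queued
 * just above it. With only those two entries, *typep would carry
 * PTH_RWSHFT_TYPE_READ | PTH_RWSHFT_TYPE_WRITE | PTH_RW_TYPE_READ,
 * i.e. both queues populated and the reader holding the lowest
 * sequence.
 */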
1977
1978 /* wake up readers, up to the writer limit */
1979 int
1980 ksyn_wakeupreaders(ksyn_wait_queue_t kwq, uint32_t limitread, int allreaders, uint32_t updatebits, int *wokenp)
1981 {
1982 ksyn_queue_t kq;
1983 int failedwakeup = 0;
1984 int numwoken = 0;
1985 kern_return_t kret = KERN_SUCCESS;
1986 uint32_t lbits = 0;
1987
1988 lbits = updatebits;
1989
1990 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
1991 while ((kq->ksynq_count != 0) && (allreaders || (is_seqlower(kq->ksynq_firstnum, limitread) != 0))) {
1992 kret = ksyn_signal(kwq, KSYN_QUEUE_READ, NULL, lbits);
1993 if (kret == KERN_NOT_WAITING) {
1994 failedwakeup++;
1995 }
1996 numwoken++;
1997 }
1998
1999 if (wokenp != NULL)
2000 *wokenp = numwoken;
2001 return(failedwakeup);
2002 }
2003
2004
2005 /* Handles the unlock grants for the next set of waiters on rw_unlock(), or on arrival of all preposted waiters */
2006 int
2007 kwq_handle_unlock(ksyn_wait_queue_t kwq,
2008 __unused uint32_t mgen,
2009 uint32_t rw_wc,
2010 uint32_t *updatep,
2011 int flags,
2012 int *blockp,
2013 uint32_t premgen)
2014 {
2015 uint32_t low_writer, limitrdnum;
2016 int rwtype, error=0;
2017 int allreaders, failed;
2018 uint32_t updatebits = 0, numneeded = 0;
2019 int prepost = flags & KW_UNLOCK_PREPOST;
2020 thread_t preth = THREAD_NULL;
2021 ksyn_waitq_element_t kwe;
2022 uthread_t uth;
2023 thread_t th;
2024 int woken = 0;
2025 int block = 1;
2026 uint32_t lowest[KSYN_QUEUE_MAX]; /* no need for upgrade as it is handled separately */
2027 kern_return_t kret = KERN_SUCCESS;
2028 ksyn_queue_t kq;
2029 int curthreturns = 0;
2030
2031 if (prepost != 0) {
2032 preth = current_thread();
2033 }
2034
2035 kq = &kwq->kw_ksynqueues[KSYN_QUEUE_READ];
2036 kwq->kw_lastseqword = rw_wc;
2037 kwq->kw_lastunlockseq = (rw_wc & PTHRW_COUNT_MASK);
2038 kwq->kw_overlapwatch = 0;
2039
2040 error = kwq_find_rw_lowest(kwq, flags, premgen, &rwtype, lowest);
2041 #if __TESTPANICS__
2042 if (error != 0)
2043 panic("rwunlock: failed to slot next round of threads");
2044 #endif /* __TESTPANICS__ */
2045
2046 low_writer = lowest[KSYN_QUEUE_WRITER];
2047
2048 allreaders = 0;
2049 updatebits = 0;
2050
2051 switch (rwtype & PTH_RW_TYPE_MASK) {
2052 case PTH_RW_TYPE_READ: {
2053 // XXX
2054 /* what about the preflight which is LREAD or READ ?? */
2055 if ((rwtype & PTH_RWSHFT_TYPE_MASK) != 0) {
2056 if (rwtype & PTH_RWSHFT_TYPE_WRITE) {
2057 updatebits |= (PTH_RWL_WBIT | PTH_RWL_KBIT);
2058 }
2059 }
2060 limitrdnum = 0;
2061 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2062 limitrdnum = low_writer;
2063 } else {
2064 allreaders = 1;
2065 }
2066
2067 numneeded = 0;
2068
2069 if ((rwtype & PTH_RWSHFT_TYPE_WRITE) != 0) {
2070 limitrdnum = low_writer;
2071 numneeded = ksyn_queue_count_tolowest(kq, limitrdnum);
2072 if (((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) && (is_seqlower(premgen, limitrdnum) != 0)) {
2073 curthreturns = 1;
2074 numneeded += 1;
2075 }
2076 } else {
2077 // no writers at all
2078 // no other waiters, only readers
2079 kwq->kw_overlapwatch = 1;
2080 numneeded += kwq->kw_ksynqueues[KSYN_QUEUE_READ].ksynq_count;
2081 if ((flags & KW_UNLOCK_PREPOST_READLOCK) != 0) {
2082 curthreturns = 1;
2083 numneeded += 1;
2084 }
2085 }
2086
2087 updatebits += (numneeded << PTHRW_COUNT_SHIFT);
2088
2089 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2090
2091 if (curthreturns != 0) {
2092 block = 0;
2093 uth = current_uthread();
2094 kwe = pthread_kern->uthread_get_uukwe(uth);
2095 kwe->kwe_psynchretval = updatebits;
2096 }
2097
2098
2099 failed = ksyn_wakeupreaders(kwq, limitrdnum, allreaders, updatebits, &woken);
2100 if (failed != 0) {
2101 kwq->kw_pre_intrcount = failed; /* actually a count */
2102 kwq->kw_pre_intrseq = limitrdnum;
2103 kwq->kw_pre_intrretbits = updatebits;
2104 kwq->kw_pre_intrtype = PTH_RW_TYPE_READ;
2105 }
2106
2107 error = 0;
2108
2109 if ((kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) && ((updatebits & PTH_RWL_WBIT) == 0))
2110 panic("kwq_handle_unlock: writer pending but no writebit set %x\n", updatebits);
2111 }
2112 break;
2113
2114 case PTH_RW_TYPE_WRITE: {
2115
2116 /* only one thread is going to be granted */
2117 updatebits |= (PTHRW_INC);
2118 updatebits |= PTH_RWL_KBIT | PTH_RWL_EBIT;
2119
2120 if (((flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) && (low_writer == premgen)) {
2121 block = 0;
2122 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count != 0) {
2123 updatebits |= PTH_RWL_WBIT;
2124 }
2125 th = preth;
2126 uth = pthread_kern->get_bsdthread_info(th);
2127 kwe = pthread_kern->uthread_get_uukwe(uth);
2128 kwe->kwe_psynchretval = updatebits;
2129 } else {
2130 /* we are not granting the write lock to the preposting thread */
2131 /* if other writers are present, or a preposting write thread, the W bit must be set */
2132 if (kwq->kw_ksynqueues[KSYN_QUEUE_WRITER].ksynq_count > 1 ||
2133 (flags & KW_UNLOCK_PREPOST_WRLOCK) != 0) {
2134 updatebits |= PTH_RWL_WBIT;
2135 }
2136 /* setup next in the queue */
2137 kret = ksyn_signal(kwq, KSYN_QUEUE_WRITER, NULL, updatebits);
2138 if (kret == KERN_NOT_WAITING) {
2139 kwq->kw_pre_intrcount = 1; /* actually a count */
2140 kwq->kw_pre_intrseq = low_writer;
2141 kwq->kw_pre_intrretbits = updatebits;
2142 kwq->kw_pre_intrtype = PTH_RW_TYPE_WRITE;
2143 }
2144 error = 0;
2145 }
2146 kwq->kw_nextseqword = (rw_wc & PTHRW_COUNT_MASK) + updatebits;
2147 if ((updatebits & (PTH_RWL_KBIT | PTH_RWL_EBIT)) != (PTH_RWL_KBIT | PTH_RWL_EBIT))
2148 panic("kwq_handle_unlock: writer lock granted but KBIT/EBIT not set %x\n", updatebits);
2149 }
2150 break;
2151
2152 default:
2153 panic("rwunlock: invalid type for lock grants");
2154
2155 }
2156
2157 if (updatep != NULL)
2158 *updatep = updatebits;
2159 if (blockp != NULL)
2160 *blockp = block;
2161 return(error);
2162 }
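
/*
 * Worked example (illustrative): if an rw_unlock finds three readers
 * queued below the first writer, the read-grant path above builds
 *
 * updatebits = (3 << PTHRW_COUNT_SHIFT) | PTH_RWL_WBIT | PTH_RWL_KBIT
 *
 * i.e. the count of woken readers plus the W/K bits telling userspace
 * a writer is still pending, and kw_nextseqword is advanced past the
 * unlock sequence by that same updatebits value.
 */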
2163
2164 /************* Indiv queue support routines ************************/
2165 void
2166 ksyn_queue_init(ksyn_queue_t kq)
2167 {
2168 TAILQ_INIT(&kq->ksynq_kwelist);
2169 kq->ksynq_count = 0;
2170 kq->ksynq_firstnum = 0;
2171 kq->ksynq_lastnum = 0;
2172 }
2173
2174 int
2175 ksyn_queue_insert(ksyn_wait_queue_t kwq, int kqi, ksyn_waitq_element_t kwe, uint32_t mgen, int fit)
2176 {
2177 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2178 uint32_t lockseq = mgen & PTHRW_COUNT_MASK;
2179 int res = 0;
2180
2181 if (kwe->kwe_kwqqueue != NULL) {
2182 panic("adding enqueued item to another queue");
2183 }
2184
2185 if (kq->ksynq_count == 0) {
2186 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2187 kq->ksynq_firstnum = lockseq;
2188 kq->ksynq_lastnum = lockseq;
2189 } else if (fit == FIRSTFIT) {
2190 /* TBD: if retry bit is set for mutex, add it to the head */
2191 /* firstfit, arriving order */
2192 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2193 if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2194 kq->ksynq_firstnum = lockseq;
2195 }
2196 if (is_seqhigher(lockseq, kq->ksynq_lastnum)) {
2197 kq->ksynq_lastnum = lockseq;
2198 }
2199 } else if (lockseq == kq->ksynq_firstnum || lockseq == kq->ksynq_lastnum) {
2200 /* During prepost when a thread is getting cancelled, we could have two entries with the same seq */
2201 res = EBUSY;
2202 if (kwe->kwe_state == KWE_THREAD_PREPOST) {
2203 ksyn_waitq_element_t tmp = ksyn_queue_find_seq(kwq, kq, lockseq);
2204 if (tmp != NULL && tmp->kwe_uth != NULL && pthread_kern->uthread_is_cancelled(tmp->kwe_uth)) {
2205 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2206 res = 0;
2207 }
2208 }
2209 } else if (is_seqlower(kq->ksynq_lastnum, lockseq)) { // XXX is_seqhigher
2210 TAILQ_INSERT_TAIL(&kq->ksynq_kwelist, kwe, kwe_list);
2211 kq->ksynq_lastnum = lockseq;
2212 } else if (is_seqlower(lockseq, kq->ksynq_firstnum)) {
2213 TAILQ_INSERT_HEAD(&kq->ksynq_kwelist, kwe, kwe_list);
2214 kq->ksynq_firstnum = lockseq;
2215 } else {
2216 ksyn_waitq_element_t q_kwe, r_kwe;
2217
2218 res = ESRCH;
2219 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2220 if (is_seqhigher(q_kwe->kwe_lockseq, lockseq)) {
2221 TAILQ_INSERT_BEFORE(q_kwe, kwe, kwe_list);
2222 res = 0;
2223 break;
2224 }
2225 }
2226 }
2227
2228 if (res == 0) {
2229 kwe->kwe_kwqqueue = kwq;
2230 kq->ksynq_count++;
2231 kwq->kw_inqueue++;
2232 update_low_high(kwq, lockseq);
2233 }
2234 return res;
2235 }
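
/*
 * Illustrative sketch (not part of the build): ksyn_wait() above is the
 * usual caller, but prepost insertion (cf. ksyn_handle_cvbroad() below)
 * follows the same pattern with a zone-allocated element. SEQFIT is
 * assumed here from the internal headers as the non-FIRSTFIT,
 * sequence-ordered policy:
 *
 * ksyn_waitq_element_t kwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
 * bzero(kwe, sizeof(*kwe));
 * kwe->kwe_state = KWE_THREAD_PREPOST;
 * kwe->kwe_lockseq = mgen & PTHRW_COUNT_MASK;
 * if (ksyn_queue_insert(kwq, KSYN_QUEUE_WRITER, kwe, mgen, SEQFIT) != 0) {
 * pthread_kern->zfree(kwe_zone, kwe); // insert failed (EBUSY duplicate or ESRCH)
 * }
 */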
2236
2237 void
2238 ksyn_queue_remove_item(ksyn_wait_queue_t kwq, ksyn_queue_t kq, ksyn_waitq_element_t kwe)
2239 {
2240 if (kq->ksynq_count == 0) {
2241 panic("removing item from empty queue");
2242 }
2243
2244 if (kwe->kwe_kwqqueue != kwq) {
2245 panic("removing item from wrong queue");
2246 }
2247
2248 TAILQ_REMOVE(&kq->ksynq_kwelist, kwe, kwe_list);
2249 kwe->kwe_list.tqe_next = NULL;
2250 kwe->kwe_list.tqe_prev = NULL;
2251 kwe->kwe_kwqqueue = NULL;
2252
2253 if (--kq->ksynq_count > 0) {
2254 ksyn_waitq_element_t tmp;
2255 tmp = TAILQ_FIRST(&kq->ksynq_kwelist);
2256 kq->ksynq_firstnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2257 tmp = TAILQ_LAST(&kq->ksynq_kwelist, ksynq_kwelist_head);
2258 kq->ksynq_lastnum = tmp->kwe_lockseq & PTHRW_COUNT_MASK;
2259 } else {
2260 kq->ksynq_firstnum = 0;
2261 kq->ksynq_lastnum = 0;
2262 }
2263
2264 if (--kwq->kw_inqueue > 0) {
2265 uint32_t curseq = kwe->kwe_lockseq & PTHRW_COUNT_MASK;
2266 if (kwq->kw_lowseq == curseq) {
2267 kwq->kw_lowseq = find_nextlowseq(kwq);
2268 }
2269 if (kwq->kw_highseq == curseq) {
2270 kwq->kw_highseq = find_nexthighseq(kwq);
2271 }
2272 } else {
2273 kwq->kw_lowseq = 0;
2274 kwq->kw_highseq = 0;
2275 }
2276 }
2277
2278 ksyn_waitq_element_t
2279 ksyn_queue_find_seq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t seq)
2280 {
2281 ksyn_waitq_element_t kwe;
2282
2283 // XXX: should stop searching when higher sequence number is seen
2284 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2285 if ((kwe->kwe_lockseq & PTHRW_COUNT_MASK) == seq) {
2286 return kwe;
2287 }
2288 }
2289 return NULL;
2290 }
2291
2292 /* find the thread at the target sequence (or a broadcast/prepost at or above) */
2293 ksyn_waitq_element_t
2294 ksyn_queue_find_cvpreposeq(ksyn_queue_t kq, uint32_t cgen)
2295 {
2296 ksyn_waitq_element_t result = NULL;
2297 ksyn_waitq_element_t kwe;
2298 uint32_t lgen = (cgen & PTHRW_COUNT_MASK);
2299
2300 TAILQ_FOREACH(kwe, &kq->ksynq_kwelist, kwe_list) {
2301 if (is_seqhigher_eq(kwe->kwe_lockseq, cgen)) {
2302 result = kwe;
2303
2304 // KWE_THREAD_INWAIT must be strictly equal
2305 if (kwe->kwe_state == KWE_THREAD_INWAIT && (kwe->kwe_lockseq & PTHRW_COUNT_MASK) != lgen) {
2306 result = NULL;
2307 }
2308 break;
2309 }
2310 }
2311 return result;
2312 }
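
/*
 * Worked example (illustrative): with cgen == 0x200, an INWAIT element
 * at 0x200 matches, an INWAIT element at 0x300 does not (waiters
 * require strict equality), while a BROADCAST or PREPOST element at
 * 0x300 does match, since those cover every sequence at or below
 * their own.
 */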
2313
2314 /* look for a waiter at signalseq, or any eligible prepost/broadcast up to uptoseq */
2315 ksyn_waitq_element_t
2316 ksyn_queue_find_signalseq(__unused ksyn_wait_queue_t kwq, ksyn_queue_t kq, uint32_t uptoseq, uint32_t signalseq)
2317 {
2318 ksyn_waitq_element_t result = NULL;
2319 ksyn_waitq_element_t q_kwe, r_kwe;
2320
2321 // XXX
2322 /* case where wrap in the tail of the queue exists */
2323 TAILQ_FOREACH_SAFE(q_kwe, &kq->ksynq_kwelist, kwe_list, r_kwe) {
2324 if (q_kwe->kwe_state == KWE_THREAD_PREPOST) {
2325 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2326 return result;
2327 }
2328 }
2329 if (q_kwe->kwe_state == KWE_THREAD_PREPOST || q_kwe->kwe_state == KWE_THREAD_BROADCAST) {
2330 /* match any prepost at our same uptoseq or any broadcast above */
2331 if (is_seqlower(q_kwe->kwe_lockseq, uptoseq)) {
2332 continue;
2333 }
2334 return q_kwe;
2335 } else if (q_kwe->kwe_state == KWE_THREAD_INWAIT) {
2336 /*
2337 * Match any (non-cancelled) thread at or below our upto sequence -
2338 * but prefer an exact match to our signal sequence (if present) to
2339 * keep exact matches happening.
2340 */
2341 if (is_seqhigher(q_kwe->kwe_lockseq, uptoseq)) {
2342 return result;
2343 }
2344 if (q_kwe->kwe_kwqqueue == kwq) {
2345 if (!pthread_kern->uthread_is_cancelled(q_kwe->kwe_uth)) {
2346 /* if equal or higher than our signal sequence, return this one */
2347 if (is_seqhigher_eq(q_kwe->kwe_lockseq, signalseq)) {
2348 return q_kwe;
2349 }
2350
2351 /* otherwise, just remember this eligible thread and move on */
2352 if (result == NULL) {
2353 result = q_kwe;
2354 }
2355 }
2356 }
2357 } else {
2358 panic("ksyn_queue_find_signalseq(): unknown wait queue element type (%d)\n", q_kwe->kwe_state);
2359 }
2360 }
2361 return result;
2362 }
2363
2364 void
2365 ksyn_queue_free_items(ksyn_wait_queue_t kwq, int kqi, uint32_t upto, int all)
2366 {
2367 ksyn_waitq_element_t kwe;
2368 uint32_t tseq = upto & PTHRW_COUNT_MASK;
2369 ksyn_queue_t kq = &kwq->kw_ksynqueues[kqi];
2370
2371 while ((kwe = TAILQ_FIRST(&kq->ksynq_kwelist)) != NULL) {
2372 if (all == 0 && is_seqhigher(kwe->kwe_lockseq, tseq)) {
2373 break;
2374 }
2375 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2376 /*
2377 * This scenario typically arises when the cvar has been
2378 * reinitialized and new waiters are already waiting. We can
2379 * wake them as spurious waits so the cvar state gets
2380 * reset correctly.
2381 */
2382
2383 /* skip canceled ones */
2384 /* wake the rest */
2385 /* set M bit to indicate to the waking CV to return the Inc val */
2386 (void)ksyn_signal(kwq, kqi, kwe, PTHRW_INC | PTH_RWS_CV_MBIT | PTH_RWL_MTX_WAIT);
2387 } else {
2388 ksyn_queue_remove_item(kwq, kq, kwe);
2389 pthread_kern->zfree(kwe_zone, kwe);
2390 kwq->kw_fakecount--;
2391 }
2392 }
2393 }
2394
2395 /*************************************************************************/
2396
2397 void
2398 update_low_high(ksyn_wait_queue_t kwq, uint32_t lockseq)
2399 {
2400 if (kwq->kw_inqueue == 1) {
2401 kwq->kw_lowseq = lockseq;
2402 kwq->kw_highseq = lockseq;
2403 } else {
2404 if (is_seqlower(lockseq, kwq->kw_lowseq)) {
2405 kwq->kw_lowseq = lockseq;
2406 }
2407 if (is_seqhigher(lockseq, kwq->kw_highseq)) {
2408 kwq->kw_highseq = lockseq;
2409 }
2410 }
2411 }
2412
2413 uint32_t
2414 find_nextlowseq(ksyn_wait_queue_t kwq)
2415 {
2416 uint32_t lowest = 0;
2417 int first = 1;
2418 int i;
2419
2420 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2421 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2422 uint32_t current = kwq->kw_ksynqueues[i].ksynq_firstnum;
2423 if (first || is_seqlower(current, lowest)) {
2424 lowest = current;
2425 first = 0;
2426 }
2427 }
2428 }
2429
2430 return lowest;
2431 }
2432
2433 uint32_t
2434 find_nexthighseq(ksyn_wait_queue_t kwq)
2435 {
2436 uint32_t highest = 0;
2437 int first = 1;
2438 int i;
2439
2440 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2441 if (kwq->kw_ksynqueues[i].ksynq_count > 0) {
2442 uint32_t current = kwq->kw_ksynqueues[i].ksynq_lastnum;
2443 if (first || is_seqhigher(current, highest)) {
2444 highest = current;
2445 first = 0;
2446 }
2447 }
2448 }
2449
2450 return highest;
2451 }
2452
2453 int
2454 find_seq_till(ksyn_wait_queue_t kwq, uint32_t upto, uint32_t nwaiters, uint32_t *countp)
2455 {
2456 int i;
2457 uint32_t count = 0;
2458
2459 for (i = 0; i < KSYN_QUEUE_MAX; i++) {
2460 count += ksyn_queue_count_tolowest(&kwq->kw_ksynqueues[i], upto);
2461 if (count >= nwaiters) {
2462 break;
2463 }
2464 }
2465
2466 if (countp != NULL) {
2467 *countp = count;
2468 }
2469
2470 if (count == 0) {
2471 return 0;
2472 } else if (count >= nwaiters) {
2473 return 1;
2474 } else {
2475 return 0;
2476 }
2477 }
2478
2479
2480 uint32_t
2481 ksyn_queue_count_tolowest(ksyn_queue_t kq, uint32_t upto)
2482 {
2483 uint32_t i = 0;
2484 ksyn_waitq_element_t kwe, newkwe;
2485
2486 if (kq->ksynq_count == 0 || is_seqhigher(kq->ksynq_firstnum, upto)) {
2487 return 0;
2488 }
2489 if (upto == kq->ksynq_firstnum) {
2490 return 1;
2491 }
2492 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2493 uint32_t curval = (kwe->kwe_lockseq & PTHRW_COUNT_MASK);
2494 if (is_seqhigher(curval, upto)) {
2495 break;
2496 }
2497 ++i;
2498 if (upto == curval) {
2499 break;
2500 }
2501 }
2502 return i;
2503 }
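
/*
 * Worked example (illustrative): for a queue holding the (count-masked)
 * sequences { 0x200, 0x300, 0x400 },
 *
 * ksyn_queue_count_tolowest(kq, 0x300) == 2
 * ksyn_queue_count_tolowest(kq, 0x100) == 0
 *
 * since counting stops at `upto` itself, or at the first element
 * above it.
 */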
2504
2505 /* handles the condvar broadcast; returns the number of woken threads and the bits for the syscall return */
2506 void
2507 ksyn_handle_cvbroad(ksyn_wait_queue_t ckwq, uint32_t upto, uint32_t *updatep)
2508 {
2509 ksyn_waitq_element_t kwe, newkwe;
2510 uint32_t updatebits = 0;
2511 ksyn_queue_t kq = &ckwq->kw_ksynqueues[KSYN_QUEUE_WRITER];
2512
2513 struct ksyn_queue kfreeq;
2514 ksyn_queue_init(&kfreeq);
2515
2516 retry:
2517 TAILQ_FOREACH_SAFE(kwe, &kq->ksynq_kwelist, kwe_list, newkwe) {
2518 if (is_seqhigher(kwe->kwe_lockseq, upto)) {
2519 // outside our range
2520 break;
2521 }
2522
2523 if (kwe->kwe_state == KWE_THREAD_INWAIT) {
2524 // Wake only non-canceled threads waiting on this CV.
2525 if (!pthread_kern->uthread_is_cancelled(kwe->kwe_uth)) {
2526 (void)ksyn_signal(ckwq, KSYN_QUEUE_WRITER, kwe, PTH_RWL_MTX_WAIT);
2527 updatebits += PTHRW_INC;
2528 }
2529 } else if (kwe->kwe_state == KWE_THREAD_BROADCAST ||
2530 kwe->kwe_state == KWE_THREAD_PREPOST) {
2531 ksyn_queue_remove_item(ckwq, kq, kwe);
2532 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2533 ckwq->kw_fakecount--;
2534 } else {
2535 panic("unknown kwe state\n");
2536 }
2537 }
2538
2539 /* Need to enter a broadcast in the queue (if not already at L == S) */
2540
2541 if (diff_genseq(ckwq->kw_lword, ckwq->kw_sword)) {
2542 newkwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist);
2543 if (newkwe == NULL) {
2544 ksyn_wqunlock(ckwq);
2545 newkwe = (ksyn_waitq_element_t)pthread_kern->zalloc(kwe_zone);
2546 TAILQ_INSERT_TAIL(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2547 ksyn_wqlock(ckwq);
2548 goto retry;
2549 } else {
2550 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, newkwe, kwe_list);
2551 ksyn_prepost(ckwq, newkwe, KWE_THREAD_BROADCAST, upto);
2552 }
2553 }
2554
2555 // free any leftover elements collected above
2556 while ((kwe = TAILQ_FIRST(&kfreeq.ksynq_kwelist)) != NULL) {
2557 TAILQ_REMOVE(&kfreeq.ksynq_kwelist, kwe, kwe_list);
2558 pthread_kern->zfree(kwe_zone, kwe);
2559 }
2560
2561 if (updatep != NULL) {
2562 *updatep = updatebits;
2563 }
2564 }
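
/*
 * Illustrative sketch (not part of the build): the retry above is the
 * usual drop-the-lock-to-allocate pattern. A broadcast caller sees
 * only the aggregate update bits, roughly:
 *
 * uint32_t updatebits = 0;
 * ksyn_wqlock(ckwq);
 * ksyn_handle_cvbroad(ckwq, upto, &updatebits);
 * ckwq->kw_sword += (updatebits & PTHRW_COUNT_MASK);
 * ksyn_wqunlock(ckwq);
 */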
2565
2566 void
2567 ksyn_cvupdate_fixup(ksyn_wait_queue_t ckwq, uint32_t *updatebits)
2568 {
2569 if ((ckwq->kw_lword & PTHRW_COUNT_MASK) == (ckwq->kw_sword & PTHRW_COUNT_MASK)) {
2570 if (ckwq->kw_inqueue != 0) {
2571 /* FREE THE QUEUE */
2572 ksyn_queue_free_items(ckwq, KSYN_QUEUE_WRITER, ckwq->kw_lword, 0);
2573 #if __TESTPANICS__
2574 if (ckwq->kw_inqueue != 0)
2575 panic("ksyn_cvupdate_fixup: L == S, but entries in queue beyond S");
2576 #endif /* __TESTPANICS__ */
2577 }
2578 ckwq->kw_lword = ckwq->kw_uword = ckwq->kw_sword = 0;
2579 ckwq->kw_kflags |= KSYN_KWF_ZEROEDOUT;
2580 *updatebits |= PTH_RWS_CV_CBIT;
2581 } else if (ckwq->kw_inqueue != 0 && ckwq->kw_fakecount == ckwq->kw_inqueue) {
2582 // only fake entries are present in the queue
2583 *updatebits |= PTH_RWS_CV_PBIT;
2584 }
2585 }
2586
2587 void
2588 psynch_zoneinit(void)
2589 {
2590 kwq_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_wait_queue), 8192 * sizeof(struct ksyn_wait_queue), 4096, "ksyn_wait_queue");
2591 kwe_zone = (zone_t)pthread_kern->zinit(sizeof(struct ksyn_waitq_element), 8192 * sizeof(struct ksyn_waitq_element), 4096, "ksyn_waitq_element");
2592 }