X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..bd504ef0e0b883cdd7917b73b3574eb9ce669905:/osfmk/kern/wait_queue.c diff --git a/osfmk/kern/wait_queue.c b/osfmk/kern/wait_queue.c index 187ac8b13..6bcabf8c1 100644 --- a/osfmk/kern/wait_queue.c +++ b/osfmk/kern/wait_queue.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -67,25 +67,150 @@ #include #include -#include +#include #include #include #include +#include #include #include +#include /* forward declarations */ static boolean_t wait_queue_member_locked( wait_queue_t wq, wait_queue_set_t wq_set); -void wait_queue_unlink_one( - wait_queue_t wq, - wait_queue_set_t *wq_setp); +static void wait_queues_init(void) __attribute__((section("__TEXT, initcode"))); + +#define WAIT_QUEUE_MAX thread_max +#define WAIT_QUEUE_SET_MAX task_max * 3 +#define WAIT_QUEUE_LINK_MAX PORT_MAX / 2 + (WAIT_QUEUE_MAX * WAIT_QUEUE_SET_MAX) / 64 + +static zone_t _wait_queue_link_zone; +static zone_t _wait_queue_set_zone; +static zone_t _wait_queue_zone; + +/* see rdar://6737748&5561610; we need an unshadowed + * definition of a WaitQueueLink for debugging, + * but it needs to be used somewhere to wind up in + * the dSYM file. */ +volatile WaitQueueLink *unused_except_for_debugging; -kern_return_t wait_queue_set_unlink_all_nofree( - wait_queue_set_t wq_set); + +/* + * Waiting protocols and implementation: + * + * Each thread may be waiting for exactly one event; this event + * is set using assert_wait(). That thread may be awakened either + * by performing a thread_wakeup_prim() on its event, + * or by directly waking that thread up with clear_wait(). + * + * The implementation of wait events uses a hash table. Each + * bucket is queue of threads having the same hash function + * value; the chain for the queue (linked list) is the run queue + * field. [It is not possible to be waiting and runnable at the + * same time.] + * + * Locks on both the thread and on the hash buckets govern the + * wait event field and the queue chain field. Because wakeup + * operations only have the event as an argument, the event hash + * bucket must be locked before any thread. + * + * Scheduling operations may also occur at interrupt level; therefore, + * interrupts below splsched() must be prevented when holding + * thread or hash bucket locks. + * + * The wait event hash table declarations are as follows: + */ + +struct wait_queue boot_wait_queue[1]; +__private_extern__ struct wait_queue *wait_queues = &boot_wait_queue[0]; +__private_extern__ uint32_t num_wait_queues = 1; + +#define P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align))) +#define ROUNDDOWN(x,y) (((x)/(y))*(y)) + +static uint32_t +compute_wait_hash_size(void) +{ + uint32_t hsize, queues; + + if (PE_parse_boot_argn("wqsize", &hsize, sizeof(hsize))) + return (hsize); + + queues = thread_max / 11; + hsize = P2ROUNDUP(queues * sizeof(struct wait_queue), PAGE_SIZE); + + return hsize; +} + +static void +wait_queues_init(void) +{ + uint32_t i, whsize, qsz; + kern_return_t kret; + + /* + * Determine the amount of memory we're willing to reserve for + * the waitqueue hash table + */ + whsize = compute_wait_hash_size(); + + /* Determine the number of waitqueues we can fit. 
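 *
 * [Editor's sketch] The arithmetic that follows first trims the byte
 * budget down to a whole number of wait_queue structures, then
 * truncates that count to a power of two so the event hash can mask
 * rather than divide.  A minimal user-space rendering of the
 * truncation loop (P2ROUNDUP/ROUNDDOWN as defined above; the numeric
 * inputs are assumed stand-ins, not kernel values):
 *
 *	static uint32_t
 *	trunc_pow2(uint32_t n)
 *	{
 *		uint32_t i;
 *		for (i = 0; i < 31; i++) {
 *			uint32_t bit = (1u << i);
 *			if ((n & bit) == n)	/* single bit left */
 *				break;
 *			n &= ~bit;		/* clear low-order bits first */
 *		}
 *		return n;
 *	}
 *
 * e.g. trunc_pow2(1000) == 512 and trunc_pow2(4096) == 4096, matching
 * the mask-off loop below.
 *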
*/ + qsz = sizeof (struct wait_queue); + whsize = ROUNDDOWN(whsize, qsz); + num_wait_queues = whsize / qsz; + + /* + * The hash algorithm requires that this be a power of 2, so we + * just mask off all the low-order bits. + */ + for (i = 0; i < 31; i++) { + uint32_t bit = (1 << i); + if ((num_wait_queues & bit) == num_wait_queues) + break; + num_wait_queues &= ~bit; + } + assert(num_wait_queues > 0); + + /* Now determine how much memory we really need. */ + whsize = P2ROUNDUP(num_wait_queues * qsz, PAGE_SIZE); + + kret = kernel_memory_allocate(kernel_map, (vm_offset_t *) &wait_queues, + whsize, 0, KMA_KOBJECT|KMA_NOPAGEWAIT); + + if (kret != KERN_SUCCESS || wait_queues == NULL) + panic("kernel_memory_allocate() failed to allocate wait queues, error: %d, whsize: 0x%x", kret, whsize); + + for (i = 0; i < num_wait_queues; i++) { + wait_queue_init(&wait_queues[i], SYNC_POLICY_FIFO); + } +} + +void +wait_queue_bootstrap(void) +{ + wait_queues_init(); + _wait_queue_zone = zinit(sizeof(struct wait_queue), + WAIT_QUEUE_MAX * sizeof(struct wait_queue), + sizeof(struct wait_queue), + "wait queues"); + zone_change(_wait_queue_zone, Z_NOENCRYPT, TRUE); + + _wait_queue_set_zone = zinit(sizeof(struct wait_queue_set), + WAIT_QUEUE_SET_MAX * sizeof(struct wait_queue_set), + sizeof(struct wait_queue_set), + "wait queue sets"); + zone_change(_wait_queue_set_zone, Z_NOENCRYPT, TRUE); + + _wait_queue_link_zone = zinit(sizeof(struct _wait_queue_link), + WAIT_QUEUE_LINK_MAX * sizeof(struct _wait_queue_link), + sizeof(struct _wait_queue_link), + "wait queue links"); + zone_change(_wait_queue_link_zone, Z_NOENCRYPT, TRUE); +} /* * Routine: wait_queue_init @@ -100,10 +225,11 @@ wait_queue_init( wait_queue_t wq, int policy) { - if (!((policy & SYNC_POLICY_ORDER_MASK) == SYNC_POLICY_FIFO)) + /* only FIFO and LIFO for now */ + if ((policy & SYNC_POLICY_FIXED_PRIORITY) != 0) return KERN_INVALID_ARGUMENT; - wq->wq_fifo = TRUE; + wq->wq_fifo = ((policy & SYNC_POLICY_REVERSED) == 0); wq->wq_type = _WAIT_QUEUE_inited; queue_init(&wq->wq_queue); hw_lock_init(&wq->wq_interlock); @@ -128,11 +254,11 @@ wait_queue_alloc( wait_queue_t wq; kern_return_t ret; - wq = (wait_queue_t) kalloc(sizeof(struct wait_queue)); + wq = (wait_queue_t) zalloc(_wait_queue_zone); if (wq != WAIT_QUEUE_NULL) { ret = wait_queue_init(wq, policy); if (ret != KERN_SUCCESS) { - kfree(wq, sizeof(struct wait_queue)); + zfree(_wait_queue_zone, wq); wq = WAIT_QUEUE_NULL; } } @@ -154,7 +280,7 @@ wait_queue_free( return KERN_INVALID_ARGUMENT; if (!queue_empty(&wq->wq_queue)) return KERN_FAILURE; - kfree(wq, sizeof(struct wait_queue)); + zfree(_wait_queue_zone, wq); return KERN_SUCCESS; } @@ -179,11 +305,11 @@ wait_queue_set_init( wqset->wqs_wait_queue.wq_type = _WAIT_QUEUE_SET_inited; if (policy & SYNC_POLICY_PREPOST) - wqset->wqs_wait_queue.wq_isprepost = TRUE; + wqset->wqs_wait_queue.wq_prepost = TRUE; else - wqset->wqs_wait_queue.wq_isprepost = FALSE; + wqset->wqs_wait_queue.wq_prepost = FALSE; queue_init(&wqset->wqs_setlinks); - wqset->wqs_refcount = 0; + queue_init(&wqset->wqs_preposts); return KERN_SUCCESS; } @@ -200,12 +326,22 @@ kern_return_t wait_queue_sub_clearrefs( wait_queue_set_t wq_set) { + wait_queue_link_t wql; + queue_t q; + spl_t s; + if (!wait_queue_is_set(wq_set)) return KERN_INVALID_ARGUMENT; + s = splsched(); wqs_lock(wq_set); - wq_set->wqs_refcount = 0; + q = &wq_set->wqs_preposts; + while (!queue_empty(q)) { + queue_remove_first(q, wql, wait_queue_link_t, wql_preposts); + assert(!wql_is_preposted(wql)); + } wqs_unlock(wq_set); + 
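	/*
	 * [Editor's note] This function follows the file's standard
	 * locking bracket: raise to splsched() before taking any
	 * wait-queue or set lock, then release in strict reverse
	 * order.  A hedged sketch of the pattern as a caller of these
	 * locked primitives would write it:
	 *
	 *	spl_t s = splsched();	// mask interrupts at/below splsched
	 *	wqs_lock(wq_set);	// then take the set interlock
	 *	// ... inspect or modify the set ...
	 *	wqs_unlock(wq_set);	// unlock before ...
	 *	splx(s);		// ... restoring the interrupt level
	 */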
splx(s); return KERN_SUCCESS; } @@ -226,13 +362,13 @@ wait_queue_set_alloc( { wait_queue_set_t wq_set; - wq_set = (wait_queue_set_t) kalloc(sizeof(struct wait_queue_set)); + wq_set = (wait_queue_set_t) zalloc(_wait_queue_set_zone); if (wq_set != WAIT_QUEUE_SET_NULL) { kern_return_t ret; ret = wait_queue_set_init(wq_set, policy); if (ret != KERN_SUCCESS) { - kfree(wq_set, sizeof(struct wait_queue_set)); + zfree(_wait_queue_set_zone, wq_set); wq_set = WAIT_QUEUE_SET_NULL; } } @@ -256,7 +392,7 @@ wait_queue_set_free( if (!queue_empty(&wq_set->wqs_wait_queue.wq_queue)) return KERN_FAILURE; - kfree(wq_set, sizeof(struct wait_queue_set)); + zfree(_wait_queue_set_zone, wq_set); return KERN_SUCCESS; } @@ -273,9 +409,11 @@ unsigned int wait_queue_link_size(void) { return sizeof(WaitQueueLink); } /* declare a unique type for wait queue link structures */ static unsigned int _wait_queue_link; +static unsigned int _wait_queue_link_noalloc; static unsigned int _wait_queue_unlinked; #define WAIT_QUEUE_LINK ((void *)&_wait_queue_link) +#define WAIT_QUEUE_LINK_NOALLOC ((void *)&_wait_queue_link_noalloc) #define WAIT_QUEUE_UNLINKED ((void *)&_wait_queue_unlinked) #define WAIT_QUEUE_ELEMENT_CHECK(wq, wqe) \ @@ -293,12 +431,14 @@ static unsigned int _wait_queue_unlinked; (queue_t)(wql) : &(wql)->wql_setlinks))) #define WAIT_QUEUE_SET_LINK_CHECK(wqs, wql) \ - WQASSERT((((wql)->wql_type == WAIT_QUEUE_LINK) && \ + WQASSERT(((((wql)->wql_type == WAIT_QUEUE_LINK) || \ + ((wql)->wql_type == WAIT_QUEUE_LINK_NOALLOC)) && \ ((wql)->wql_setqueue == (wqs)) && \ - ((wql)->wql_queue->wq_type == _WAIT_QUEUE_inited) && \ + (((wql)->wql_queue->wq_type == _WAIT_QUEUE_inited) || \ + ((wql)->wql_queue->wq_type == _WAIT_QUEUE_SET_inited)) && \ (WQSNEXT((wqs), WQSPREV((wqs),(wql))) == (wql))), \ "wait queue set links corruption: wqs=%#x, wql=%#x", \ - (wqs), (wql)) + (wqs), (wql)) #if defined(_WAIT_QUEUE_DEBUG_) @@ -357,7 +497,8 @@ wait_queue_member_locked( wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); - if ((wq_element->wqe_type == WAIT_QUEUE_LINK)) { + if ((wq_element->wqe_type == WAIT_QUEUE_LINK) || + (wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC)) { wait_queue_link_t wql = (wait_queue_link_t)wq_element; if (wql->wql_setqueue == wq_set) @@ -399,16 +540,18 @@ wait_queue_member( /* - * Routine: wait_queue_link_noalloc + * Routine: wait_queue_link_internal * Purpose: * Insert a set wait queue into a wait queue. This * requires us to link the two together using a wait_queue_link - * structure that we allocate. + * structure that was provided. * Conditions: * The wait queue being inserted must be inited as a set queue + * The wait_queue_link structure must already be properly typed */ +static kern_return_t -wait_queue_link_noalloc( +wait_queue_link_internal( wait_queue_t wq, wait_queue_set_t wq_set, wait_queue_link_t wql) @@ -417,13 +560,13 @@ wait_queue_link_noalloc( queue_t q; spl_t s; - if (!wait_queue_is_queue(wq) || !wait_queue_is_set(wq_set)) + if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) return KERN_INVALID_ARGUMENT; /* - * There are probably less threads and sets associated with - * the wait queue, then there are wait queues associated with - * the set. So lets validate it that way. + * There are probably fewer threads and sets associated with + * the wait queue than there are wait queues associated with + * the set. So let's validate it that way. 
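 *
 * [Editor's sketch] For orientation, a typical way client code
 * assembles these pieces — create a queue and a set, then link them
 * with an internally allocated link (hedged; error handling elided):
 *
 *	wait_queue_t     wq  = wait_queue_alloc(SYNC_POLICY_FIFO);
 *	wait_queue_set_t wqs = wait_queue_set_alloc(SYNC_POLICY_PREPOST);
 *	kern_return_t    kr  = wait_queue_link(wq, wqs);
 *
 * wait_queue_link() zalloc()s the link and tags it WAIT_QUEUE_LINK;
 * the scan below then guards against linking the same set twice.
 *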
*/ s = splsched(); wait_queue_lock(wq); @@ -431,7 +574,8 @@ wait_queue_link_noalloc( wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK && + if ((wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) && ((wait_queue_link_t)wq_element)->wql_setqueue == wq_set) { wait_queue_unlock(wq); splx(s); @@ -448,11 +592,14 @@ wait_queue_link_noalloc( WAIT_QUEUE_SET_CHECK(wq_set); + assert(wql->wql_type == WAIT_QUEUE_LINK || + wql->wql_type == WAIT_QUEUE_LINK_NOALLOC); + wql->wql_queue = wq; + wql_clear_prepost(wql); queue_enter(&wq->wq_queue, wql, wait_queue_link_t, wql_links); wql->wql_setqueue = wq_set; queue_enter(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks); - wql->wql_type = WAIT_QUEUE_LINK; wqs_unlock(wq_set); wait_queue_unlock(wq); @@ -461,6 +608,25 @@ wait_queue_link_noalloc( return KERN_SUCCESS; } +/* + * Routine: wait_queue_link_noalloc + * Purpose: + * Insert a set wait queue into a wait queue. This + * requires us to link the two together using a wait_queue_link + * structure that we allocate. + * Conditions: + * The wait queue being inserted must be inited as a set queue + */ +kern_return_t +wait_queue_link_noalloc( + wait_queue_t wq, + wait_queue_set_t wq_set, + wait_queue_link_t wql) +{ + wql->wql_type = WAIT_QUEUE_LINK_NOALLOC; + return wait_queue_link_internal(wq, wq_set, wql); +} + /* * Routine: wait_queue_link * Purpose: @@ -478,20 +644,40 @@ wait_queue_link( wait_queue_link_t wql; kern_return_t ret; - wql = (wait_queue_link_t) kalloc(sizeof(struct _wait_queue_link)); + wql = (wait_queue_link_t) zalloc(_wait_queue_link_zone); if (wql == WAIT_QUEUE_LINK_NULL) return KERN_RESOURCE_SHORTAGE; - ret = wait_queue_link_noalloc(wq, wq_set, wql); + wql->wql_type = WAIT_QUEUE_LINK; + ret = wait_queue_link_internal(wq, wq_set, wql); if (ret != KERN_SUCCESS) - kfree(wql, sizeof(struct _wait_queue_link)); + zfree(_wait_queue_link_zone, wql); return ret; } +wait_queue_link_t +wait_queue_link_allocate(void) +{ + wait_queue_link_t wql; + + wql = zalloc(_wait_queue_link_zone); /* Can't fail */ + bzero(wql, sizeof(*wql)); + wql->wql_type = WAIT_QUEUE_UNLINKED; + + return wql; +} + +kern_return_t +wait_queue_link_free(wait_queue_link_t wql) +{ + zfree(_wait_queue_link_zone, wql); + return KERN_SUCCESS; +} + /* - * Routine: wait_queue_unlink_nofree + * Routine: wait_queue_unlink_locked * Purpose: * Undo the linkage between a wait queue and a set. */ @@ -508,6 +694,10 @@ wait_queue_unlink_locked( queue_remove(&wq->wq_queue, wql, wait_queue_link_t, wql_links); wql->wql_setqueue = WAIT_QUEUE_SET_NULL; queue_remove(&wq_set->wqs_setlinks, wql, wait_queue_link_t, wql_setlinks); + if (wql_is_preposted(wql)) { + queue_t ppq = &wq_set->wqs_preposts; + queue_remove(ppq, wql, wait_queue_link_t, wql_preposts); + } wql->wql_type = WAIT_QUEUE_UNLINKED; WAIT_QUEUE_CHECK(wq); @@ -515,24 +705,26 @@ wait_queue_unlink_locked( } /* - * Routine: wait_queue_unlink + * Routine: wait_queue_unlink_nofree * Purpose: * Remove the linkage between a wait queue and a set, - * freeing the linkage structure. + * returning the linkage structure to the caller to + * free later. 
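 *
 *	[Editor's sketch] Caller-managed link lifetime, using the
 *	_noalloc/_nofree pairing defined in this file (hedged; error
 *	paths elided):
 *
 *		wait_queue_link_t wql = wait_queue_link_allocate();
 *		kern_return_t kr = wait_queue_link_noalloc(wq, wq_set, wql);
 *		...
 *		if (wait_queue_unlink_nofree(wq, wq_set, &wql) == KERN_SUCCESS)
 *			wait_queue_link_free(wql);
 *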
* Conditions: * The wait queue being must be a member set queue */ kern_return_t -wait_queue_unlink( +wait_queue_unlink_nofree( wait_queue_t wq, - wait_queue_set_t wq_set) + wait_queue_set_t wq_set, + wait_queue_link_t *wqlp) { wait_queue_element_t wq_element; wait_queue_link_t wql; queue_t q; spl_t s; - if (!wait_queue_is_queue(wq) || !wait_queue_is_set(wq_set)) { + if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) { return KERN_INVALID_ARGUMENT; } s = splsched(); @@ -542,16 +734,19 @@ wait_queue_unlink( wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { + wql = (wait_queue_link_t)wq_element; if (wql->wql_setqueue == wq_set) { + wqs_lock(wq_set); wait_queue_unlink_locked(wq, wq_set, wql); wqs_unlock(wq_set); wait_queue_unlock(wq); splx(s); - kfree(wql, sizeof(struct _wait_queue_link)); + *wqlp = wql; return KERN_SUCCESS; } } @@ -563,133 +758,242 @@ wait_queue_unlink( return KERN_NOT_IN_SET; } - /* - * Routine: wait_queue_unlinkall_nofree + * Routine: wait_queue_unlink * Purpose: - * Remove the linkage between a wait queue and all its - * sets. The caller is responsible for freeing - * the wait queue link structures. + * Remove the linkage between a wait queue and a set, + * freeing the linkage structure. + * Conditions: + * The wait queue being must be a member set queue */ - kern_return_t -wait_queue_unlinkall_nofree( - wait_queue_t wq) +wait_queue_unlink( + wait_queue_t wq, + wait_queue_set_t wq_set) { wait_queue_element_t wq_element; - wait_queue_element_t wq_next_element; - wait_queue_set_t wq_set; wait_queue_link_t wql; - queue_head_t links_queue_head; - queue_t links = &links_queue_head; queue_t q; spl_t s; - if (!wait_queue_is_queue(wq)) { + if (!wait_queue_is_valid(wq) || !wait_queue_is_set(wq_set)) { return KERN_INVALID_ARGUMENT; } - - queue_init(links); - s = splsched(); wait_queue_lock(wq); + q = &wq->wq_queue; + wq_element = (wait_queue_element_t) queue_first(q); + while (!queue_end(q, (queue_entry_t)wq_element)) { + WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { + + wql = (wait_queue_link_t)wq_element; + + if (wql->wql_setqueue == wq_set) { + boolean_t alloced; + + alloced = (wql->wql_type == WAIT_QUEUE_LINK); + wqs_lock(wq_set); + wait_queue_unlink_locked(wq, wq_set, wql); + wqs_unlock(wq_set); + wait_queue_unlock(wq); + splx(s); + if (alloced) + zfree(_wait_queue_link_zone, wql); + return KERN_SUCCESS; + } + } + wq_element = (wait_queue_element_t) + queue_next((queue_t) wq_element); + } + wait_queue_unlock(wq); + splx(s); + return KERN_NOT_IN_SET; +} + +/* + * Routine: wait_queue_unlink_all_nofree_locked + * Purpose: + * Remove the linkage between a wait queue and all its sets. + * All the linkage structures are returned to the caller for + * later freeing. + * Conditions: + * Wait queue locked. 
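 *
 *	[Editor's note] Lock order here is queue-then-set: the owning
 *	wait queue's interlock is already held, so each member set can
 *	be taken with a plain wqs_lock().  Paths that start from the
 *	set side take the locks in the opposite order and must use a
 *	try-lock with backoff instead, as wait_queue_set_unlink_all()
 *	does below:
 *
 *		if (!wait_queue_lock_try(wq)) {
 *			wqs_unlock(wq_set);
 *			splx(s);
 *			delay(1);	// back off ...
 *			goto retry;	// ... and retry from the top
 *		}
 *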
+ */ + +static void +wait_queue_unlink_all_nofree_locked( + wait_queue_t wq, + queue_t links) +{ + wait_queue_element_t wq_element; + wait_queue_element_t wq_next_element; + wait_queue_set_t wq_set; + wait_queue_link_t wql; + queue_t q; + q = &wq->wq_queue; wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { + WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); wq_next_element = (wait_queue_element_t) queue_next((queue_t) wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wql = (wait_queue_link_t)wq_element; wq_set = wql->wql_setqueue; wqs_lock(wq_set); wait_queue_unlink_locked(wq, wq_set, wql); wqs_unlock(wq_set); + enqueue(links, &wql->wql_links); } wq_element = wq_next_element; } - wait_queue_unlock(wq); - splx(s); - return(KERN_SUCCESS); } - /* - * Routine: wait_queue_unlink_all + * Routine: wait_queue_unlink_all_nofree * Purpose: - * Remove the linkage between a wait queue and all its sets. - * All the linkage structures are freed. + * Remove the linkage between a wait queue and all its sets. + * All the linkage structures are returned to the caller for + * later freeing. * Conditions: * Nothing of interest locked. */ kern_return_t -wait_queue_unlink_all( - wait_queue_t wq) +wait_queue_unlink_all_nofree( + wait_queue_t wq, + queue_t links) { - wait_queue_element_t wq_element; - wait_queue_element_t wq_next_element; - wait_queue_set_t wq_set; - wait_queue_link_t wql; - queue_head_t links_queue_head; - queue_t links = &links_queue_head; - queue_t q; spl_t s; - if (!wait_queue_is_queue(wq)) { + if (!wait_queue_is_valid(wq)) { return KERN_INVALID_ARGUMENT; } - queue_init(links); - s = splsched(); wait_queue_lock(wq); + wait_queue_unlink_all_nofree_locked(wq, links); + wait_queue_unlock(wq); + splx(s); + + return(KERN_SUCCESS); +} + +/* + * Routine: wait_queue_unlink_all_locked + * Purpose: + * Remove the linkage between a locked wait queue and all its + * sets and enqueue the allocated ones onto the links queue + * provided. + * Conditions: + * Wait queue locked. + */ +static void +wait_queue_unlink_all_locked( + wait_queue_t wq, + queue_t links) +{ + wait_queue_element_t wq_element; + wait_queue_element_t wq_next_element; + wait_queue_set_t wq_set; + wait_queue_link_t wql; + queue_t q; q = &wq->wq_queue; wq_element = (wait_queue_element_t) queue_first(q); while (!queue_end(q, (queue_entry_t)wq_element)) { + boolean_t alloced; + WAIT_QUEUE_ELEMENT_CHECK(wq, wq_element); wq_next_element = (wait_queue_element_t) queue_next((queue_t) wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + alloced = (wq_element->wqe_type == WAIT_QUEUE_LINK); + if (alloced || wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wql = (wait_queue_link_t)wq_element; wq_set = wql->wql_setqueue; wqs_lock(wq_set); wait_queue_unlink_locked(wq, wq_set, wql); wqs_unlock(wq_set); - enqueue(links, &wql->wql_links); + if (alloced) + enqueue(links, &wql->wql_links); } wq_element = wq_next_element; } + +} + + +/* + * Routine: wait_queue_unlink_all + * Purpose: + * Remove the linkage between a wait queue and all its sets. + * All the linkage structures that were allocated internally + * are freed. The others are the caller's responsibility. + * Conditions: + * Nothing of interest locked. 
+ */ + +kern_return_t +wait_queue_unlink_all( + wait_queue_t wq) +{ + wait_queue_link_t wql; + queue_head_t links_queue_head; + queue_t links = &links_queue_head; + spl_t s; + + if (!wait_queue_is_valid(wq)) { + return KERN_INVALID_ARGUMENT; + } + + queue_init(links); + + s = splsched(); + wait_queue_lock(wq); + wait_queue_unlink_all_locked(wq, links); wait_queue_unlock(wq); splx(s); while(!queue_empty(links)) { wql = (wait_queue_link_t) dequeue(links); - kfree(wql, sizeof(struct _wait_queue_link)); + zfree(_wait_queue_link_zone, wql); } return(KERN_SUCCESS); } +/* legacy interface naming */ +kern_return_t +wait_subqueue_unlink_all( + wait_queue_set_t wq_set) +{ + return wait_queue_set_unlink_all(wq_set); +} + + /* * Routine: wait_queue_set_unlink_all_nofree * Purpose: * Remove the linkage between a set wait queue and all its - * member wait queues. The link structures are not freed, nor - * returned. It is the caller's responsibility to track and free - * them. + * member wait queues and all the sets it may be a member of. + * The links structures are returned for later freeing by the + * caller. * Conditions: - * The wait queue being must be a member set queue + * The wait queue must be a set */ kern_return_t wait_queue_set_unlink_all_nofree( - wait_queue_set_t wq_set) + wait_queue_set_t wq_set, + queue_t links) { wait_queue_link_t wql; wait_queue_t wq; @@ -704,6 +1008,7 @@ retry: s = splsched(); wqs_lock(wq_set); + /* remove the wait queues that are members of our set */ q = &wq_set->wqs_setlinks; wql = (wait_queue_link_t)queue_first(q); @@ -713,6 +1018,7 @@ retry: if (wait_queue_lock_try(wq)) { wait_queue_unlink_locked(wq, wq_set, wql); wait_queue_unlock(wq); + enqueue(links, &wql->wql_links); wql = (wait_queue_link_t)queue_first(q); } else { wqs_unlock(wq_set); @@ -721,26 +1027,23 @@ retry: goto retry; } } + + /* remove this set from sets it belongs to */ + wait_queue_unlink_all_nofree_locked(&wq_set->wqs_wait_queue, links); + wqs_unlock(wq_set); splx(s); return(KERN_SUCCESS); } -/* legacy interface naming */ -kern_return_t -wait_subqueue_unlink_all( - wait_queue_set_t wq_set) -{ - return wait_queue_set_unlink_all_nofree(wq_set); -} - - /* * Routine: wait_queue_set_unlink_all * Purpose: * Remove the linkage between a set wait queue and all its - * member wait queues. The link structures are freed. + * member wait queues and all the sets it may be members of. + * The link structures are freed for those links which were + * dynamically allocated. 
* Conditions: * The wait queue must be a set */ @@ -765,6 +1068,7 @@ retry: s = splsched(); wqs_lock(wq_set); + /* remove the wait queues that are members of our set */ q = &wq_set->wqs_setlinks; wql = (wait_queue_link_t)queue_first(q); @@ -772,9 +1076,13 @@ retry: WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql); wq = wql->wql_queue; if (wait_queue_lock_try(wq)) { + boolean_t alloced; + + alloced = (wql->wql_type == WAIT_QUEUE_LINK); wait_queue_unlink_locked(wq, wq_set, wql); wait_queue_unlock(wq); - enqueue(links, &wql->wql_links); + if (alloced) + enqueue(links, &wql->wql_links); wql = (wait_queue_link_t)queue_first(q); } else { wqs_unlock(wq_set); @@ -783,63 +1091,62 @@ retry: goto retry; } } + + + /* remove this set from sets it belongs to */ + wait_queue_unlink_all_locked(&wq_set->wqs_wait_queue, links); + wqs_unlock(wq_set); splx(s); while (!queue_empty (links)) { wql = (wait_queue_link_t) dequeue(links); - kfree(wql, sizeof(struct _wait_queue_link)); + zfree(_wait_queue_link_zone, wql); } return(KERN_SUCCESS); } - -/* - * Routine: wait_queue_unlink_one - * Purpose: - * Find and unlink one set wait queue - * Conditions: - * Nothing of interest locked. - */ -void -wait_queue_unlink_one( - wait_queue_t wq, - wait_queue_set_t *wq_setp) +kern_return_t +wait_queue_set_unlink_one( + wait_queue_set_t wq_set, + wait_queue_link_t wql) { - wait_queue_element_t wq_element; - queue_t q; + wait_queue_t wq; spl_t s; + assert(wait_queue_is_set(wq_set)); + +retry: s = splsched(); - wait_queue_lock(wq); + wqs_lock(wq_set); - q = &wq->wq_queue; - - wq_element = (wait_queue_element_t) queue_first(q); - while (!queue_end(q, (queue_entry_t)wq_element)) { + WAIT_QUEUE_SET_CHECK(wq_set); - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { - wait_queue_link_t wql = (wait_queue_link_t)wq_element; - wait_queue_set_t wq_set = wql->wql_setqueue; - - wqs_lock(wq_set); - wait_queue_unlink_locked(wq, wq_set, wql); - wqs_unlock(wq_set); - wait_queue_unlock(wq); - splx(s); - kfree(wql,sizeof(struct _wait_queue_link)); - *wq_setp = wq_set; - return; - } + /* Already unlinked, e.g. by selclearthread() */ + if (wql->wql_type == WAIT_QUEUE_UNLINKED) { + goto out; + } - wq_element = (wait_queue_element_t) - queue_next((queue_t) wq_element); + WAIT_QUEUE_SET_LINK_CHECK(wq_set, wql); + + /* On a wait queue, and we hold set queue lock ... */ + wq = wql->wql_queue; + if (wait_queue_lock_try(wq)) { + wait_queue_unlink_locked(wq, wq_set, wql); + wait_queue_unlock(wq); + } else { + wqs_unlock(wq_set); + splx(s); + delay(1); + goto retry; } - wait_queue_unlock(wq); + +out: + wqs_unlock(wq_set); splx(s); - *wq_setp = WAIT_QUEUE_SET_NULL; -} + return KERN_SUCCESS; +} /* * Routine: wait_queue_assert_wait64_locked @@ -861,6 +1168,7 @@ wait_queue_assert_wait64_locked( thread_t thread) { wait_result_t wait_result; + boolean_t realtime; if (!wait_queue_assert_possible(thread)) panic("wait_queue_assert_wait64_locked"); @@ -868,10 +1176,20 @@ wait_queue_assert_wait64_locked( if (wq->wq_type == _WAIT_QUEUE_SET_inited) { wait_queue_set_t wqs = (wait_queue_set_t)wq; - if (wqs->wqs_isprepost && wqs->wqs_refcount > 0) + if (event == NO_EVENT64 && wqs_is_preposted(wqs)) return(THREAD_AWAKENED); } - + + /* + * Realtime threads get priority for wait queue placements. + * This allows wait_queue_wakeup_one to prefer a waiting + * realtime thread, similar in principle to performing + * a wait_queue_wakeup_all and allowing scheduler prioritization + * to run the realtime thread, but without causing the + * lock contention of that scenario. 
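 *
 * [Editor's summary] Net placement policy applied below (a hedged
 * restatement of the code that follows):
 *
 *	realtime (sched_pri >= BASEPRI_REALTIME)  -> enqueue_head()
 *	LIFO queue (!wq_fifo) or TH_OPT_VMPRIV    -> enqueue_head()
 *	otherwise                                 -> enqueue_tail()
 *
 * and a realtime waiter's wait timer is armed with
 * TIMER_CALL_CRITICAL rather than flags of 0.
 *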
+ */ + realtime = (thread->sched_pri >= BASEPRI_REALTIME); + /* * This is the extent to which we currently take scheduling attributes * into account. If the thread is vm priviledged, we stick it at @@ -880,7 +1198,9 @@ wait_queue_assert_wait64_locked( */ wait_result = thread_mark_wait_locked(thread, interruptible); if (wait_result == THREAD_WAITING) { - if (thread->options & TH_OPT_VMPRIV) + if (!wq->wq_fifo + || (thread->options & TH_OPT_VMPRIV) + || realtime) enqueue_head(&wq->wq_queue, (queue_entry_t) thread); else enqueue_tail(&wq->wq_queue, (queue_entry_t) thread); @@ -889,7 +1209,11 @@ wait_queue_assert_wait64_locked( thread->wait_queue = wq; if (deadline != 0) { - if (!timer_call_enter(&thread->wait_timer, deadline)) + uint32_t flags; + + flags = realtime ? TIMER_CALL_CRITICAL : 0; + + if (!timer_call_enter(&thread->wait_timer, deadline, flags)) thread->wait_timer_active++; thread->wait_timer_is_set = TRUE; } @@ -924,7 +1248,7 @@ wait_queue_assert_wait( s = splsched(); wait_queue_lock(wq); thread_lock(thread); - ret = wait_queue_assert_wait64_locked(wq, (event64_t)((uint32_t)event), + ret = wait_queue_assert_wait64_locked(wq, CAST_DOWN(event64_t,event), interruptible, deadline, thread); thread_unlock(thread); wait_queue_unlock(wq); @@ -999,27 +1323,24 @@ _wait_queue_select64_all( /* * We may have to recurse if this is a compound wait queue. */ - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wait_queue_link_t wql = (wait_queue_link_t)wq_element; - wait_queue_t set_queue; + wait_queue_set_t set_queue = wql->wql_setqueue; /* - * We have to check the set wait queue. + * We have to check the set wait queue. If it is marked + * as pre-post, and it is the "generic event" then mark + * it pre-posted now (if not already). */ - set_queue = (wait_queue_t)wql->wql_setqueue; - wait_queue_lock(set_queue); - if (set_queue->wq_isprepost) { - wait_queue_set_t wqs = (wait_queue_set_t)set_queue; - - /* - * Preposting is only for sets and wait queue - * is the first element of set - */ - wqs->wqs_refcount++; + wqs_lock(set_queue); + if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) { + queue_t ppq = &set_queue->wqs_preposts; + queue_enter(ppq, wql, wait_queue_link_t, wql_preposts); } - if (! wait_queue_empty(set_queue)) - _wait_queue_select64_all(set_queue, event, wake_queue); - wait_queue_unlock(set_queue); + if (! wait_queue_empty(&set_queue->wqs_wait_queue)) + _wait_queue_select64_all(&set_queue->wqs_wait_queue, event, wake_queue); + wqs_unlock(set_queue); } else { /* @@ -1031,7 +1352,7 @@ _wait_queue_select64_all( if (t->wait_event == event) { thread_lock(t); - remqueue(q, (queue_entry_t) t); + remqueue((queue_entry_t) t); enqueue (wake_queue, (queue_entry_t) t); t->wait_queue = WAIT_QUEUE_NULL; t->wait_event = NO_EVENT64; @@ -1125,7 +1446,7 @@ wait_queue_wakeup_all( // panic("wait_queue_wakeup_all: we did not get the lock on %p\n", wq); /* (BRINGUP) */ // } ret = wait_queue_wakeup64_all_locked( - wq, (event64_t)((uint32_t)event), + wq, CAST_DOWN(event64_t,event), result, TRUE); /* lock released */ splx(s); @@ -1177,7 +1498,7 @@ wait_queue_wakeup64_all( * a locked thread - if one found * Note: * This is where the sync policy of the wait queue comes - * into effect. For now, we just assume FIFO. + * into effect. For now, we just assume FIFO/LIFO. 
*/ static thread_t _wait_queue_select64_one( @@ -1189,8 +1510,6 @@ _wait_queue_select64_one( thread_t t = THREAD_NULL; queue_t q; - assert(wq->wq_fifo); - q = &wq->wq_queue; wq_element = (wait_queue_element_t) queue_first(q); @@ -1202,21 +1521,34 @@ _wait_queue_select64_one( /* * We may have to recurse if this is a compound wait queue. */ - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wait_queue_link_t wql = (wait_queue_link_t)wq_element; - wait_queue_t set_queue; + wait_queue_set_t set_queue = wql->wql_setqueue; /* - * We have to check the set wait queue. + * We have to check the set wait queue. If the set + * supports pre-posting, it isn't already preposted, + * and we didn't find a thread in the set, then mark it. + * + * If we later find a thread, there may be a spurious + * pre-post here on this set. The wait side has to check + * for that either pre- or post-wait. */ - set_queue = (wait_queue_t)wql->wql_setqueue; - wait_queue_lock(set_queue); - if (! wait_queue_empty(set_queue)) { - t = _wait_queue_select64_one(set_queue, event); + wqs_lock(set_queue); + if (! wait_queue_empty(&set_queue->wqs_wait_queue)) { + t = _wait_queue_select64_one(&set_queue->wqs_wait_queue, event); } - wait_queue_unlock(set_queue); - if (t != THREAD_NULL) + if (t != THREAD_NULL) { + wqs_unlock(set_queue); return t; + } + if (event == NO_EVENT64 && set_queue->wqs_prepost && !wql_is_preposted(wql)) { + queue_t ppq = &set_queue->wqs_preposts; + queue_enter(ppq, wql, wait_queue_link_t, wql_preposts); + } + wqs_unlock(set_queue); + } else { /* @@ -1227,7 +1559,7 @@ _wait_queue_select64_one( t = (thread_t)wq_element; if (t->wait_event == event) { thread_lock(t); - remqueue(q, (queue_entry_t) t); + remqueue((queue_entry_t) t); t->wait_queue = WAIT_QUEUE_NULL; t->wait_event = NO_EVENT64; t->at_safe_point = FALSE; @@ -1263,7 +1595,7 @@ wait_queue_pull_thread_locked( assert(thread->wait_queue == waitq); - remqueue(&waitq->wq_queue, (queue_entry_t)thread ); + remqueue((queue_entry_t)thread ); thread->wait_queue = WAIT_QUEUE_NULL; thread->wait_event = NO_EVENT64; thread->at_safe_point = FALSE; @@ -1299,7 +1631,7 @@ _wait_queue_select64_thread( thread_lock(thread); if ((thread->wait_queue == wq) && (thread->wait_event == event)) { - remqueue(q, (queue_entry_t) thread); + remqueue((queue_entry_t) thread); thread->at_safe_point = FALSE; thread->wait_event = NO_EVENT64; thread->wait_queue = WAIT_QUEUE_NULL; @@ -1319,18 +1651,18 @@ _wait_queue_select64_thread( wqe_next = (wait_queue_element_t) queue_next((queue_t) wq_element); - if (wq_element->wqe_type == WAIT_QUEUE_LINK) { + if (wq_element->wqe_type == WAIT_QUEUE_LINK || + wq_element->wqe_type == WAIT_QUEUE_LINK_NOALLOC) { wait_queue_link_t wql = (wait_queue_link_t)wq_element; - wait_queue_t set_queue; + wait_queue_set_t set_queue = wql->wql_setqueue; - set_queue = (wait_queue_t)wql->wql_setqueue; - wait_queue_lock(set_queue); - if (! wait_queue_empty(set_queue)) { - res = _wait_queue_select64_thread(set_queue, + wqs_lock(set_queue); + if (! 
wait_queue_empty(&set_queue->wqs_wait_queue)) { + res = _wait_queue_select64_thread(&set_queue->wqs_wait_queue, event, thread); } - wait_queue_unlock(set_queue); + wqs_unlock(set_queue); if (res == KERN_SUCCESS) return KERN_SUCCESS; } @@ -1433,7 +1765,8 @@ kern_return_t wait_queue_wakeup_one( wait_queue_t wq, event_t event, - wait_result_t result) + wait_result_t result, + int priority) { thread_t thread; spl_t s; @@ -1444,12 +1777,20 @@ wait_queue_wakeup_one( s = splsched(); wait_queue_lock(wq); - thread = _wait_queue_select64_one(wq, (event64_t)((uint32_t)event)); + thread = _wait_queue_select64_one(wq, CAST_DOWN(event64_t,event)); wait_queue_unlock(wq); if (thread) { kern_return_t res; + if (thread->sched_pri < priority) { + if (priority <= MAXPRI) { + set_sched_pri(thread, priority); + + thread->was_promoted_on_wakeup = 1; + thread->sched_flags |= TH_SFLAG_PROMOTED; + } + } res = thread_go(thread, result); assert(res == KERN_SUCCESS); thread_unlock(thread); @@ -1587,7 +1928,7 @@ wait_queue_wakeup_thread( s = splsched(); wait_queue_lock(wq); - res = _wait_queue_select64_thread(wq, (event64_t)((uint32_t)event), thread); + res = _wait_queue_select64_thread(wq, CAST_DOWN(event64_t,event), thread); wait_queue_unlock(wq); if (res == KERN_SUCCESS) {
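
[Editor's note] The tail of this hunk gives wait_queue_wakeup_one() a new
priority argument: if the selected waiter's sched_pri is below that floor
(and the floor is a valid priority, <= MAXPRI), the thread is boosted with
set_sched_pri() and flagged TH_SFLAG_PROMOTED / was_promoted_on_wakeup
before thread_go().  A hedged caller-side sketch of the new signature —
`obj' and `pri' are hypothetical names, and passing a value below any real
priority (e.g. -1) is assumed here to make the promotion test a no-op:

	/* wake one waiter and floor its priority at `pri' */
	wait_queue_wakeup_one(wq, (event_t)&obj->state, THREAD_AWAKENED, pri);

	/* legacy behavior: no promotion, since sched_pri < -1 is never true */
	wait_queue_wakeup_one(wq, (event_t)&obj->state, THREAD_AWAKENED, -1);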