/*
 * Copyright (c) 2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#ifndef _KERN_MPSC_QUEUE_H_
#define _KERN_MPSC_QUEUE_H_

#ifdef XNU_KERNEL_PRIVATE

#include <machine/atomic.h>
#include <kern/macro_help.h>
#include <kern/thread_call.h>

#endif // XNU_KERNEL_PRIVATE

#include <sys/cdefs.h>

__BEGIN_DECLS
/*!
 * @typedef struct mpsc_queue_chain
 *
 * @brief
 * Type for the intrusive linkage used by MPSC queues.
 */
typedef struct mpsc_queue_chain {
	struct mpsc_queue_chain *_Atomic mpqc_next;
} *mpsc_queue_chain_t;
/*!
 * @typedef struct mpsc_queue_head
 *
 * @brief
 * The type for a multi-producer single-consumer queue.
 *
 * @discussion
 * MPSC queues allow producers to be unaffected by other producers or by the
 * consumer, which in turn means that having producers in interrupt context
 * does not require other producers to disable interrupts, as a traditional
 * spinlock based approach would.
 *
 * These queues shine when data is produced from the entire system and is
 * consumed from a single serial context (logging, tracing, ...).
 * mpsc_daemon_queue_t is provided as a fully ready/easy-to-use pre-packaged
 * solution for these common use cases.
 *
 * - mpsc_queue_append() can be used to append a single item,
 * - mpsc_queue_append_list() can be used to append a batch of items at once.
 *
 * Functions for the consumer side assume proper serialization that is not
 * provided by the MPSC queue itself. Dequeuing doesn't require preemption
 * to be disabled.
 *
 * <h2>Algorithm</h2>
 *
 * The base of the enqueue algorithm is a single atomic exchange (first half,
 * called __mpsc_queue_append_update_tail) and a list fixup (second half,
 * called __mpsc_queue_append_update_prev).
 *
 * Graphically, enqueuing `X` looks like this, with each step being done
 * atomically (for the empty queue case, `tail` points to `head`):
 *
 *  | orig state          | update_tail         | update_prev         |
 *  +---------------------+---------------------+---------------------+
 *  |                     |                     |                     |
 *  | head -> e1 -> e2 -. | head -> e1 -> e2 -. | head -> e1 -> e2 -. |
 *  |                   | |                   | |                   | |
 *  |   ,- ... <--------' |   ,- ... <--------' |   ,- ... <--------' |
 *  |   |                 |   |                 |   |                 |
 *  |   v                 |   v                 |   v                 |
 *  | tail -> eN -> NULL  | tail    eN -> NULL  | tail    eN          |
 *  |                     |   |                 |   |     |           |
 *  |                     |   |                 |   |     v           |
 *  | X -> NULL           |   `---> X -> NULL   |   `---> X -> NULL   |
 *  |                     |                     |                     |
 *  +---------------------+---------------------+---------------------+
 *
 * There is a small 1-instruction window of inconsistency which makes the
 * chosen algorithm non-linearizable, and which requires enqueuers to disable
 * preemption during the enqueue so as not to starve the consumer forever.
 *
 * As far as memory visibility is concerned, enqueuing uses a release fence in
 * update_tail, which pairs with memory fences in mpsc_queue_dequeue_batch().
 *
 * Note: as far as the data structure in memory goes, its layout is equivalent
 *       to a BSD <sys/queue.h> STAILQ. However, because of this inconsistency
 *       window and of the memory ordering concerns above, it is incorrect to
 *       use STAILQ macros on an MPSC queue.
 */
typedef struct mpsc_queue_head {
	struct mpsc_queue_chain mpqh_head;
	struct mpsc_queue_chain *_Atomic mpqh_tail;
} *mpsc_queue_head_t;
/*!
 * @macro MPSC_QUEUE_INITIALIZER
 *
 * @brief
 * Macro to use in static initializers for mpsc queues.
 *
 * @param head
 * The name of the variable to initialize.
 */
#define MPSC_QUEUE_INITIALIZER(head)   { .mpqh_tail = &(head).mpqh_head }
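/*
 * Example (illustrative sketch, not part of this header's API): statically
 * initializing a global mpsc queue. The `deferred_frees` name is made up
 * for the example.
 *
 *	static struct mpsc_queue_head deferred_frees =
 *	    MPSC_QUEUE_INITIALIZER(deferred_frees);
 */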
#ifdef XNU_KERNEL_PRIVATE

/*!
 * @function mpsc_queue_init
 *
 * @brief
 * Dynamically initialize an mpsc queue.
 *
 * @discussion
 * This initialization assumes that the object holding the queue head
 * is initialized before it can be made visible to other threads/cores.
 *
 * @param q
 * The queue to initialize.
 */
static inline void
mpsc_queue_init(mpsc_queue_head_t q)
{
	os_atomic_init(&q->mpqh_head.mpqc_next, NULL);
	os_atomic_init(&q->mpqh_tail, &q->mpqh_head);
}
/*!
 * @typedef enum mpsc_queue_options
 */
typedef enum mpsc_queue_options {
	MPSC_QUEUE_NONE                 = 0,
	MPSC_QUEUE_DISABLE_PREEMPTION   = 1 << 0,
} mpsc_queue_options_t;
/*!
 * @const MPSC_QUEUE_NOTQUEUED_MARKER
 *
 * @brief
 * Magical marker that implementations can use to poison the chain pointer of
 * elements not on any MPSC queue.
 */
#define MPSC_QUEUE_NOTQUEUED_MARKER ((mpsc_queue_chain_t)~0ul)
/*!
 * @macro mpsc_queue_element
 *
 * @brief
 * Macro to find the pointer of an element back from its MPSC chain linkage.
 */
#define mpsc_queue_element(ptr, type, field) __container_of(ptr, type, field)
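/*
 * Example (illustrative sketch, not part of this header's API): an element
 * type embeds the linkage, and mpsc_queue_element() recovers the element from
 * the chain pointer handed back by the dequeue interfaces. `struct my_event`,
 * its fields and `my_event_handle` are hypothetical names.
 *
 *	struct my_event {
 *		struct mpsc_queue_chain me_link;
 *		int                     me_code;
 *	};
 *
 *	static void
 *	my_event_handle(mpsc_queue_chain_t elm)
 *	{
 *		struct my_event *e;
 *
 *		e = mpsc_queue_element(elm, struct my_event, me_link);
 *		// ... consume e->me_code, then dispose of e ...
 *	}
 */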

#pragma mark Advanced Multi Producer calls

/*!
 * @function __mpsc_queue_append_update_tail
 *
 * @brief
 * First half of the enqueue operation onto a multi-producer single-consumer
 * queue.
 *
 * @discussion
 * This function is available for algorithms that need to do things (such as
 * taking a refcount) before calling __mpsc_queue_append_update_prev().
 *
 * Preemption should be disabled before calling
 * __mpsc_queue_append_update_tail(), and until
 * __mpsc_queue_append_update_prev() has returned.
 *
 * @param q
 * The queue to update.
 *
 * @param elm
 * The element to append to `q`.
 *
 * @returns
 * A token to later pass to __mpsc_queue_append_update_prev()
 * to complete the enqueue.
 */
static inline mpsc_queue_chain_t
__mpsc_queue_append_update_tail(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
{
	os_atomic_store(&elm->mpqc_next, NULL, relaxed);
	return os_atomic_xchg(&q->mpqh_tail, elm, release);
}
/*!
 * @function __mpsc_queue_append_was_empty
 *
 * @brief
 * Tests whether the queue was empty at the time
 * __mpsc_queue_append_update_tail() was called.
 *
 * @param q
 * The queue to test emptiness for.
 *
 * @param prev
 * The token returned by __mpsc_queue_append_update_tail().
 *
 * @returns
 * Whether the queue was empty (true) or not (false).
 */
static inline bool
__mpsc_queue_append_was_empty(mpsc_queue_head_t q, mpsc_queue_chain_t prev)
{
	return &q->mpqh_head == prev;
}
/*!
 * @function __mpsc_queue_append_update_prev
 *
 * @brief
 * Second half of the enqueue operation onto a multi-producer single-consumer
 * queue.
 *
 * @discussion
 * This function is available for algorithms that need to do things (such as
 * taking a refcount) before calling __mpsc_queue_append_update_prev().
 *
 * Preemption should be disabled before calling
 * __mpsc_queue_append_update_tail(), and until
 * __mpsc_queue_append_update_prev() has returned.
 *
 * @param prev
 * The token returned by __mpsc_queue_append_update_tail().
 *
 * @param elm
 * The element to append to the queue.
 */
static inline void
__mpsc_queue_append_update_prev(mpsc_queue_chain_t prev, mpsc_queue_chain_t elm)
{
	os_atomic_store(&prev->mpqc_next, elm, relaxed);
}
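/*
 * Example (illustrative sketch, not part of this header's API): the two
 * halves can be used directly when work (such as taking a refcount) must
 * happen between the tail exchange and the list fixup.
 * `my_object_reference()` and the `obj` element are hypothetical.
 *
 *	mpsc_queue_chain_t prev;
 *
 *	disable_preemption();
 *	prev = __mpsc_queue_append_update_tail(q, &obj->o_link);
 *	my_object_reference(obj);   // pin obj before the consumer can see it
 *	__mpsc_queue_append_update_prev(prev, &obj->o_link);
 *	if (__mpsc_queue_append_was_empty(q, prev)) {
 *		// first element: wake up the consumer
 *	}
 *	enable_preemption();
 */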

#pragma mark Multi Producer calls

/*!
 * @function mpsc_queue_append_list
 *
 * @brief
 * Enqueues a list of elements onto a queue.
 *
 * @discussion
 * This enqueues a list that has to be fully formed from `first` to `last`
 * at the end of `q`.
 *
 * Preemption should be disabled when calling mpsc_queue_append_list().
 *
 * @param q
 * The queue to update.
 *
 * @param first
 * The first of the list elements being appended.
 *
 * @param last
 * The last of the list elements being appended.
 *
 * @returns
 * Whether the queue was empty before the append.
 */
static inline bool
mpsc_queue_append_list(mpsc_queue_head_t q, mpsc_queue_chain_t first,
    mpsc_queue_chain_t last)
{
	mpsc_queue_chain_t prev = __mpsc_queue_append_update_tail(q, last);
	__mpsc_queue_append_update_prev(prev, first);
	return __mpsc_queue_append_was_empty(q, prev);
}
/*!
 * @function mpsc_queue_append
 *
 * @brief
 * Enqueues an element onto a queue.
 *
 * @discussion
 * Preemption should be disabled when calling mpsc_queue_append().
 *
 * @param q the queue to update
 * @param elm the element to append
 *
 * @returns
 * Whether the queue was empty before the append.
 */
static inline bool
mpsc_queue_append(mpsc_queue_head_t q, mpsc_queue_chain_t elm)
{
	return mpsc_queue_append_list(q, elm, elm);
}
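/*
 * Example (illustrative sketch, not part of this header's API): a plain
 * producer. The return value is typically used to decide whether the serial
 * consumer needs a wakeup. `my_queue` and `e` are hypothetical.
 *
 *	disable_preemption();
 *	if (mpsc_queue_append(&my_queue, &e->me_link)) {
 *		// queue was empty: notify the consumer
 *	}
 *	enable_preemption();
 */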

#pragma mark Single Consumer calls

/*!
 * @function mpsc_queue_dequeue_batch()
 *
 * @brief
 * Atomically empty a queue at once and return the batch head and tail.
 *
 * @discussion
 * Consumer function, must be called in a serialized way with respect to any
 * other consumer function.
 *
 * @param q
 * The queue to drain.
 *
 * @param tail
 * An out pointer filled with the last element captured.
 *
 * @param dependency
 * A dependency token (to rely on consume / hardware dependencies).
 * When not trying to take advantage of hardware dependencies, just pass NULL.
 *
 * @returns
 * The first element of the batch if any, or NULL if the queue was empty.
 */
mpsc_queue_chain_t
mpsc_queue_dequeue_batch(mpsc_queue_head_t q, mpsc_queue_chain_t *tail,
    os_atomic_dependency_t dependency);
/*!
 * @function mpsc_queue_batch_next()
 *
 * @brief
 * Function used to consume an element from a batch dequeued with
 * mpsc_queue_dequeue_batch().
 *
 * @discussion
 * Once a batch has been dequeued, there is no need to hold the consumer lock
 * anymore to consume it.
 *
 * mpsc_queue_batch_foreach_safe() is the preferred interface to consume
 * the whole batch.
 *
 * @param cur
 * The current inspected element of the batch (must be the batch head or
 * a value returned by mpsc_queue_batch_next()).
 *
 * @param tail
 * The last element of the batch.
 *
 * @returns
 * The next element if any.
 */
mpsc_queue_chain_t
mpsc_queue_batch_next(mpsc_queue_chain_t cur, mpsc_queue_chain_t tail);
/*!
 * @macro mpsc_queue_batch_foreach_safe
 *
 * @brief
 * Macro used to enumerate a batch dequeued with mpsc_queue_dequeue_batch().
 *
 * @param item
 * The item being currently visited.
 *
 * @param head
 * The first element of the batch.
 *
 * @param tail
 * The last element of the batch.
 */
#define mpsc_queue_batch_foreach_safe(item, head, tail) \
	for (mpsc_queue_chain_t __tmp, __item = (head), __tail = (tail); \
	    __tmp = mpsc_queue_batch_next(__item, __tail), (item) = __item; \
	    __item = __tmp)
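/*
 * Example (illustrative sketch, not part of this header's API): a serialized
 * consumer draining everything currently enqueued in one batch. Passing
 * OS_ATOMIC_DEPENDENCY_NONE (from <machine/atomic.h>) opts out of hardware
 * dependency tracking; `my_queue` and `my_event_handle` are hypothetical.
 *
 *	mpsc_queue_chain_t head, tail, it;
 *
 *	head = mpsc_queue_dequeue_batch(&my_queue, &tail,
 *	    OS_ATOMIC_DEPENDENCY_NONE);
 *	if (head) {
 *		mpsc_queue_batch_foreach_safe(it, head, tail) {
 *			my_event_handle(it);   // safe: `it` may be freed here
 *		}
 *	}
 */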
/*!
 * @function mpsc_queue_restore_batch()
 *
 * @brief
 * "Restores" a batch at the head of the queue.
 *
 * @discussion
 * Consumer function, must be called in a serialized way with respect to any
 * other consumer function.
 *
 * @param q
 * The queue to restore the batch into.
 *
 * @param first
 * The first element to put back.
 *
 * @param last
 * The last element to put back.
 * It is the responsibility of the caller to ensure the linkages from first to
 * last are properly set up before calling this function.
 */
void
mpsc_queue_restore_batch(mpsc_queue_head_t q, mpsc_queue_chain_t first,
    mpsc_queue_chain_t last);

#pragma mark "GCD"-like facilities

/*!
 * @typedef struct mpsc_daemon_queue
 *
 * @brief
 * Daemon queues are a ready-to-use packaging of the low level MPSC queue
 * primitive.
 *
 * @discussion
 * mpsc_queue_t requires handling of state transitions of the queue and
 * dequeuing yourself, which is a non trivial task.
 *
 * Daemon queues are a simple packaged solution that allows for mpsc_queue_t to
 * form hierarchies (mostly for layering purposes), and be serviced at the
 * bottom of such a hierarchy by a thread or a thread call.
 *
 * Daemon queues assume homogeneous items, and are set up with an `invoke`
 * callback that is called in the dequeuer on every item as they are dequeued.
 */
typedef struct mpsc_daemon_queue *mpsc_daemon_queue_t;
/*!
 * @typedef mpsc_daemon_invoke_fn_t
 *
 * @brief
 * The type for MPSC daemon queue invoke callbacks.
 */
typedef void (*mpsc_daemon_invoke_fn_t)(mpsc_queue_chain_t elm,
    mpsc_daemon_queue_t dq);
/*!
 * @enum mpsc_daemon_queue_kind
 *
 * @brief
 * Internal type, not to be used by clients.
 */
typedef enum mpsc_daemon_queue_kind {
	MPSC_QUEUE_KIND_UNKNOWN,
	MPSC_QUEUE_KIND_NESTED,
	MPSC_QUEUE_KIND_THREAD,
	MPSC_QUEUE_KIND_THREAD_CRITICAL,
	MPSC_QUEUE_KIND_THREAD_CALL,
} mpsc_daemon_queue_kind_t;
/*!
 * @enum mpsc_daemon_queue_state
 *
 * @brief
 * Internal type, not to be used by clients.
 */
__options_decl(mpsc_daemon_queue_state_t, uint32_t, {
	MPSC_QUEUE_STATE_DRAINING = 0x0001,
	MPSC_QUEUE_STATE_WAKEUP   = 0x0002,
	MPSC_QUEUE_STATE_CANCELED = 0x0004,
});
struct mpsc_daemon_queue {
	mpsc_daemon_queue_kind_t          mpd_kind;
	mpsc_daemon_queue_state_t _Atomic mpd_state;
	mpsc_daemon_invoke_fn_t           mpd_invoke;
	union {
		mpsc_daemon_queue_t       mpd_target;
		struct thread            *mpd_thread;
		struct thread_call       *mpd_call;
	};
	struct mpsc_queue_head            mpd_queue;
	struct mpsc_queue_chain           mpd_chain;
};
/*!
 * @function mpsc_daemon_queue_init_with_thread
 *
 * @brief
 * Sets up a daemon queue to be a base queue drained by a kernel thread.
 *
 * @discussion
 * The function will allocate the thread and start it in assert_wait.
 *
 * @param dq
 * The queue to initialize.
 *
 * @param invoke
 * The invoke function called on individual items on the queue during drain.
 *
 * @param pri
 * The scheduler priority for the created thread.
 *
 * @param name
 * The name to give to the created thread.
 *
 * @returns
 * Whether creating the thread was successful.
 */
kern_return_t
mpsc_daemon_queue_init_with_thread(mpsc_daemon_queue_t dq,
    mpsc_daemon_invoke_fn_t invoke, int pri, const char *name);
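/*
 * Example (illustrative sketch, not part of this header's API): a
 * subsystem-owned daemon queue drained by a dedicated kernel thread.
 * `my_daemon`, `my_event_invoke` and the use of MINPRI_KERNEL are
 * hypothetical / context dependent choices.
 *
 *	static struct mpsc_daemon_queue my_daemon;
 *
 *	static void
 *	my_event_invoke(mpsc_queue_chain_t elm, mpsc_daemon_queue_t dq)
 *	{
 *		struct my_event *e;
 *
 *		e = mpsc_queue_element(elm, struct my_event, me_link);
 *		// process and dispose of e
 *	}
 *
 *	static void
 *	my_subsystem_init(void)
 *	{
 *		kern_return_t kr;
 *
 *		kr = mpsc_daemon_queue_init_with_thread(&my_daemon,
 *		    my_event_invoke, MINPRI_KERNEL, "my_daemon");
 *		assert(kr == KERN_SUCCESS);
 *	}
 */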
/*!
 * @function mpsc_daemon_queue_init_with_thread_call
 *
 * @brief
 * Sets up a daemon queue to be a base queue drained by a thread call.
 *
 * @param dq
 * The queue to initialize.
 *
 * @param invoke
 * The invoke function called on individual items on the queue during drain.
 *
 * @param pri
 * The priority the thread call will run at.
 */
void
mpsc_daemon_queue_init_with_thread_call(mpsc_daemon_queue_t dq,
    mpsc_daemon_invoke_fn_t invoke, thread_call_priority_t pri);
/*!
 * @function mpsc_daemon_queue_init_with_target
 *
 * @brief
 * Sets up a daemon queue to target another daemon queue.
 *
 * @discussion
 * The targeting relationship is useful for subsystem layering purposes only.
 * Because draining a given queue is atomic with respect to its target, target
 * queue hierarchies are prone to starvation.
 *
 * @param dq
 * The queue to initialize.
 *
 * @param invoke
 * The invoke function called on individual items on the queue during drain.
 *
 * @param target
 * The target queue of the initialized queue, which has to be initialized with
 * the mpsc_daemon_queue_nested_invoke invoke handler.
 */
void
mpsc_daemon_queue_init_with_target(mpsc_daemon_queue_t dq,
    mpsc_daemon_invoke_fn_t invoke, mpsc_daemon_queue_t target);
/*!
 * @function mpsc_daemon_queue_nested_invoke
 *
 * @brief
 * The invoke function to pass to mpsc_daemon_queue_init_* when a queue is
 * meant to be targeted by other queues.
 */
void
mpsc_daemon_queue_nested_invoke(mpsc_queue_chain_t elm,
    mpsc_daemon_queue_t dq);
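/*
 * Example (illustrative sketch, not part of this header's API): layering a
 * nested queue on top of a base queue. The base queue is initialized with
 * mpsc_daemon_queue_nested_invoke, as required for a target; `base_q`,
 * `nested_q` and `my_event_invoke` are hypothetical names.
 *
 *	static struct mpsc_daemon_queue base_q, nested_q;
 *
 *	// during subsystem init:
 *	mpsc_daemon_queue_init_with_thread(&base_q,
 *	    mpsc_daemon_queue_nested_invoke, MINPRI_KERNEL, "base_q");
 *	mpsc_daemon_queue_init_with_target(&nested_q, my_event_invoke, &base_q);
 */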
/*!
 * @function mpsc_daemon_queue_cancel_and_wait
 *
 * @brief
 * Cancels the queue so that the object owning it can be destroyed.
 *
 * @discussion
 * This interface will cancel the queue and wait synchronously for the
 * cancelation to have taken effect, possibly waiting on elements currently
 * draining.
 *
 * Sending objects to the daemon queue after cancelation is undefined.
 *
 * Calling this function multiple times is undefined.
 *
 * Tearing down daemon queue hierarchies is the responsibility of the adopter.
 */
void
mpsc_daemon_queue_cancel_and_wait(mpsc_daemon_queue_t dq);
/*!
 * @function mpsc_daemon_enqueue
 *
 * @brief
 * Send ("async") an item to a given daemon on a given queue.
 *
 * @discussion
 * It is the responsibility of the caller to ensure preemption is disabled when
 * this call is made, unless the MPSC_QUEUE_DISABLE_PREEMPTION option is
 * passed.
 *
 * @param dq
 * The daemon queue to enqueue the element onto.
 *
 * @param elm
 * The item to enqueue.
 *
 * @param options
 * Options applicable to the enqueue. In particular, passing
 * MPSC_QUEUE_DISABLE_PREEMPTION makes sure preemption is properly disabled
 * during the enqueue.
 */
void
mpsc_daemon_enqueue(mpsc_daemon_queue_t dq, mpsc_queue_chain_t elm,
    mpsc_queue_options_t options);
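/*
 * Example (illustrative sketch, not part of this header's API): sending an
 * item to the daemon queue from an arbitrary context, letting the enqueue
 * handle the preemption requirement itself. `my_daemon` and `e` are
 * hypothetical.
 *
 *	mpsc_daemon_enqueue(&my_daemon, &e->me_link,
 *	    MPSC_QUEUE_DISABLE_PREEMPTION);
 */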

#pragma mark Deferred deallocation daemon

/*!
 * @function thread_deallocate_daemon_init
 *
 * @brief
 * Initializes the deferred deallocation daemon, called by thread_daemon_init().
 *
 * @discussion
 * The deferred deallocation daemon is a kernel thread based daemon queue that
 * is targeted by nested daemon queues.
 *
 * It is used to perform deferred deallocation for objects that can't safely be
 * deallocated from the context where the deallocation should normally occur.
 *
 * Subsystems using it are, for example: turnstiles, workqueues, threads.
 *
 * New queues should be added to this daemon with great care,
 * as abusing it can lead to an unbounded amount of kernel work.
 */
void
thread_deallocate_daemon_init(void);
/*!
 * @function thread_deallocate_daemon_register_queue
 *
 * @brief
 * Dynamically register a queue for deferred deletion with the deferred
 * deallocation daemon.
 *
 * @param dq
 * The daemon queue to register with the deferred deallocation daemon.
 *
 * @param invoke
 * The callback called on every element of this queue by the deallocation
 * daemon when they are dequeued.
 */
void
thread_deallocate_daemon_register_queue(mpsc_daemon_queue_t dq,
    mpsc_daemon_invoke_fn_t invoke);
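/*
 * Example (illustrative sketch, not part of this header's API): registering a
 * subsystem queue with the deallocation daemon instead of spawning a
 * dedicated thread; items then flow through mpsc_daemon_enqueue() as usual.
 * `my_free_queue` and `my_free_invoke` are hypothetical.
 *
 *	static struct mpsc_daemon_queue my_free_queue;
 *
 *	// during subsystem init:
 *	thread_deallocate_daemon_register_queue(&my_free_queue, my_free_invoke);
 */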
#if DEBUG || DEVELOPMENT

int
mpsc_test_pingpong(uint64_t count, uint64_t *out);

#endif /* DEBUG || DEVELOPMENT */
#endif /* XNU_KERNEL_PRIVATE */

__END_DECLS

#endif /* _KERN_MPSC_QUEUE_H_ */