2 * Copyright (c) 2003-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * 1) ramesh is looking into how to replace taking a reference on
33 * the user's map (vm_map_reference()) since it is believed that
34 * would not hold the process for us.
35 * 2) david is looking into a way for us to set the priority of the
36 * worker threads to match that of the user's thread when the
37 * async IO was queued.
42 * This file contains support for the POSIX 1003.1B AIO/LIO facility.
45 #include <sys/systm.h>
46 #include <sys/fcntl.h>
47 #include <sys/file_internal.h>
48 #include <sys/filedesc.h>
49 #include <sys/kernel.h>
50 #include <sys/vnode_internal.h>
51 #include <sys/malloc.h>
52 #include <sys/mount_internal.h>
53 #include <sys/param.h>
54 #include <sys/proc_internal.h>
55 #include <sys/sysctl.h>
56 #include <sys/unistd.h>
59 #include <sys/aio_kern.h>
60 #include <sys/sysproto.h>
62 #include <machine/limits.h>
64 #include <mach/mach_types.h>
65 #include <kern/kern_types.h>
66 #include <kern/waitq.h>
67 #include <kern/zalloc.h>
68 #include <kern/task.h>
69 #include <kern/sched_prim.h>
71 #include <vm/vm_map.h>
73 #include <os/refcnt.h>
75 #include <sys/kdebug.h>
76 #define AIO_work_queued 1
77 #define AIO_worker_wake 2
78 #define AIO_completion_sig 3
79 #define AIO_completion_cleanup_wait 4
80 #define AIO_completion_cleanup_wake 5
81 #define AIO_completion_suspend_wake 6
82 #define AIO_fsync_delay 7
84 #define AIO_cancel_async_workq 11
85 #define AIO_cancel_sync_workq 12
86 #define AIO_cancel_activeq 13
87 #define AIO_cancel_doneq 14
93 #define AIO_error_val 61
94 #define AIO_error_activeq 62
95 #define AIO_error_workq 63
97 #define AIO_return_val 71
98 #define AIO_return_activeq 72
99 #define AIO_return_workq 73
102 #define AIO_exit_sleep 91
103 #define AIO_close 100
104 #define AIO_close_sleep 101
105 #define AIO_suspend 110
106 #define AIO_suspend_sleep 111
107 #define AIO_worker_thread 120
109 __options_decl(aio_entry_flags_t, uint32_t, {
110	AIO_READ        = 0x00000001, /* a read */
111	AIO_WRITE       = 0x00000002, /* a write */
112	AIO_FSYNC       = 0x00000004, /* aio_fsync with op = O_SYNC */
113	AIO_DSYNC       = 0x00000008, /* aio_fsync with op = O_DSYNC (not supported yet) */
114	AIO_LIO         = 0x00000010, /* lio_listio generated IO */
115	AIO_LIO_WAIT    = 0x00000020, /* lio_listio is waiting on the leader */
118	 * These flags mean that this entry is blocking either:
119	 * - close (AIO_CLOSE_WAIT)
120	 * - exit or exec (AIO_EXIT_WAIT)
122	 * These flags are mutually exclusive, and the AIO_EXIT_WAIT variant
123	 * will also neuter notifications in do_aio_completion_and_unlock().
125	AIO_CLOSE_WAIT  = 0x00004000,
126	AIO_EXIT_WAIT   = 0x00008000,
129 /*! @struct aio_workq_entry
132 * This represents a piece of aio/lio work.
134 * The ownership rules go as follows:
136 * - the "proc" owns one refcount on the entry (from creation), while it is
137 * enqueued on the aio_activeq and then the aio_doneq.
139 * either aio_return() (user read the status) or _aio_exit() (the process
140 * died) will dequeue the entry and consume this ref.
142 * - the async workqueue owns one refcount once the work is submitted,
143 * which is consumed in do_aio_completion_and_unlock().
145 * This ref protects the entry through the end of
146 * do_aio_completion_and_unlock() (when signal delivery happens).
148 * - lio_listio() for batches picks one of the entries to be the "leader"
149 * of the batch. Each work item will have a refcount on its leader
150 * so that the accounting of the batch completion can be done on the leader
151 * (to be able to decrement lio_pending).
153 * This ref is consumed in do_aio_completion_and_unlock() as well.
155 * - lastly, in lio_listio() when the LIO_WAIT behavior is requested,
156 * an extra ref is taken in this syscall as it needs to keep accessing
157 * the leader "lio_pending" field until it hits 0.
159 struct aio_workq_entry {
161	TAILQ_ENTRY(aio_workq_entry) aio_workq_link;
164	TAILQ_ENTRY(aio_workq_entry) aio_proc_link;     /* p_aio_activeq or p_aio_doneq */
165	user_ssize_t            returnval;              /* return value from read / write request */
166	errno_t                 errorval;               /* error value from read / write request */
167	os_refcnt_t             aio_refcount;
168	aio_entry_flags_t       flags;
170	int                     lio_pending;            /* pending I/Os in lio group, only on leader */
171	struct aio_workq_entry *lio_leader;              /* pointer to the lio leader, can be self */
173	/* Initialized and never changed, safe to access */
174	struct proc            *procp;                   /* user proc that queued this request */
175	user_addr_t             uaiocbp;                /* pointer passed in from user land */
176	struct user_aiocb       aiocb;                  /* copy of aiocb from user land */
177	thread_t                thread;                 /* thread that queued this request */
179	/* Initialized, and possibly freed by aio_work_thread() or at free if cancelled */
180	vm_map_t                aio_map;                /* user land map we have a reference to */
184 * aio requests queue up on the aio_async_workq or lio_sync_workq (for
185 * lio_listio LIO_WAIT). Requests then move to the per process aio_activeq
186 * (proc.aio_activeq) when one of our worker threads start the IO.
187 * And finally, requests move to the per process aio_doneq (proc.aio_doneq)
188 * when the IO request completes. The request remains on aio_doneq until
189 * user process calls aio_return or the process exits, either way that is our
190 * trigger to release aio resources.
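/*
 * Illustrative userspace sketch (an assumed example, not part of this file):
 * the observable side of the flow described above. aio_read() corresponds to
 * the entry landing on an aio_async_workq, the EINPROGRESS phase to the
 * per-process aio_activeq, and aio_return() to reaping the entry from the
 * per-process aio_doneq.
 *
 *	#include <aio.h>
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		static char buf[4096];
 *		struct aiocb cb;
 *		int fd = open("/etc/hosts", O_RDONLY);
 *
 *		memset(&cb, 0, sizeof(cb));
 *		cb.aio_fildes = fd;
 *		cb.aio_buf = buf;
 *		cb.aio_nbytes = sizeof(buf);
 *		cb.aio_offset = 0;
 *
 *		if (fd < 0 || aio_read(&cb) != 0) {
 *			return 1;
 *		}
 *		while (aio_error(&cb) == EINPROGRESS) {
 *			usleep(1000);
 *		}
 *		int err = aio_error(&cb);
 *		ssize_t nread = aio_return(&cb);
 *
 *		printf("error %d, returned %zd\n", err, nread);
 *		close(fd);
 *		return 0;
 *	}
 */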
192 typedef struct aio_workq {
193	TAILQ_HEAD(, aio_workq_entry) aioq_entries;
194	lck_spin_t aioq_lock;
195	struct waitq aioq_waitq;

198 #define AIO_NUM_WORK_QUEUES 1
199 struct aio_anchor_cb {
200	os_atomic(int) aio_total_count; /* total extant entries */

202	/* Hash table of queues here */
204	struct aio_workq aio_async_workqs[AIO_NUM_WORK_QUEUES];

206 typedef struct aio_anchor_cb aio_anchor_cb;
209 * Notes on aio sleep / wake channels.
210 * We currently pick a couple of fields within the proc structure that give us
211 * sleep channels which currently do not collide with any other kernel routines.
212 * At this time, for binary compatibility reasons, we cannot create new proc fields.
214 #define AIO_SUSPEND_SLEEP_CHAN p_aio_activeq
215 #define AIO_CLEANUP_SLEEP_CHAN p_aio_total_count
217 #define ASSERT_AIO_FROM_PROC(aiop, theproc) \
218 if ((aiop)->procp != (theproc)) { \
219 panic("AIO on a proc list that does not belong to that proc.\n"); \
225 static void aio_proc_lock(proc_t procp
);
226 static void aio_proc_lock_spin(proc_t procp
);
227 static void aio_proc_unlock(proc_t procp
);
228 static lck_mtx_t
*aio_proc_mutex(proc_t procp
);
229 static bool aio_has_active_requests_for_process(proc_t procp
);
230 static bool aio_proc_has_active_requests_for_file(proc_t procp
, int fd
);
231 static boolean_t
is_already_queued(proc_t procp
, user_addr_t aiocbp
);
233 static aio_workq_t
aio_entry_workq(aio_workq_entry
*entryp
);
234 static void aio_workq_remove_entry_locked(aio_workq_t queue
, aio_workq_entry
*entryp
);
235 static void aio_workq_add_entry_locked(aio_workq_t queue
, aio_workq_entry
*entryp
);
236 static void aio_entry_ref(aio_workq_entry
*entryp
);
237 static void aio_entry_unref(aio_workq_entry
*entryp
);
238 static bool aio_entry_try_workq_remove(aio_workq_entry
*entryp
);
239 static boolean_t
aio_delay_fsync_request(aio_workq_entry
*entryp
);
240 static void aio_free_request(aio_workq_entry
*entryp
);
242 static void aio_workq_init(aio_workq_t wq
);
243 static void aio_workq_lock_spin(aio_workq_t wq
);
244 static void aio_workq_unlock(aio_workq_t wq
);
245 static lck_spin_t
*aio_workq_lock(aio_workq_t wq
);
247 static void aio_work_thread(void *arg
, wait_result_t wr
);
248 static aio_workq_entry
*aio_get_some_work(void);
250 static int aio_queue_async_request(proc_t procp
, user_addr_t aiocbp
, aio_entry_flags_t
);
251 static int aio_validate(proc_t
, aio_workq_entry
*entryp
);
253 static int do_aio_cancel_locked(proc_t p
, int fd
, user_addr_t aiocbp
, aio_entry_flags_t
);
254 static void do_aio_completion_and_unlock(proc_t p
, aio_workq_entry
*entryp
);
255 static int do_aio_fsync(aio_workq_entry
*entryp
);
256 static int do_aio_read(aio_workq_entry
*entryp
);
257 static int do_aio_write(aio_workq_entry
*entryp
);
258 static void do_munge_aiocb_user32_to_user(struct user32_aiocb
*my_aiocbp
, struct user_aiocb
*the_user_aiocbp
);
259 static void do_munge_aiocb_user64_to_user(struct user64_aiocb
*my_aiocbp
, struct user_aiocb
*the_user_aiocbp
);
260 static aio_workq_entry
*aio_create_queue_entry(proc_t procp
, user_addr_t aiocbp
, aio_entry_flags_t
);
261 static int aio_copy_in_list(proc_t
, user_addr_t
, user_addr_t
*, int);
263 #define ASSERT_AIO_PROC_LOCK_OWNED(p) LCK_MTX_ASSERT(aio_proc_mutex(p), LCK_MTX_ASSERT_OWNED)
264 #define ASSERT_AIO_WORKQ_LOCK_OWNED(q) LCK_SPIN_ASSERT(aio_workq_lock(q), LCK_ASSERT_OWNED)
267 * EXTERNAL PROTOTYPES
270 /* in ...bsd/kern/sys_generic.c */
271 extern int dofileread(vfs_context_t ctx
, struct fileproc
*fp
,
272 user_addr_t bufp
, user_size_t nbyte
,
273 off_t offset
, int flags
, user_ssize_t
*retval
);
274 extern int dofilewrite(vfs_context_t ctx
, struct fileproc
*fp
,
275 user_addr_t bufp
, user_size_t nbyte
, off_t offset
,
276 int flags
, user_ssize_t
*retval
);
279 * aio external global variables.
281 extern int aio_max_requests
; /* AIO_MAX - configurable */
282 extern int aio_max_requests_per_process
; /* AIO_PROCESS_MAX - configurable */
283 extern int aio_worker_threads
; /* AIO_THREAD_COUNT - configurable */
287 * aio static variables.
289 static aio_anchor_cb aio_anchor
= {
290 .aio_num_workqs
= AIO_NUM_WORK_QUEUES
,
292 os_refgrp_decl(static, aio_refgrp
, "aio", NULL
);
293 static LCK_GRP_DECLARE(aio_proc_lock_grp
, "aio_proc");
294 static LCK_GRP_DECLARE(aio_queue_lock_grp
, "aio_queue");
295 static LCK_MTX_DECLARE(aio_proc_mtx
, &aio_proc_lock_grp
);
297 static ZONE_DECLARE(aio_workq_zonep
, "aiowq", sizeof(aio_workq_entry
),
302 aio_entry_workq(__unused aio_workq_entry
*entryp
)
304 return &aio_anchor
.aio_async_workqs
[0];
308 aio_workq_init(aio_workq_t wq
)
310 TAILQ_INIT(&wq
->aioq_entries
);
311 lck_spin_init(&wq
->aioq_lock
, &aio_queue_lock_grp
, LCK_ATTR_NULL
);
312 waitq_init(&wq
->aioq_waitq
, SYNC_POLICY_FIFO
);
317 * Can be passed a queue which is locked spin.
320 aio_workq_remove_entry_locked(aio_workq_t queue
, aio_workq_entry
*entryp
)
322 ASSERT_AIO_WORKQ_LOCK_OWNED(queue
);
324 if (entryp
->aio_workq_link
.tqe_prev
== NULL
) {
325 panic("Trying to remove an entry from a work queue, but it is not on a queue\n");
328 TAILQ_REMOVE(&queue
->aioq_entries
, entryp
, aio_workq_link
);
329 entryp
->aio_workq_link
.tqe_prev
= NULL
; /* Not on a workq */
333 aio_workq_add_entry_locked(aio_workq_t queue
, aio_workq_entry
*entryp
)
335 ASSERT_AIO_WORKQ_LOCK_OWNED(queue
);
337 TAILQ_INSERT_TAIL(&queue
->aioq_entries
, entryp
, aio_workq_link
);
341 aio_proc_lock(proc_t procp
)
343 lck_mtx_lock(aio_proc_mutex(procp
));
347 aio_proc_lock_spin(proc_t procp
)
349 lck_mtx_lock_spin(aio_proc_mutex(procp
));
353 aio_has_any_work(void)
355 return os_atomic_load(&aio_anchor
.aio_total_count
, relaxed
) != 0;
359 aio_try_proc_insert_active_locked(proc_t procp
, aio_workq_entry
*entryp
)
363 ASSERT_AIO_PROC_LOCK_OWNED(procp
);
365 if (procp
->p_aio_total_count
>= aio_max_requests_per_process
) {
369 if (is_already_queued(procp
, entryp
->uaiocbp
)) {
373 os_atomic_rmw_loop(&aio_anchor
.aio_total_count
, old
, new, relaxed
, {
374 if (old
>= aio_max_requests
) {
375 os_atomic_rmw_loop_give_up(return false);
380 TAILQ_INSERT_TAIL(&procp
->p_aio_activeq
, entryp
, aio_proc_link
);
381 procp
->p_aio_total_count
++;
386 aio_proc_move_done_locked(proc_t procp
, aio_workq_entry
*entryp
)
388 TAILQ_REMOVE(&procp
->p_aio_activeq
, entryp
, aio_proc_link
);
389 TAILQ_INSERT_TAIL(&procp
->p_aio_doneq
, entryp
, aio_proc_link
);
393 aio_proc_remove_done_locked(proc_t procp
, aio_workq_entry
*entryp
)
395 TAILQ_REMOVE(&procp
->p_aio_doneq
, entryp
, aio_proc_link
);
396 entryp
->aio_proc_link
.tqe_prev
= NULL
;
397 if (os_atomic_dec_orig(&aio_anchor
.aio_total_count
, relaxed
) <= 0) {
398 panic("Negative total AIO count!\n");
400 if (procp
->p_aio_total_count
-- <= 0) {
401 panic("proc %p: p_aio_total_count accounting mismatch", procp
);
406 aio_proc_unlock(proc_t procp
)
408 lck_mtx_unlock(aio_proc_mutex(procp
));
412 aio_proc_mutex(proc_t procp
)
414 return &procp
->p_mlock
;
418 aio_entry_ref(aio_workq_entry
*entryp
)
420 os_ref_retain(&entryp
->aio_refcount
);
424 aio_entry_unref(aio_workq_entry
*entryp
)
426 if (os_ref_release(&entryp
->aio_refcount
) == 0) {
427 aio_free_request(entryp
);
432 aio_entry_try_workq_remove(aio_workq_entry
*entryp
)
434 /* Can only be cancelled if it's still on a work queue */
435 if (entryp
->aio_workq_link
.tqe_prev
!= NULL
) {
438 /* Will have to check again under the lock */
439 queue
= aio_entry_workq(entryp
);
440 aio_workq_lock_spin(queue
);
441 if (entryp
->aio_workq_link
.tqe_prev
!= NULL
) {
442 aio_workq_remove_entry_locked(queue
, entryp
);
443 aio_workq_unlock(queue
);
446 aio_workq_unlock(queue
);
454 aio_workq_lock_spin(aio_workq_t wq
)
456 lck_spin_lock(aio_workq_lock(wq
));
460 aio_workq_unlock(aio_workq_t wq
)
462 lck_spin_unlock(aio_workq_lock(wq
));
466 aio_workq_lock(aio_workq_t wq
)
468 return &wq
->aioq_lock
;
472 * aio_cancel - attempt to cancel one or more async IO requests currently
473 * outstanding against file descriptor uap->fd. If uap->aiocbp is not
474 * NULL then only one specific IO is cancelled (if possible). If uap->aiocbp
475 * is NULL then all outstanding async IO request for the given file
476 * descriptor are cancelled (if possible).
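/*
 * Illustrative userspace sketch (an assumed example, not part of this file;
 * the helper name is hypothetical, and <aio.h>, <errno.h> and <unistd.h> are
 * assumed): callers must handle all three results, and even a cancelled
 * request is still reaped with aio_return() so the kernel can release its
 * entry (a cancelled entry reports an error of ECANCELED and a return value
 * of -1, as set in do_aio_cancel_locked() below).
 *
 *	static ssize_t
 *	cancel_one(int fd, struct aiocb *cb)
 *	{
 *		int how = aio_cancel(fd, cb);
 *
 *		if (how == -1) {
 *			return -1;
 *		}
 *		if (how == AIO_NOTCANCELED) {
 *			while (aio_error(cb) == EINPROGRESS) {
 *				usleep(1000);
 *			}
 *		}
 *		return aio_return(cb);
 *	}
 */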
479 aio_cancel(proc_t p
, struct aio_cancel_args
*uap
, int *retval
)
481 struct user_aiocb my_aiocb
;
484 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_cancel
) | DBG_FUNC_START
,
485 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, 0, 0, 0);
487 /* quick check to see if there are any async IO requests queued up */
488 if (!aio_has_any_work()) {
490 *retval
= AIO_ALLDONE
;
495 if (uap
->aiocbp
!= USER_ADDR_NULL
) {
496 if (proc_is64bit(p
)) {
497 struct user64_aiocb aiocb64
;
499 result
= copyin(uap
->aiocbp
, &aiocb64
, sizeof(aiocb64
));
501 do_munge_aiocb_user64_to_user(&aiocb64
, &my_aiocb
);
504 struct user32_aiocb aiocb32
;
506 result
= copyin(uap
->aiocbp
, &aiocb32
, sizeof(aiocb32
));
508 do_munge_aiocb_user32_to_user(&aiocb32
, &my_aiocb
);
517 /* NOTE - POSIX standard says a mismatch between the file */
518 /* descriptor passed in and the file descriptor embedded in */
519 /* the aiocb causes unspecified results. We return EBADF in */
520 /* that situation. */
521 if (uap
->fd
!= my_aiocb
.aio_fildes
) {
528 result
= do_aio_cancel_locked(p
, uap
->fd
, uap
->aiocbp
, 0);
529 ASSERT_AIO_PROC_LOCK_OWNED(p
);
541 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_cancel
) | DBG_FUNC_END
,
542 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, result
, 0, 0);
549 * _aio_close - internal function used to clean up async IO requests for
550 * a file descriptor that is closing.
553 __private_extern__
void
554 _aio_close(proc_t p
, int fd
)
558 /* quick check to see if there are any async IO requests queued up */
559 if (!aio_has_any_work()) {
563 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_close
) | DBG_FUNC_START
,
564 VM_KERNEL_ADDRPERM(p
), fd
, 0, 0, 0);
566 /* cancel all async IO requests on our todo queues for this file descriptor */
568 error
= do_aio_cancel_locked(p
, fd
, USER_ADDR_NULL
, AIO_CLOSE_WAIT
);
569 ASSERT_AIO_PROC_LOCK_OWNED(p
);
570 if (error
== AIO_NOTCANCELED
) {
572 * AIO_NOTCANCELED is returned when we find an aio request for this process
573 * and file descriptor on the active async IO queue. Active requests cannot
574 * be cancelled so we must wait for them to complete. We will get a special
575 * wake up call on our channel used to sleep for ALL active requests to
576 * complete. This sleep channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used
577 * when we must wait for all active aio requests.
580 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_close_sleep
) | DBG_FUNC_NONE
,
581 VM_KERNEL_ADDRPERM(p
), fd
, 0, 0, 0);
583 while (aio_proc_has_active_requests_for_file(p
, fd
)) {
584 msleep(&p
->AIO_CLEANUP_SLEEP_CHAN
, aio_proc_mutex(p
), PRIBIO
, "aio_close", 0);
590 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_close
) | DBG_FUNC_END
,
591 VM_KERNEL_ADDRPERM(p
), fd
, 0, 0, 0);
596 * aio_error - return the error status associated with the async IO
597 * request referred to by uap->aiocbp. The error status is the errno
598 * value that would be set by the corresponding IO request (read, write,
599 * fdatasync, or sync).
602 aio_error(proc_t p
, struct aio_error_args
*uap
, int *retval
)
604 aio_workq_entry
*entryp
;
607 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_error
) | DBG_FUNC_START
,
608 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, 0, 0, 0);
610 /* see if there are any aios to check */
611 if (!aio_has_any_work()) {
617 /* look for a match on our queue of async IO requests that have completed */
618 TAILQ_FOREACH(entryp
, &p
->p_aio_doneq
, aio_proc_link
) {
619 if (entryp
->uaiocbp
== uap
->aiocbp
) {
620 ASSERT_AIO_FROM_PROC(entryp
, p
);
622 *retval
= entryp
->errorval
;
625 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_error_val
) | DBG_FUNC_NONE
,
626 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, *retval
, 0, 0);
631 /* look for a match on our queue of active async IO requests */
632 TAILQ_FOREACH(entryp
, &p
->p_aio_activeq
, aio_proc_link
) {
633 if (entryp
->uaiocbp
== uap
->aiocbp
) {
634 ASSERT_AIO_FROM_PROC(entryp
, p
);
635 *retval
= EINPROGRESS
;
637 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_error_activeq
) | DBG_FUNC_NONE
,
638 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, *retval
, 0, 0);
646 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_error
) | DBG_FUNC_END
,
647 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, error
, 0, 0);
655 * aio_fsync - asynchronously force all IO operations associated
656 * with the file indicated by the file descriptor (uap->aiocbp->aio_fildes) and
657 * queued at the time of the call to the synchronized completion state.
658 * NOTE - we do not support op O_DSYNC at this point since we do not support the fdatasync() call.
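/*
 * Illustrative userspace sketch (an assumed example, not part of this file;
 * the helper name is hypothetical, and <aio.h>, <errno.h>, <fcntl.h>,
 * <string.h> and <unistd.h> are assumed): request an asynchronous flush and
 * wait for it. Only op == O_SYNC (or 0, for backward compatibility) is
 * accepted here; the worker threads hold the fsync back until IO queued
 * ahead of it has completed (see aio_delay_fsync_request() below).
 *
 *	static int
 *	flush_async(int fd, struct aiocb *sync_cb)
 *	{
 *		memset(sync_cb, 0, sizeof(*sync_cb));
 *		sync_cb->aio_fildes = fd;
 *
 *		if (aio_fsync(O_SYNC, sync_cb) != 0) {
 *			return -1;
 *		}
 *		while (aio_error(sync_cb) == EINPROGRESS) {
 *			usleep(1000);
 *		}
 *		return (int)aio_return(sync_cb);
 *	}
 */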
662 aio_fsync(proc_t p
, struct aio_fsync_args
*uap
, int *retval
)
664 aio_entry_flags_t fsync_kind
;
667 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_fsync
) | DBG_FUNC_START
,
668 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, uap
->op
, 0, 0);
671 /* 0 := O_SYNC for binary backward compatibility with Panther */
672 if (uap
->op
== O_SYNC
|| uap
->op
== 0) {
673 fsync_kind
= AIO_FSYNC
;
674 } else if (uap
->op
== O_DSYNC
) {
675 fsync_kind
= AIO_DSYNC
;
682 error
= aio_queue_async_request(p
, uap
->aiocbp
, fsync_kind
);
688 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_fsync
) | DBG_FUNC_END
,
689 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, error
, 0, 0);
695 /* aio_read - asynchronously read uap->aiocbp->aio_nbytes bytes from the
696 * file descriptor (uap->aiocbp->aio_fildes) into the buffer
697 * (uap->aiocbp->aio_buf).
700 aio_read(proc_t p
, struct aio_read_args
*uap
, int *retval
)
704 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_read
) | DBG_FUNC_START
,
705 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, 0, 0, 0);
709 error
= aio_queue_async_request(p
, uap
->aiocbp
, AIO_READ
);
714 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_read
) | DBG_FUNC_END
,
715 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, error
, 0, 0);
722 * aio_return - return the return status associated with the async IO
723 * request referred to by uap->aiocbp. The return status is the value
724 * that would be returned by the corresponding IO request (read, write,
725 * fdatasync, or sync). This is where we release kernel resources
726 * held for the async IO call associated with the given aiocb pointer.
729 aio_return(proc_t p
, struct aio_return_args
*uap
, user_ssize_t
*retval
)
731 aio_workq_entry
*entryp
;
734 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_return
) | DBG_FUNC_START
,
735 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, 0, 0, 0);
737 /* See if there are any entries to check */
738 if (!aio_has_any_work()) {
745 /* look for a match on our queue of async IO requests that have completed */
746 TAILQ_FOREACH(entryp
, &p
->p_aio_doneq
, aio_proc_link
) {
747 ASSERT_AIO_FROM_PROC(entryp
, p
);
748 if (entryp
->uaiocbp
== uap
->aiocbp
) {
749 /* Done and valid for aio_return(), pull it off the list */
750 aio_proc_remove_done_locked(p
, entryp
);
752 *retval
= entryp
->returnval
;
756 aio_entry_unref(entryp
);
758 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_return_val
) | DBG_FUNC_NONE
,
759 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, *retval
, 0, 0);
764 /* look for a match on our queue of active async IO requests */
765 TAILQ_FOREACH(entryp
, &p
->p_aio_activeq
, aio_proc_link
) {
766 ASSERT_AIO_FROM_PROC(entryp
, p
);
767 if (entryp
->uaiocbp
== uap
->aiocbp
) {
769 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_return_activeq
) | DBG_FUNC_NONE
,
770 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, *retval
, 0, 0);
778 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_return
) | DBG_FUNC_END
,
779 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, error
, 0, 0);
786 * _aio_exec - internal function used to clean up async IO requests for
787 * a process that is going away due to exec(). We cancel any async IOs
788 * we can and wait for those already active. We also disable signaling
789 * for cancelled or active aio requests that complete.
790 * This routine MAY block!
792 __private_extern__
void
795 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_exec
) | DBG_FUNC_START
,
796 VM_KERNEL_ADDRPERM(p
), 0, 0, 0, 0);
800 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_exec
) | DBG_FUNC_END
,
801 VM_KERNEL_ADDRPERM(p
), 0, 0, 0, 0);
806 * _aio_exit - internal function used to clean up async IO requests for
807 * a process that is terminating (via exit() or exec()). We cancel any async IOs
808 * we can and wait for those already active. We also disable signaling
809 * for cancelled or active aio requests that complete. This routine MAY block!
811 __private_extern__
void
814 TAILQ_HEAD(, aio_workq_entry
) tofree
= TAILQ_HEAD_INITIALIZER(tofree
);
815 aio_workq_entry
*entryp
, *tmp
;
818 /* quick check to see if there are any async IO requests queued up */
819 if (!aio_has_any_work()) {
823 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_exit
) | DBG_FUNC_START
,
824 VM_KERNEL_ADDRPERM(p
), 0, 0, 0, 0);
829 * cancel async IO requests on the todo work queue and wait for those
830 * already active to complete.
832 error
= do_aio_cancel_locked(p
, -1, USER_ADDR_NULL
, AIO_EXIT_WAIT
);
833 ASSERT_AIO_PROC_LOCK_OWNED(p
);
834 if (error
== AIO_NOTCANCELED
) {
836 * AIO_NOTCANCELED is returned when we find an aio request for this process
837 * on the active async IO queue. Active requests cannot be cancelled so we
838 * must wait for them to complete. We will get a special wake up call on
839 * our channel used to sleep for ALL active requests to complete. This sleep
840 * channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used when we must wait for all
841 * active aio requests.
844 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_exit_sleep
) | DBG_FUNC_NONE
,
845 VM_KERNEL_ADDRPERM(p
), 0, 0, 0, 0);
847 while (aio_has_active_requests_for_process(p
)) {
848 msleep(&p
->AIO_CLEANUP_SLEEP_CHAN
, aio_proc_mutex(p
), PRIBIO
, "aio_exit", 0);
852 assert(!aio_has_active_requests_for_process(p
));
854 /* release all aio resources used by this process */
855 TAILQ_FOREACH_SAFE(entryp
, &p
->p_aio_doneq
, aio_proc_link
, tmp
) {
856 ASSERT_AIO_FROM_PROC(entryp
, p
);
858 aio_proc_remove_done_locked(p
, entryp
);
859 TAILQ_INSERT_TAIL(&tofree
, entryp
, aio_proc_link
);
864 /* free all the entries outside of the aio_proc_lock() */
865 TAILQ_FOREACH_SAFE(entryp
, &tofree
, aio_proc_link
, tmp
) {
866 entryp
->aio_proc_link
.tqe_prev
= NULL
;
867 aio_entry_unref(entryp
);
870 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_exit
) | DBG_FUNC_END
,
871 VM_KERNEL_ADDRPERM(p
), 0, 0, 0, 0);
876 should_cancel(aio_workq_entry
*entryp
, int fd
, user_addr_t aiocbp
,
877 aio_entry_flags_t reason
)
879 if (reason
& AIO_EXIT_WAIT
) {
880 /* caller is _aio_exit() */
883 if (fd
!= entryp
->aiocb
.aio_fildes
) {
884 /* not the file we're looking for */
888 * aio_cancel() or _aio_close() cancel
889 * everything for a given fd when aiocbp is NULL
891 return aiocbp
== USER_ADDR_NULL
|| entryp
->uaiocbp
== aiocbp
;
895 * do_aio_cancel_locked - cancel async IO requests (if possible). We get called by
896 * aio_cancel, close, and at exit.
897 * There are three modes of operation: 1) cancel all async IOs for a process -
898 * fd is 0 and aiocbp is NULL 2) cancel all async IOs for file descriptor - fd
899 * is > 0 and aiocbp is NULL 3) cancel one async IO associated with the given aiocbp.
901 * Returns -1 if no matches were found, AIO_CANCELED when we cancelled all
902 * target async IO requests, AIO_NOTCANCELED if we could not cancel all
903 * target async IO requests, and AIO_ALLDONE if all target async IO requests
904 * were already complete.
905 * WARNING - do not dereference aiocbp in this routine, it may point to user
906 * land data that has not been copied in (when called from aio_cancel())
908 * Called with proc locked, and returns the same way.
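/*
 * For reference, the three in-file callers map onto those modes as follows
 * (argument patterns copied from the call sites in this file):
 *
 *	aio_cancel():  do_aio_cancel_locked(p, uap->fd, uap->aiocbp, 0);
 *	_aio_close():  do_aio_cancel_locked(p, fd, USER_ADDR_NULL, AIO_CLOSE_WAIT);
 *	_aio_exit():   do_aio_cancel_locked(p, -1, USER_ADDR_NULL, AIO_EXIT_WAIT);
 */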
911 do_aio_cancel_locked(proc_t p
, int fd
, user_addr_t aiocbp
,
912 aio_entry_flags_t reason
)
914 bool multiple_matches
= (aiocbp
== USER_ADDR_NULL
);
915 aio_workq_entry
*entryp
, *tmp
;
918 ASSERT_AIO_PROC_LOCK_OWNED(p
);
920 /* look for a match on our queue of async todo work. */
923 TAILQ_FOREACH_SAFE(entryp
, &p
->p_aio_activeq
, aio_proc_link
, tmp
) {
924 ASSERT_AIO_FROM_PROC(entryp
, p
);
926 if (!should_cancel(entryp
, fd
, aiocbp
, reason
)) {
931 /* mark the entry as blocking close or exit/exec */
932 entryp
->flags
|= reason
;
933 if ((entryp
->flags
& AIO_EXIT_WAIT
) && (entryp
->flags
& AIO_CLOSE_WAIT
)) {
934 panic("Close and exit flags set at the same time\n");
938 /* Can only be cancelled if it's still on a work queue */
939 if (aio_entry_try_workq_remove(entryp
)) {
940 entryp
->errorval
= ECANCELED
;
941 entryp
->returnval
= -1;
943 /* Now it's officially cancelled. Do the completion */
944 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_cancel_async_workq
) | DBG_FUNC_NONE
,
945 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
947 do_aio_completion_and_unlock(p
, entryp
);
951 if (multiple_matches
) {
953 * Restart from the head of the proc active queue since it
954 * may have been changed while we were away doing completion
957 * Note that if we found an uncancellable AIO before, we will
958 * either find it again or discover that it's been completed,
959 * so resetting the result will not cause us to return success
960 * despite outstanding AIOs.
969 * It's been taken off the active queue already, i.e. is in flight.
970 * All we can do is ask for notification.
972 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_cancel_activeq
) | DBG_FUNC_NONE
,
973 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
976 result
= AIO_NOTCANCELED
;
977 if (!multiple_matches
) {
983 * if we didn't find any matches on the todo or active queues then look for a
984 * match on our queue of async IO requests that have completed and if found
985 * return AIO_ALLDONE result.
987 * Proc AIO lock is still held.
990 TAILQ_FOREACH(entryp
, &p
->p_aio_doneq
, aio_proc_link
) {
991 ASSERT_AIO_FROM_PROC(entryp
, p
);
992 if (should_cancel(entryp
, fd
, aiocbp
, reason
)) {
993 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_cancel_doneq
) | DBG_FUNC_NONE
,
994 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
997 result
= AIO_ALLDONE
;
998 if (!multiple_matches
) {
1010 * aio_suspend - suspend the calling thread until at least one of the async
1011 * IO operations referenced by uap->aiocblist has completed, until a signal
1012 * interrupts the function, or the uap->timeoutp time interval (optional) has elapsed.
1014 * Returns 0 if one or more async IOs have completed, else -1 and errno is
1015 * set appropriately - EAGAIN if the timeout elapses or EINTR if a signal interrupts the wait.
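/*
 * Illustrative userspace sketch (an assumed example, not part of this file;
 * the helper name is hypothetical, and <aio.h>, <errno.h> and <time.h> are
 * assumed): wait up to one second for any of a set of outstanding requests.
 * NULL slots in the list are permitted, matching the check below. Returns 0
 * when at least one listed request has completed, 1 on timeout or signal,
 * and -1 on any other error.
 *
 *	static int
 *	wait_for_any(const struct aiocb *const list[], int nent)
 *	{
 *		const struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *
 *		if (aio_suspend(list, nent, &ts) == 0) {
 *			return 0;
 *		}
 *		return (errno == EAGAIN || errno == EINTR) ? 1 : -1;
 *	}
 */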
1019 aio_suspend(proc_t p
, struct aio_suspend_args
*uap
, int *retval
)
1021 __pthread_testcancel(1);
1022 return aio_suspend_nocancel(p
, (struct aio_suspend_nocancel_args
*)uap
, retval
);
1027 aio_suspend_nocancel(proc_t p
, struct aio_suspend_nocancel_args
*uap
, int *retval
)
1032 struct user_timespec ts
;
1033 aio_workq_entry
*entryp
;
1034 user_addr_t
*aiocbpp
;
1035 size_t aiocbpp_size
;
1037 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_suspend
) | DBG_FUNC_START
,
1038 VM_KERNEL_ADDRPERM(p
), uap
->nent
, 0, 0, 0);
1044 if (!aio_has_any_work()) {
1046 goto ExitThisRoutine
;
1049 if (uap
->nent
< 1 || uap
->nent
> aio_max_requests_per_process
||
1050 os_mul_overflow(sizeof(user_addr_t
), uap
->nent
, &aiocbpp_size
)) {
1052 goto ExitThisRoutine
;
1055 if (uap
->timeoutp
!= USER_ADDR_NULL
) {
1056 if (proc_is64bit(p
)) {
1057 struct user64_timespec temp
;
1058 error
= copyin(uap
->timeoutp
, &temp
, sizeof(temp
));
1060 ts
.tv_sec
= (user_time_t
)temp
.tv_sec
;
1061 ts
.tv_nsec
= (user_long_t
)temp
.tv_nsec
;
1064 struct user32_timespec temp
;
1065 error
= copyin(uap
->timeoutp
, &temp
, sizeof(temp
));
1067 ts
.tv_sec
= temp
.tv_sec
;
1068 ts
.tv_nsec
= temp
.tv_nsec
;
1073 goto ExitThisRoutine
;
1076 if (ts
.tv_sec
< 0 || ts
.tv_nsec
< 0 || ts
.tv_nsec
>= 1000000000) {
1078 goto ExitThisRoutine
;
1081 nanoseconds_to_absolutetime((uint64_t)ts
.tv_sec
* NSEC_PER_SEC
+ ts
.tv_nsec
,
1083 clock_absolutetime_interval_to_deadline(abstime
, &abstime
);
1086 aiocbpp
= kheap_alloc(KHEAP_TEMP
, aiocbpp_size
, Z_WAITOK
);
1087 if (aiocbpp
== NULL
|| aio_copy_in_list(p
, uap
->aiocblist
, aiocbpp
, uap
->nent
)) {
1089 goto ExitThisRoutine
;
1092 /* check list of aio requests to see if any have completed */
1093 check_for_our_aiocbp
:
1094 aio_proc_lock_spin(p
);
1095 for (i
= 0; i
< uap
->nent
; i
++) {
1098 /* NULL elements are legal so check for 'em */
1099 aiocbp
= *(aiocbpp
+ i
);
1100 if (aiocbp
== USER_ADDR_NULL
) {
1104 /* return immediately if any aio request in the list is done */
1105 TAILQ_FOREACH(entryp
, &p
->p_aio_doneq
, aio_proc_link
) {
1106 ASSERT_AIO_FROM_PROC(entryp
, p
);
1107 if (entryp
->uaiocbp
== aiocbp
) {
1111 goto ExitThisRoutine
;
1116 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_suspend_sleep
) | DBG_FUNC_NONE
,
1117 VM_KERNEL_ADDRPERM(p
), uap
->nent
, 0, 0, 0);
1120 * wait for an async IO to complete or a signal fires or timeout expires.
1121 * we return EAGAIN (35) for timeout expiration and EINTR (4) when a signal
1122 * interrupts us. If an async IO completes before a signal fires or our
1123 * timeout expires, we get a wakeup call from aio_work_thread().
1126 error
= msleep1(&p
->AIO_SUSPEND_SLEEP_CHAN
, aio_proc_mutex(p
),
1127 PCATCH
| PWAIT
| PDROP
, "aio_suspend", abstime
);
1130 * got our wakeup call from aio_work_thread().
1131 * Since we can get a wakeup on this channel from another thread in the
1132 * same process we head back up to make sure this is for the correct aiocbp.
1133 * If it is the correct aiocbp we will return from where we do the check
1134 * (see entryp->uaiocbp == aiocbp after check_for_our_aiocbp label)
1135 * else we will fall out and just sleep again.
1137 goto check_for_our_aiocbp
;
1138 } else if (error
== EWOULDBLOCK
) {
1139 /* our timeout expired */
1142 /* we were interrupted */
1147 if (aiocbpp
!= NULL
) {
1148 kheap_free(KHEAP_TEMP
, aiocbpp
, aiocbpp_size
);
1151 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_suspend
) | DBG_FUNC_END
,
1152 VM_KERNEL_ADDRPERM(p
), uap
->nent
, error
, 0, 0);
1158 /* aio_write - asynchronously write uap->aiocbp->aio_nbytes bytes to the
1159 * file descriptor (uap->aiocbp->aio_fildes) from the buffer
1160 * (uap->aiocbp->aio_buf).
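/*
 * Illustrative userspace sketch (an assumed example, not part of this file;
 * the helper name is hypothetical, and <aio.h> and <string.h> are assumed):
 * queue one asynchronous write. As in do_aio_write() below, aio_offset is
 * honored only when the descriptor was not opened with O_APPEND; for
 * O_APPEND descriptors the data is appended and the offset is ignored.
 *
 *	static int
 *	write_async(int fd, void *buf, size_t len, off_t off, struct aiocb *cb)
 *	{
 *		memset(cb, 0, sizeof(*cb));
 *		cb->aio_fildes = fd;
 *		cb->aio_buf = buf;
 *		cb->aio_nbytes = len;
 *		cb->aio_offset = off;
 *		return aio_write(cb);
 *	}
 */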
1164 aio_write(proc_t p
, struct aio_write_args
*uap
, int *retval __unused
)
1168 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_write
) | DBG_FUNC_START
,
1169 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, 0, 0, 0);
1171 error
= aio_queue_async_request(p
, uap
->aiocbp
, AIO_WRITE
);
1173 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_write
) | DBG_FUNC_END
,
1174 VM_KERNEL_ADDRPERM(p
), uap
->aiocbp
, error
, 0, 0);
1181 aio_copy_in_list(proc_t procp
, user_addr_t aiocblist
, user_addr_t
*aiocbpp
,
1186 /* copyin our aiocb pointers from list */
1187 result
= copyin(aiocblist
, aiocbpp
,
1188 proc_is64bit(procp
) ? (nent
* sizeof(user64_addr_t
))
1189 : (nent
* sizeof(user32_addr_t
)));
1195 * We depend on a list of user_addr_t's so we need to
1196 * munge and expand when these pointers came from a 32-bit process.
1199 if (!proc_is64bit(procp
)) {
1200 /* copy from last to first to deal with overlap */
1201 user32_addr_t
*my_ptrp
= ((user32_addr_t
*)aiocbpp
) + (nent
- 1);
1202 user_addr_t
*my_addrp
= aiocbpp
+ (nent
- 1);
1204 for (int i
= 0; i
< nent
; i
++, my_ptrp
--, my_addrp
--) {
1205 *my_addrp
= (user_addr_t
) (*my_ptrp
);
1214 aio_copy_in_sigev(proc_t procp
, user_addr_t sigp
, struct user_sigevent
*sigev
)
1218 if (sigp
== USER_ADDR_NULL
) {
1223 * We need to munge aio_sigevent since it contains pointers.
1224 * Since we do not know if sigev_value is an int or a ptr we do
1225 * NOT cast the ptr to a user_addr_t. This means if we send
1226 * this info back to user space we need to remember sigev_value
1227 * was not expanded for the 32-bit case.
1229 * Notes: This does NOT affect us since we don't support
1230 * sigev_value yet in the aio context.
1232 if (proc_is64bit(procp
)) {
1234 struct user64_sigevent sigevent64
;
1236 result
= copyin(sigp
, &sigevent64
, sizeof(sigevent64
));
1238 sigev
->sigev_notify
= sigevent64
.sigev_notify
;
1239 sigev
->sigev_signo
= sigevent64
.sigev_signo
;
1240 sigev
->sigev_value
.size_equivalent
.sival_int
= sigevent64
.sigev_value
.size_equivalent
.sival_int
;
1241 sigev
->sigev_notify_function
= sigevent64
.sigev_notify_function
;
1242 sigev
->sigev_notify_attributes
= sigevent64
.sigev_notify_attributes
;
1245 panic("64bit process on 32bit kernel is not supported");
1248 struct user32_sigevent sigevent32
;
1250 result
= copyin(sigp
, &sigevent32
, sizeof(sigevent32
));
1252 sigev
->sigev_notify
= sigevent32
.sigev_notify
;
1253 sigev
->sigev_signo
= sigevent32
.sigev_signo
;
1254 sigev
->sigev_value
.size_equivalent
.sival_int
= sigevent32
.sigev_value
.sival_int
;
1255 sigev
->sigev_notify_function
= CAST_USER_ADDR_T(sigevent32
.sigev_notify_function
);
1256 sigev
->sigev_notify_attributes
= CAST_USER_ADDR_T(sigevent32
.sigev_notify_attributes
);
1269 * validate user_sigevent. at this point we only support
1270 * sigev_notify equal to SIGEV_SIGNAL or SIGEV_NONE. this means
1271 * sigev_value, sigev_notify_function, and sigev_notify_attributes
1272 * are ignored, since SIGEV_THREAD is unsupported. This is consistent
1273 * with no [RTS] (Realtime Signals) option group support.
1276 aio_sigev_validate(const struct user_sigevent
*sigev
)
1278 switch (sigev
->sigev_notify
) {
1283 /* make sure we have a valid signal number */
1284 signum
= sigev
->sigev_signo
;
1285 if (signum
<= 0 || signum
>= NSIG
||
1286 signum
== SIGKILL
|| signum
== SIGSTOP
) {
1296 /* Unsupported [RTS] */
1307 * aio_try_enqueue_work_locked
1309 * Queue up the entry on the aio asynchronous work queue in priority order
1310 * based on the relative priority of the request. We calculate the relative
1311 * priority using the nice value of the caller and the value
1313 * Parameters: procp Process queueing the I/O
1314 * entryp The work queue entry being queued
1315 * leader The work leader if any
1317 * Returns: Whether the enqueue was successful
1319 * Notes: This function is used for both lio_listio and aio
1321 * XXX: At some point, we may have to consider thread priority
1322 * rather than process priority, but we don't maintain the
1323 * adjusted priority for threads the POSIX way.
1325 * Called with proc locked.
1328 aio_try_enqueue_work_locked(proc_t procp
, aio_workq_entry
*entryp
,
1329 aio_workq_entry
*leader
)
1331 aio_workq_t queue
= aio_entry_workq(entryp
);
1333 ASSERT_AIO_PROC_LOCK_OWNED(procp
);
1335 /* Onto proc queue */
1336 if (!aio_try_proc_insert_active_locked(procp
, entryp
)) {
1341 aio_entry_ref(leader
); /* consumed in do_aio_completion_and_unlock */
1342 leader
->lio_pending
++;
1343 entryp
->lio_leader
= leader
;
1346 /* And work queue */
1347 aio_entry_ref(entryp
); /* consumed in do_aio_completion_and_unlock */
1348 aio_workq_lock_spin(queue
);
1349 aio_workq_add_entry_locked(queue
, entryp
);
1350 waitq_wakeup64_one(&queue
->aioq_waitq
, CAST_EVENT64_T(queue
),
1351 THREAD_AWAKENED
, WAITQ_ALL_PRIORITIES
);
1352 aio_workq_unlock(queue
);
1354 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_AIO
, AIO_work_queued
) | DBG_FUNC_START
,
1355 VM_KERNEL_ADDRPERM(procp
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1356 entryp
->flags
, entryp
->aiocb
.aio_fildes
, 0);
1357 KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_AIO
, AIO_work_queued
) | DBG_FUNC_END
,
1358 entryp
->aiocb
.aio_offset
, 0, entryp
->aiocb
.aio_nbytes
, 0, 0);
1364 * lio_listio - initiate a list of IO requests. We process the list of
1365 * aiocbs either synchronously (mode == LIO_WAIT) or asynchronously
1366 * (mode == LIO_NOWAIT).
1368 * The caller gets error and return status for each aiocb in the list
1369 * via aio_error and aio_return. We must keep completed requests until
1370 * released by the aio_return call.
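/*
 * Illustrative userspace sketch (an assumed example, not part of this file;
 * the helper name is hypothetical, and <aio.h> and <unistd.h> are assumed):
 * submit two reads as one batch and block until both finish. Even with
 * LIO_WAIT, each aiocb must still be reaped with aio_return() afterwards,
 * per the comment above. Real code should also check aio_error() on each
 * aiocb when lio_listio() fails, since some requests may have been queued.
 *
 *	static int
 *	read_pair_blocking(int fd, char *a, char *b, size_t len)
 *	{
 *		struct aiocb cb0 = {
 *			.aio_fildes = fd, .aio_buf = a,
 *			.aio_nbytes = len, .aio_offset = 0,
 *			.aio_lio_opcode = LIO_READ,
 *		};
 *		struct aiocb cb1 = {
 *			.aio_fildes = fd, .aio_buf = b,
 *			.aio_nbytes = len, .aio_offset = (off_t)len,
 *			.aio_lio_opcode = LIO_READ,
 *		};
 *		struct aiocb *list[] = { &cb0, &cb1 };
 *
 *		if (lio_listio(LIO_WAIT, list, 2, NULL) != 0) {
 *			return -1;
 *		}
 *		(void)aio_return(&cb0);
 *		(void)aio_return(&cb1);
 *		return 0;
 *	}
 */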
1373 lio_listio(proc_t p
, struct lio_listio_args
*uap
, int *retval __unused
)
1375 aio_workq_entry
*entries
[AIO_LISTIO_MAX
] = { };
1376 user_addr_t aiocbpp
[AIO_LISTIO_MAX
];
1377 struct user_sigevent aiosigev
= { };
1381 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_listio
) | DBG_FUNC_START
,
1382 VM_KERNEL_ADDRPERM(p
), uap
->nent
, uap
->mode
, 0, 0);
1384 if (!(uap
->mode
== LIO_NOWAIT
|| uap
->mode
== LIO_WAIT
)) {
1389 if (uap
->nent
< 1 || uap
->nent
> AIO_LISTIO_MAX
) {
1395 * Use sigevent passed in to lio_listio for each of our calls, but
1396 * only do completion notification after the last request completes.
1398 if (uap
->sigp
!= USER_ADDR_NULL
) {
1399 result
= aio_copy_in_sigev(p
, uap
->sigp
, &aiosigev
);
1403 result
= aio_sigev_validate(&aiosigev
);
1409 if (aio_copy_in_list(p
, uap
->aiocblist
, aiocbpp
, uap
->nent
)) {
1415 * allocate/parse all entries
1417 for (int i
= 0; i
< uap
->nent
; i
++) {
1418 aio_workq_entry
*entryp
;
1420 /* NULL elements are legal so check for 'em */
1421 if (aiocbpp
[i
] == USER_ADDR_NULL
) {
1425 entryp
= aio_create_queue_entry(p
, aiocbpp
[i
], AIO_LIO
);
1426 if (entryp
== NULL
) {
1432 * This refcount is cleaned up on exit if the entry
1435 entries
[lio_count
++] = entryp
;
1436 if (uap
->mode
== LIO_NOWAIT
) {
1437 /* Set signal handler, if any */
1438 entryp
->aiocb
.aio_sigevent
= aiosigev
;
1442 if (lio_count
== 0) {
1443 /* There's nothing to submit */
1448 * Past this point we're committed and will not bail out
1450 * - keep a reference on the leader for LIO_WAIT
1451 * - perform the submissions and optionally wait
1454 aio_workq_entry
*leader
= entries
[0];
1455 if (uap
->mode
== LIO_WAIT
) {
1456 aio_entry_ref(leader
); /* consumed below */
1459 aio_proc_lock_spin(p
);
1461 for (int i
= 0; i
< lio_count
; i
++) {
1462 if (aio_try_enqueue_work_locked(p
, entries
[i
], leader
)) {
1463 entries
[i
] = NULL
; /* the entry was submitted */
1469 if (uap
->mode
== LIO_WAIT
&& result
== 0) {
1470 leader
->flags
|= AIO_LIO_WAIT
;
1472 while (leader
->lio_pending
) {
1473 /* If we were interrupted, fail out (even if all finished) */
1474 if (msleep(leader
, aio_proc_mutex(p
),
1475 PCATCH
| PRIBIO
| PSPIN
, "lio_listio", 0) != 0) {
1481 leader
->flags
&= ~AIO_LIO_WAIT
;
1486 if (uap
->mode
== LIO_WAIT
) {
1487 aio_entry_unref(leader
);
1491 /* Consume unsubmitted entries */
1492 for (int i
= 0; i
< lio_count
; i
++) {
1494 aio_entry_unref(entries
[i
]);
1498 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_listio
) | DBG_FUNC_END
,
1499 VM_KERNEL_ADDRPERM(p
), result
, 0, 0, 0);
1506 * aio worker thread. this is where all the real work gets done.
1507 * we get a wake up call on sleep channel &aio_anchor.aio_async_workq
1508 * after new work is queued up.
1510 __attribute__((noreturn
))
1512 aio_work_thread(void *arg __unused
, wait_result_t wr __unused
)
1514 aio_workq_entry
*entryp
;
1516 vm_map_t currentmap
;
1517 vm_map_t oldmap
= VM_MAP_NULL
;
1518 task_t oldaiotask
= TASK_NULL
;
1519 struct uthread
*uthreadp
= NULL
;
1524 * returns with the entry ref'ed.
1525 * sleeps until work is available.
1527 entryp
= aio_get_some_work();
1530 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_worker_thread
) | DBG_FUNC_START
,
1531 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1532 entryp
->flags
, 0, 0);
1535 * Assume the target's address space identity for the duration
1536 * of the IO. Note: don't need to have the entryp locked,
1537 * because the proc and map don't change until it's freed.
1539 currentmap
= get_task_map((current_proc())->task
);
1540 if (currentmap
!= entryp
->aio_map
) {
1541 uthreadp
= (struct uthread
*) get_bsdthread_info(current_thread());
1542 oldaiotask
= uthreadp
->uu_aio_task
;
1544 * workq entries at this stage cause _aio_exec() and _aio_exit() to
1545 * block until we hit `do_aio_completion_and_unlock()` below,
1546 * which means that it is safe to dereference p->task without
1547 * holding a lock or taking references.
1549 uthreadp
->uu_aio_task
= p
->task
;
1550 oldmap
= vm_map_switch(entryp
->aio_map
);
1553 if ((entryp
->flags
& AIO_READ
) != 0) {
1554 error
= do_aio_read(entryp
);
1555 } else if ((entryp
->flags
& AIO_WRITE
) != 0) {
1556 error
= do_aio_write(entryp
);
1557 } else if ((entryp
->flags
& (AIO_FSYNC
| AIO_DSYNC
)) != 0) {
1558 error
= do_aio_fsync(entryp
);
1563 /* Restore old map */
1564 if (currentmap
!= entryp
->aio_map
) {
1565 vm_map_switch(oldmap
);
1566 uthreadp
->uu_aio_task
= oldaiotask
;
1569 /* liberate unused map */
1570 vm_map_deallocate(entryp
->aio_map
);
1571 entryp
->aio_map
= VM_MAP_NULL
;
1573 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_worker_thread
) | DBG_FUNC_END
,
1574 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1575 entryp
->errorval
, entryp
->returnval
, 0);
1577 /* we're done with the IO request so pop it off the active queue and */
1578 /* push it on the done queue */
1580 entryp
->errorval
= error
;
1581 do_aio_completion_and_unlock(p
, entryp
);
1587 * aio_get_some_work - get the next async IO request that is ready to be executed.
1588 * aio_fsync complicates matters a bit since we cannot do the fsync until all async
1589 * IO requests at the time the aio_fsync call came in have completed.
1590 * NOTE - AIO_LOCK must be held by caller
1592 static aio_workq_entry
*
1593 aio_get_some_work(void)
1595 aio_workq_entry
*entryp
= NULL
;
1596 aio_workq_t queue
= NULL
;
1598 /* Just one queue for the moment. In the future there will be many. */
1599 queue
= &aio_anchor
.aio_async_workqs
[0];
1600 aio_workq_lock_spin(queue
);
1603 * Hold the queue lock.
1605 * pop some work off the work queue and add to our active queue
1606 * Always start with the queue lock held.
1608 while ((entryp
= TAILQ_FIRST(&queue
->aioq_entries
))) {
1610 * Pull off of the work queue. Once it's off, it can't be cancelled,
1611 * so we can take our ref once we drop the queue lock.
1614 aio_workq_remove_entry_locked(queue
, entryp
);
1616 aio_workq_unlock(queue
);
1619 * Check if it's an fsync that must be delayed. No need to lock the entry;
1620 * that flag would have been set at initialization.
1622 if ((entryp
->flags
& AIO_FSYNC
) != 0) {
1624 * Check for unfinished operations on the same file
1625 * in this proc's queue.
1627 aio_proc_lock_spin(entryp
->procp
);
1628 if (aio_delay_fsync_request(entryp
)) {
1629 /* It needs to be delayed. Put it back on the end of the work queue */
1630 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_fsync_delay
) | DBG_FUNC_NONE
,
1631 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1634 aio_proc_unlock(entryp
->procp
);
1636 aio_workq_lock_spin(queue
);
1637 aio_workq_add_entry_locked(queue
, entryp
);
1640 aio_proc_unlock(entryp
->procp
);
1646 /* We will wake up when someone enqueues something */
1647 waitq_assert_wait64(&queue
->aioq_waitq
, CAST_EVENT64_T(queue
), THREAD_UNINT
, 0);
1648 aio_workq_unlock(queue
);
1649 thread_block(aio_work_thread
);
1651 __builtin_unreachable();
1655 * aio_delay_fsync_request - look to see if this aio_fsync request should be delayed.
1656 * A big, simple hammer: only send it off if it's the most recently filed IO which has
1657 * not been completed.
1660 aio_delay_fsync_request(aio_workq_entry
*entryp
)
1662 if (proc_in_teardown(entryp
->procp
)) {
1664 * we can't delay FSYNCs when in teardown as it will confuse _aio_exit;
1665 * if it was dequeued, then we must now commit to it
1670 if (entryp
== TAILQ_FIRST(&entryp
->procp
->p_aio_activeq
)) {
1677 static aio_workq_entry
*
1678 aio_create_queue_entry(proc_t procp
, user_addr_t aiocbp
, aio_entry_flags_t flags
)
1680 aio_workq_entry
*entryp
;
1682 entryp
= zalloc_flags(aio_workq_zonep
, Z_WAITOK
| Z_ZERO
);
1683 entryp
->procp
= procp
;
1684 entryp
->uaiocbp
= aiocbp
;
1685 entryp
->flags
= flags
;
1686 /* consumed in aio_return or _aio_exit */
1687 os_ref_init(&entryp
->aio_refcount
, &aio_refgrp
);
1689 if (proc_is64bit(procp
)) {
1690 struct user64_aiocb aiocb64
;
1692 if (copyin(aiocbp
, &aiocb64
, sizeof(aiocb64
)) != 0) {
1695 do_munge_aiocb_user64_to_user(&aiocb64
, &entryp
->aiocb
);
1697 struct user32_aiocb aiocb32
;
1699 if (copyin(aiocbp
, &aiocb32
, sizeof(aiocb32
)) != 0) {
1702 do_munge_aiocb_user32_to_user(&aiocb32
, &entryp
->aiocb
);
1705 /* do some more validation on the aiocb and embedded file descriptor */
1706 if (aio_validate(procp
, entryp
) != 0) {
1710 /* get a reference to the user land map in order to keep it around */
1711 entryp
->aio_map
= get_task_map(procp
->task
);
1712 vm_map_reference(entryp
->aio_map
);
1714 /* get a reference on the current_thread, which is passed in vfs_context. */
1715 entryp
->thread
= current_thread();
1716 thread_reference(entryp
->thread
);
1720 zfree(aio_workq_zonep
, entryp
);
1726 * aio_queue_async_request - queue up an async IO request on our work queue then
1727 * wake up one of our worker threads to do the actual work. We get a reference
1728 * to our caller's user land map in order to keep it around while we are
1729 * processing the request.
1732 aio_queue_async_request(proc_t procp
, user_addr_t aiocbp
,
1733 aio_entry_flags_t flags
)
1735 aio_workq_entry
*entryp
;
1738 entryp
= aio_create_queue_entry(procp
, aiocbp
, flags
);
1739 if (entryp
== NULL
) {
1744 aio_proc_lock_spin(procp
);
1745 if (!aio_try_enqueue_work_locked(procp
, entryp
, NULL
)) {
1749 aio_proc_unlock(procp
);
1754 * This entry has not been queued up so no worries about
1755 * unlocked state and aio_map
1757 aio_proc_unlock(procp
);
1758 aio_free_request(entryp
);
1765 * aio_free_request - remove our reference on the user land map and
1766 * free the work queue entry resources. The entry is off all lists
1767 * and has zero refcount, so no one can have a pointer to it.
1770 aio_free_request(aio_workq_entry
*entryp
)
1772 if (entryp
->aio_proc_link
.tqe_prev
|| entryp
->aio_workq_link
.tqe_prev
) {
1773 panic("aio_workq_entry %p being freed while still enqueued", entryp
);
1776 /* remove our reference to the user land map. */
1777 if (VM_MAP_NULL
!= entryp
->aio_map
) {
1778 vm_map_deallocate(entryp
->aio_map
);
1781 /* remove our reference to thread which enqueued the request */
1782 if (NULL
!= entryp
->thread
) {
1783 thread_deallocate(entryp
->thread
);
1786 zfree(aio_workq_zonep
, entryp
);
1793 * validate the aiocb passed in by one of the aio syscalls.
1796 aio_validate(proc_t p
, aio_workq_entry
*entryp
)
1798 struct fileproc
*fp
;
1804 if ((entryp
->flags
& AIO_LIO
) != 0) {
1805 if (entryp
->aiocb
.aio_lio_opcode
== LIO_READ
) {
1806 entryp
->flags
|= AIO_READ
;
1807 } else if (entryp
->aiocb
.aio_lio_opcode
== LIO_WRITE
) {
1808 entryp
->flags
|= AIO_WRITE
;
1809 } else if (entryp
->aiocb
.aio_lio_opcode
== LIO_NOP
) {
1817 if ((entryp
->flags
& (AIO_WRITE
| AIO_FSYNC
| AIO_DSYNC
)) != 0) {
1821 if ((entryp
->flags
& (AIO_READ
| AIO_WRITE
)) != 0) {
1822 if (entryp
->aiocb
.aio_nbytes
> INT_MAX
||
1823 entryp
->aiocb
.aio_buf
== USER_ADDR_NULL
||
1824 entryp
->aiocb
.aio_offset
< 0) {
1829 result
= aio_sigev_validate(&entryp
->aiocb
.aio_sigevent
);
1834 /* validate the file descriptor and that the file was opened
1835 * for the appropriate read / write access.
1839 fp
= fp_get_noref_locked(p
, entryp
->aiocb
.aio_fildes
);
1842 } else if ((fp
->fp_glob
->fg_flag
& flag
) == 0) {
1843 /* we don't have read or write access */
1845 } else if (FILEGLOB_DTYPE(fp
->fp_glob
) != DTYPE_VNODE
) {
1846 /* this is not a file */
1849 fp
->fp_flags
|= FP_AIOISSUED
;
1858 * do_aio_completion_and_unlock. Handle async IO completion.
1861 do_aio_completion_and_unlock(proc_t p
, aio_workq_entry
*entryp
)
1863 aio_workq_entry
*leader
= entryp
->lio_leader
;
1864 int lio_pending
= 0;
1865 bool do_signal
= false;
1867 ASSERT_AIO_PROC_LOCK_OWNED(p
);
1869 aio_proc_move_done_locked(p
, entryp
);
1872 lio_pending
= --leader
->lio_pending
;
1873 if (lio_pending
< 0) {
1874 panic("lio_pending accounting mistake");
1876 if (lio_pending
== 0 && (leader
->flags
& AIO_LIO_WAIT
)) {
1879 entryp
->lio_leader
= NULL
; /* no dangling pointers please */
1883 * need to handle case where a process is trying to exit, exec, or
1884 * close and is currently waiting for active aio requests to complete.
1885 * If AIO_CLEANUP_WAIT is set then we need to look to see if there are any
1886 * other requests in the active queue for this process. If there are
1887 * none then wakeup using the AIO_CLEANUP_SLEEP_CHAN tsleep channel.
1888 * If there are some still active then do nothing - we only want to
1889 * wakeup when all active aio requests for the process are complete.
1891 if (__improbable(entryp
->flags
& AIO_EXIT_WAIT
)) {
1892 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_completion_cleanup_wait
) | DBG_FUNC_NONE
,
1893 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1896 if (!aio_has_active_requests_for_process(p
)) {
1898 * no active aio requests for this process, continue exiting. In this
1899 * case, there should be no one else waiting on the proc in AIO...
1901 wakeup_one((caddr_t
)&p
->AIO_CLEANUP_SLEEP_CHAN
);
1903 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_completion_cleanup_wake
) | DBG_FUNC_NONE
,
1904 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1907 } else if (entryp
->aiocb
.aio_sigevent
.sigev_notify
== SIGEV_SIGNAL
) {
1909 * If this was the last request in the group, or not part of
1910 * a group, and that a signal is desired, send one.
1912 do_signal
= (lio_pending
== 0);
1915 if (__improbable(entryp
->flags
& AIO_CLOSE_WAIT
)) {
1916 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_completion_cleanup_wait
) | DBG_FUNC_NONE
,
1917 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1920 if (!aio_proc_has_active_requests_for_file(p
, entryp
->aiocb
.aio_fildes
)) {
1921 /* Can't wakeup_one(); multiple closes might be in progress. */
1922 wakeup(&p
->AIO_CLEANUP_SLEEP_CHAN
);
1924 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_completion_cleanup_wake
) | DBG_FUNC_NONE
,
1925 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1933 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_completion_sig
) | DBG_FUNC_NONE
,
1934 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
),
1935 entryp
->aiocb
.aio_sigevent
.sigev_signo
, 0, 0);
1937 psignal(p
, entryp
->aiocb
.aio_sigevent
.sigev_signo
);
1941 * A thread in aio_suspend() wants to know about completed IOs. If it checked
1942 * the done list before we moved our AIO there, then it already asserted its wait,
1943 * and we can wake it up without holding the lock. If it checked the list after
1944 * we did our move, then it has already seen the AIO that we moved. Either way, we
1945 * can do our wakeup without holding the lock.
1947 wakeup(&p
->AIO_SUSPEND_SLEEP_CHAN
);
1948 KERNEL_DEBUG(BSDDBG_CODE(DBG_BSD_AIO
, AIO_completion_suspend_wake
) | DBG_FUNC_NONE
,
1949 VM_KERNEL_ADDRPERM(p
), VM_KERNEL_ADDRPERM(entryp
->uaiocbp
), 0, 0, 0);
1951 aio_entry_unref(entryp
); /* see aio_try_enqueue_work_locked */
1953 aio_entry_unref(leader
); /* see lio_listio */
1962 do_aio_read(aio_workq_entry
*entryp
)
1964 struct proc
*p
= entryp
->procp
;
1965 struct fileproc
*fp
;
1968 if ((error
= fp_lookup(p
, entryp
->aiocb
.aio_fildes
, &fp
, 0))) {
1972 if (fp
->fp_glob
->fg_flag
& FREAD
) {
1973 struct vfs_context context
= {
1974 .vc_thread
= entryp
->thread
, /* XXX */
1975 .vc_ucred
= fp
->fp_glob
->fg_cred
,
1978 error
= dofileread(&context
, fp
,
1979 entryp
->aiocb
.aio_buf
,
1980 entryp
->aiocb
.aio_nbytes
,
1981 entryp
->aiocb
.aio_offset
, FOF_OFFSET
,
1982 &entryp
->returnval
);
1987 fp_drop(p
, entryp
->aiocb
.aio_fildes
, fp
, 0);
1996 do_aio_write(aio_workq_entry
*entryp
)
1998 struct proc
*p
= entryp
->procp
;
1999 struct fileproc
*fp
;
2002 if ((error
= fp_lookup(p
, entryp
->aiocb
.aio_fildes
, &fp
, 0))) {
2006 if (fp
->fp_glob
->fg_flag
& FWRITE
) {
2007 struct vfs_context context
= {
2008 .vc_thread
= entryp
->thread
, /* XXX */
2009 .vc_ucred
= fp
->fp_glob
->fg_cred
,
2011 int flags
= FOF_PCRED
;
2013 if ((fp
->fp_glob
->fg_flag
& O_APPEND
) == 0) {
2014 flags
|= FOF_OFFSET
;
2017 /* NB: tell dofilewrite the offset, and to use the proc cred */
2018 error
= dofilewrite(&context
,
2020 entryp
->aiocb
.aio_buf
,
2021 entryp
->aiocb
.aio_nbytes
,
2022 entryp
->aiocb
.aio_offset
,
2024 &entryp
->returnval
);
2029 fp_drop(p
, entryp
->aiocb
.aio_fildes
, fp
, 0);
2035 * aio_has_active_requests_for_process - return whether the process has active requests pending.
2039 aio_has_active_requests_for_process(proc_t procp
)
2041 return !TAILQ_EMPTY(&procp
->p_aio_activeq
);
2045 * Called with the proc locked.
2048 aio_proc_has_active_requests_for_file(proc_t procp
, int fd
)
2050 aio_workq_entry
*entryp
;
2052 TAILQ_FOREACH(entryp
, &procp
->p_aio_activeq
, aio_proc_link
) {
2053 if (entryp
->aiocb
.aio_fildes
== fd
) {
2066 do_aio_fsync(aio_workq_entry
*entryp
)
2068 struct proc
*p
= entryp
->procp
;
2070 struct fileproc
*fp
;
2075 * We are never called unless either AIO_FSYNC or AIO_DSYNC are set.
2077 * If AIO_DSYNC is set, we can tell the lower layers that it is OK
2078 * to mark for update the metadata not strictly necessary for data
2079 * retrieval, rather than forcing it to disk.
2081 * If AIO_FSYNC is set, we also have to wait until metadata not strictly
2082 * necessary for data retrieval is committed to stable storage (e.g.
2083 * atime, mtime, ctime, etc.).
2085 * Metadata necessary for data retrieval must be committed to stable
2086 * storage in either case (file length, etc.).
2088 if (entryp
->flags
& AIO_FSYNC
) {
2089 sync_flag
= MNT_WAIT
;
2091 sync_flag
= MNT_DWAIT
;
2094 error
= fp_get_ftype(p
, entryp
->aiocb
.aio_fildes
, DTYPE_VNODE
, ENOTSUP
, &fp
);
2096 entryp
->returnval
= -1;
2099 vp
= fp
->fp_glob
->fg_data
;
2101 if ((error
= vnode_getwithref(vp
)) == 0) {
2102 struct vfs_context context
= {
2103 .vc_thread
= entryp
->thread
, /* XXX */
2104 .vc_ucred
= fp
->fp_glob
->fg_cred
,
2107 error
= VNOP_FSYNC(vp
, sync_flag
, &context
);
2109 (void)vnode_put(vp
);
2111 entryp
->returnval
= -1;
2114 fp_drop(p
, entryp
->aiocb
.aio_fildes
, fp
, 0);
2120 * is_already_queued - runs through our queues to see if the given
2121 * aiocbp / process is there. Returns TRUE if there is a match
2122 * on any of our aio queues.
2124 * Called with proc aio lock held (can be held spin)
2127 is_already_queued(proc_t procp
, user_addr_t aiocbp
)
2129 aio_workq_entry
*entryp
;
2134 /* look for matches on our queue of async IO requests that have completed */
2135 TAILQ_FOREACH(entryp
, &procp
->p_aio_doneq
, aio_proc_link
) {
2136 if (aiocbp
== entryp
->uaiocbp
) {
2138 goto ExitThisRoutine
;
2142 /* look for matches on our queue of active async IO requests */
2143 TAILQ_FOREACH(entryp
, &procp
->p_aio_activeq
, aio_proc_link
) {
2144 if (aiocbp
== entryp
->uaiocbp
) {
2146 goto ExitThisRoutine
;
2156 * aio initialization
2158 __private_extern__
void
2161 for (int i
= 0; i
< AIO_NUM_WORK_QUEUES
; i
++) {
2162 aio_workq_init(&aio_anchor
.aio_async_workqs
[i
]);
2165 _aio_create_worker_threads(aio_worker_threads
);
2170 * aio worker threads created here.
2172 __private_extern__
void
2173 _aio_create_worker_threads(int num
)
2177 /* create some worker threads to handle the async IO requests */
2178 for (i
= 0; i
< num
; i
++) {
2181 if (KERN_SUCCESS
!= kernel_thread_start(aio_work_thread
, NULL
, &myThread
)) {
2182 printf("%s - failed to create a work thread \n", __FUNCTION__
);
2184 thread_deallocate(myThread
);
2190 * Return the current activation utask
2195 return ((struct uthread
*)get_bsdthread_info(current_thread()))->uu_aio_task
;
2200 * In the case of an aiocb from a
2201 * 32-bit process we need to expand some longs and pointers to the correct
2202 * sizes in order to let downstream code always work on the same type of
2203 * aiocb (in our case that is a user_aiocb)
2206 do_munge_aiocb_user32_to_user(struct user32_aiocb
*my_aiocbp
, struct user_aiocb
*the_user_aiocbp
)
2208 the_user_aiocbp
->aio_fildes
= my_aiocbp
->aio_fildes
;
2209 the_user_aiocbp
->aio_offset
= my_aiocbp
->aio_offset
;
2210 the_user_aiocbp
->aio_buf
= CAST_USER_ADDR_T(my_aiocbp
->aio_buf
);
2211 the_user_aiocbp
->aio_nbytes
= my_aiocbp
->aio_nbytes
;
2212 the_user_aiocbp
->aio_reqprio
= my_aiocbp
->aio_reqprio
;
2213 the_user_aiocbp
->aio_lio_opcode
= my_aiocbp
->aio_lio_opcode
;
2215 /* special case here. since we do not know if sigev_value is an */
2216 /* int or a ptr we do NOT cast the ptr to a user_addr_t. This */
2217 /* means if we send this info back to user space we need to remember */
2218 /* sigev_value was not expanded for the 32-bit case. */
2219 /* NOTE - this does NOT affect us since we don't support sigev_value */
2220 /* yet in the aio context. */
2222 the_user_aiocbp
->aio_sigevent
.sigev_notify
= my_aiocbp
->aio_sigevent
.sigev_notify
;
2223 the_user_aiocbp
->aio_sigevent
.sigev_signo
= my_aiocbp
->aio_sigevent
.sigev_signo
;
2224 the_user_aiocbp
->aio_sigevent
.sigev_value
.size_equivalent
.sival_int
=
2225 my_aiocbp
->aio_sigevent
.sigev_value
.sival_int
;
2226 the_user_aiocbp
->aio_sigevent
.sigev_notify_function
=
2227 CAST_USER_ADDR_T(my_aiocbp
->aio_sigevent
.sigev_notify_function
);
2228 the_user_aiocbp
->aio_sigevent
.sigev_notify_attributes
=
2229 CAST_USER_ADDR_T(my_aiocbp
->aio_sigevent
.sigev_notify_attributes
);
2232 /* Similar for 64-bit user process, so that we don't need to satisfy
2233 * the alignment constraints of the original user64_aiocb
2239 do_munge_aiocb_user64_to_user(struct user64_aiocb
*my_aiocbp
, struct user_aiocb
*the_user_aiocbp
)
2242 the_user_aiocbp
->aio_fildes
= my_aiocbp
->aio_fildes
;
2243 the_user_aiocbp
->aio_offset
= my_aiocbp
->aio_offset
;
2244 the_user_aiocbp
->aio_buf
= my_aiocbp
->aio_buf
;
2245 the_user_aiocbp
->aio_nbytes
= my_aiocbp
->aio_nbytes
;
2246 the_user_aiocbp
->aio_reqprio
= my_aiocbp
->aio_reqprio
;
2247 the_user_aiocbp
->aio_lio_opcode
= my_aiocbp
->aio_lio_opcode
;
2249 the_user_aiocbp
->aio_sigevent
.sigev_notify
= my_aiocbp
->aio_sigevent
.sigev_notify
;
2250 the_user_aiocbp
->aio_sigevent
.sigev_signo
= my_aiocbp
->aio_sigevent
.sigev_signo
;
2251 the_user_aiocbp
->aio_sigevent
.sigev_value
.size_equivalent
.sival_int
=
2252 my_aiocbp
->aio_sigevent
.sigev_value
.size_equivalent
.sival_int
;
2253 the_user_aiocbp
->aio_sigevent
.sigev_notify_function
=
2254 my_aiocbp
->aio_sigevent
.sigev_notify_function
;
2255 the_user_aiocbp
->aio_sigevent
.sigev_notify_attributes
=
2256 my_aiocbp
->aio_sigevent
.sigev_notify_attributes
;
2258 #pragma unused(my_aiocbp, the_user_aiocbp)
2259 panic("64bit process on 32bit kernel is not supported");