/*
 * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * todo:
 * 1) ramesh is looking into how to replace taking a reference on
 *    the user's map (vm_map_reference()) since it is believed that
 *    would not hold the process for us.
 * 2) david is looking into a way for us to set the priority of the
 *    worker threads to match that of the user's thread when the
 *    async IO was queued.
 */

/*
 * This file contains support for the POSIX 1003.1B AIO/LIO facility.
 */

#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file_internal.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode_internal.h>
#include <sys/malloc.h>
#include <sys/mount_internal.h>
#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>

#include <sys/aio_kern.h>
#include <sys/sysproto.h>

#include <machine/limits.h>

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <kern/sched_prim.h>

#include <vm/vm_map.h>

#include <sys/kdebug.h>
#define AIO_work_queued                 1
#define AIO_worker_wake                 2
#define AIO_completion_sig              3
#define AIO_completion_cleanup_wait     4
#define AIO_completion_cleanup_wake     5
#define AIO_completion_suspend_wake     6
#define AIO_fsync_delay                 7

#define AIO_cancel_async_workq          11
#define AIO_cancel_sync_workq           12
#define AIO_cancel_activeq              13
#define AIO_cancel_doneq                14

#define AIO_error_val                   61
#define AIO_error_activeq               62
#define AIO_error_workq                 63

#define AIO_return_val                  71
#define AIO_return_activeq              72
#define AIO_return_workq                73

#define AIO_exit_sleep                  91
#define AIO_close                       100
#define AIO_close_sleep                 101
#define AIO_suspend                     110
#define AIO_suspend_sleep               111
#define AIO_worker_thread               120

#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT

/*
 * aio requests queue up on the aio_async_workq or lio_sync_workq (for
 * lio_listio LIO_WAIT).  Requests then move to the per process aio_activeq
 * (proc.aio_activeq) when one of our worker threads starts the IO.
 * And finally, requests move to the per process aio_doneq (proc.aio_doneq)
 * when the IO request completes.  The request remains on aio_doneq until
 * the user process calls aio_return or the process exits; either way, that
 * is our trigger to release aio resources.
 */
struct aio_anchor_cb
{
    int     aio_async_workq_count;  /* entries on aio_async_workq */
    int     lio_sync_workq_count;   /* entries on lio_sync_workq */
    int     aio_active_count;       /* entries on all active queues (proc.aio_activeq) */
    int     aio_done_count;         /* entries on all done queues (proc.aio_doneq) */
    TAILQ_HEAD( , aio_workq_entry ) aio_async_workq;
    TAILQ_HEAD( , aio_workq_entry ) lio_sync_workq;
};
typedef struct aio_anchor_cb aio_anchor_cb;
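
/*
 * Illustrative userland usage (not part of this kernel file): a minimal
 * sketch, assuming an already-open file descriptor "fd", of driving one
 * request through the queues described above -- queued by aio_read(),
 * serviced from aio_async_workq / aio_activeq, then reaped from aio_doneq
 * by aio_return().  Error handling is elided.
 *
 *	#include <aio.h>
 *	#include <errno.h>
 *	#include <string.h>
 *
 *	char		buf[4096];
 *	struct aiocb	cb;
 *
 *	memset( &cb, 0, sizeof(cb) );
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *
 *	if ( aio_read( &cb ) == 0 ) {
 *		while ( aio_error( &cb ) == EINPROGRESS )
 *			;				// request is on the work / active queues
 *		ssize_t nread = aio_return( &cb );	// reaps the entry from aio_doneq
 *	}
 */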

/*
 * Notes on aio sleep / wake channels.
 * We currently pick a couple of fields within the proc structure that give us
 * sleep channels that do not collide with any other kernel routines.
 * At this time, for binary compatibility reasons, we cannot create new proc fields.
 */
#define AIO_SUSPEND_SLEEP_CHAN  aio_active_count
#define AIO_CLEANUP_SLEEP_CHAN  aio_done_count
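
/*
 * For example (given the macros above), a cleanup wait such as
 *
 *	tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_exit", 0 );
 *
 * expands to a sleep on &p->aio_done_count, and the matching
 * wakeup_one( (caddr_t) &entryp->procp->AIO_CLEANUP_SLEEP_CHAN ) in
 * do_aio_completion() wakes that same address.
 */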

/*
 * async IO locking macros used to protect critical sections.
 */
#define AIO_LOCK    lck_mtx_lock(aio_lock)
#define AIO_UNLOCK  lck_mtx_unlock(aio_lock)

/*
 * LOCAL PROTOTYPES
 */
static int          aio_active_requests_for_process(proc_t procp );
static boolean_t    aio_delay_fsync_request( aio_workq_entry *entryp );
static int          aio_free_request( aio_workq_entry *entryp, vm_map_t the_map );
static int          aio_get_all_queues_count( void );
static int          aio_get_process_count(proc_t procp );
static aio_workq_entry *  aio_get_some_work( void );
static boolean_t    aio_last_group_io( aio_workq_entry *entryp );
static void         aio_mark_requests( aio_workq_entry *entryp );
static int          aio_queue_async_request(proc_t procp,
                                             user_addr_t aiocbp,
                                             int kindOfIO );
static int          aio_validate( aio_workq_entry *entryp );
static void         aio_work_thread( void );
static int          do_aio_cancel(proc_t p,
                                  int fd,
                                  user_addr_t aiocbp,
                                  boolean_t wait_for_completion,
                                  boolean_t disable_notification );
static void         do_aio_completion( aio_workq_entry *entryp );
static int          do_aio_fsync( aio_workq_entry *entryp );
static int          do_aio_read( aio_workq_entry *entryp );
static int          do_aio_write( aio_workq_entry *entryp );
static void         do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp );
static boolean_t    is_already_queued(proc_t procp,
                                      user_addr_t aiocbp );
static int          lio_create_async_entry(proc_t procp,
                                            user_addr_t aiocbp,
                                            user_addr_t sigp,
                                            long group_tag,
                                            aio_workq_entry **entrypp );
static int          lio_create_sync_entry(proc_t procp,
                                           user_addr_t aiocbp,
                                           long group_tag,
                                           aio_workq_entry **entrypp );

/*
 * EXTERNAL PROTOTYPES
 */

/* in ...bsd/kern/sys_generic.c */
extern int dofileread(vfs_context_t ctx, struct fileproc *fp,
                      user_addr_t bufp, user_size_t nbyte,
                      off_t offset, int flags, user_ssize_t *retval );
extern int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
                       user_addr_t bufp, user_size_t nbyte, off_t offset,
                       int flags, user_ssize_t *retval );

/*
 * aio external global variables.
 */
extern int aio_max_requests;              /* AIO_MAX - configurable */
extern int aio_max_requests_per_process;  /* AIO_PROCESS_MAX - configurable */
extern int aio_worker_threads;            /* AIO_THREAD_COUNT - configurable */


/*
 * aio static variables.
 */
static aio_anchor_cb        aio_anchor;
static lck_mtx_t *          aio_lock;
static lck_grp_t *          aio_lock_grp;
static lck_attr_t *         aio_lock_attr;
static lck_grp_attr_t *     aio_lock_grp_attr;
static struct zone          *aio_workq_zonep;

/*
 * aio_cancel - attempt to cancel one or more async IO requests currently
 * outstanding against file descriptor uap->fd.  If uap->aiocbp is not
 * NULL then only one specific IO is cancelled (if possible).  If uap->aiocbp
 * is NULL then all outstanding async IO requests for the given file
 * descriptor are cancelled (if possible).
 */
aio_cancel(proc_t p, struct aio_cancel_args *uap, int *retval )
    struct user_aiocb       my_aiocb;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    /* quick check to see if there are any async IO requests queued up */
    result = aio_get_all_queues_count( );

    *retval = AIO_ALLDONE;

    if ( uap->aiocbp != USER_ADDR_NULL ) {
        if ( !IS_64BIT_PROCESS(p) ) {
            struct aiocb aiocb32;

            result = copyin( uap->aiocbp, &aiocb32, sizeof(aiocb32) );
                do_munge_aiocb( &aiocb32, &my_aiocb );

            result = copyin( uap->aiocbp, &my_aiocb, sizeof(my_aiocb) );

        /* NOTE - POSIX standard says a mismatch between the file */
        /* descriptor passed in and the file descriptor embedded in */
        /* the aiocb causes unspecified results.  We return EBADF in */
        /* that situation. */
        if ( uap->fd != my_aiocb.aio_fildes ) {

    result = do_aio_cancel( p, uap->fd, uap->aiocbp, FALSE, FALSE );

    if ( result != -1 ) {

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, result, 0, 0 );
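
/*
 * Illustrative userland usage of aio_cancel (not part of this kernel file):
 * a minimal sketch, assuming "fd" and a previously submitted "cb".  Passing
 * NULL for the aiocb pointer asks to cancel every outstanding request on fd.
 *
 *	int r = aio_cancel( fd, &cb );
 *	if ( r == AIO_CANCELED )
 *		(void) aio_return( &cb );		// reap the cancelled request
 *	else if ( r == AIO_NOTCANCELED ) {
 *		const struct aiocb *list[1] = { &cb };
 *		(void) aio_suspend( list, 1, NULL );	// wait for the active IO
 *	}
 *	// r == AIO_ALLDONE means the request had already completed;
 *	// r == -1 means an error such as EBADF (fd does not match cb.aio_fildes).
 */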

/*
 * _aio_close - internal function used to clean up async IO requests for
 * a file descriptor that is closing.
 */
__private_extern__ void
_aio_close(proc_t p, int fd )
    /* quick check to see if there are any async IO requests queued up */
    count = aio_get_all_queues_count( );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_START,
                  (int)p, fd, 0, 0, 0 );

    /* cancel all async IO requests on our todo queues for this file descriptor */
    error = do_aio_cancel( p, fd, 0, TRUE, FALSE );
    if ( error == AIO_NOTCANCELED ) {
        /*
         * AIO_NOTCANCELED is returned when we find an aio request for this process
         * and file descriptor on the active async IO queue.  Active requests cannot
         * be cancelled so we must wait for them to complete.  We will get a special
         * wake up call on our channel used to sleep for ALL active requests to
         * complete.  This sleep channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used
         * when we must wait for all active aio requests.
         */

        KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close_sleep)) | DBG_FUNC_NONE,
                      (int)p, fd, 0, 0, 0 );

        tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_close", 0 );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_END,
                  (int)p, fd, 0, 0, 0 );

/*
 * aio_error - return the error status associated with the async IO
 * request referred to by uap->aiocbp.  The error status is the errno
 * value that would be set by the corresponding IO request (read, write,
 * fdatasync, or sync).
 */
aio_error(proc_t p, struct aio_error_args *uap, int *retval )
    aio_workq_entry         *entryp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    /* quick check to see if there are any async IO requests queued up */
    if ( aio_get_all_queues_count( ) < 1 ) {

    /* look for a match on our queue of async IO requests that have completed */
    TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
        if ( entryp->uaiocbp == uap->aiocbp ) {
            *retval = entryp->errorval;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_val)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    /* look for a match on our queue of active async IO requests */
    TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
        if ( entryp->uaiocbp == uap->aiocbp ) {
            *retval = EINPROGRESS;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_activeq)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    /* look for a match on our queue of todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) {
            *retval = EINPROGRESS;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_workq)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );

/*
 * aio_fsync - asynchronously force all IO operations associated
 * with the file indicated by the file descriptor (uap->aiocbp->aio_fildes) and
 * queued at the time of the call to the synchronized completion state.
 * NOTE - we do not support op O_DSYNC at this point since we do not support the
 * fdatasync() call yet.
 */
aio_fsync(proc_t p, struct aio_fsync_args *uap, int *retval )
    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, uap->op, 0, 0 );

    /* 0 := O_SYNC for binary backward compatibility with Panther */
    if (uap->op == O_SYNC || uap->op == 0)
        fsync_kind = AIO_FSYNC;
#if 0 // we don't support fdatasync() call yet
    else if ( uap->op == O_DSYNC )
        fsync_kind = AIO_DSYNC;
#endif

    error = aio_queue_async_request( p, uap->aiocbp, fsync_kind );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );
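
/*
 * Illustrative userland usage of aio_fsync (not part of this kernel file):
 * a minimal sketch, assuming "fd" has outstanding async writes queued via
 * aio_write().  Only O_SYNC (or 0) is accepted by this implementation.
 *
 *	struct aiocb sync_cb;
 *
 *	memset( &sync_cb, 0, sizeof(sync_cb) );
 *	sync_cb.aio_fildes = fd;
 *	if ( aio_fsync( O_SYNC, &sync_cb ) == 0 ) {
 *		while ( aio_error( &sync_cb ) == EINPROGRESS )
 *			;		// writes queued before the call are being synced
 *		(void) aio_return( &sync_cb );
 *	}
 */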

/*
 * aio_read - asynchronously read uap->aiocbp->aio_nbytes bytes from the
 * file descriptor (uap->aiocbp->aio_fildes) into the buffer
 * (uap->aiocbp->aio_buf).
 */
aio_read(proc_t p, struct aio_read_args *uap, int *retval )
    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    error = aio_queue_async_request( p, uap->aiocbp, AIO_READ );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );

/*
 * aio_return - return the return status associated with the async IO
 * request referred to by uap->aiocbp.  The return status is the value
 * that would be returned by the corresponding IO request (read, write,
 * fdatasync, or sync).  This is where we release kernel resources
 * held for the async IO call associated with the given aiocb pointer.
 */
aio_return(proc_t p, struct aio_return_args *uap, user_ssize_t *retval )
    aio_workq_entry         *entryp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    /* quick check to see if there are any async IO requests queued up */
    if ( aio_get_all_queues_count( ) < 1 ) {

    /* look for a match on our queue of async IO requests that have completed */
    TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
        if ( entryp->uaiocbp == uap->aiocbp ) {
            TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link );
            aio_anchor.aio_done_count--;

            *retval = entryp->returnval;

            /* we cannot free requests that are still completing */
            if ( (entryp->flags & AIO_COMPLETION) == 0 ) {
                my_map = entryp->aio_map;
                entryp->aio_map = VM_MAP_NULL;

                aio_free_request( entryp, my_map );

            /* tell completion code to free this request */
            entryp->flags |= AIO_DO_FREE;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_val)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    /* look for a match on our queue of active async IO requests */
    TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
        if ( entryp->uaiocbp == uap->aiocbp ) {

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_activeq)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    /* look for a match on our queue of todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) {

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_workq)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );

/*
 * _aio_exec - internal function used to clean up async IO requests for
 * a process that is going away due to exec().  We cancel any async IOs
 * we can and wait for those already active.  We also disable signaling
 * for cancelled or active aio requests that complete.
 * This routine MAY block!
 */
__private_extern__ void
_aio_exec(proc_t p )
    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_START,
                  (int)p, 0, 0, 0, 0 );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_END,
                  (int)p, 0, 0, 0, 0 );

/*
 * _aio_exit - internal function used to clean up async IO requests for
 * a process that is terminating (via exit() or exec()).  We cancel any async IOs
 * we can and wait for those already active.  We also disable signaling
 * for cancelled or active aio requests that complete.  This routine MAY block!
 */
__private_extern__ void
_aio_exit(proc_t p )
    aio_workq_entry         *entryp;

    /* quick check to see if there are any async IO requests queued up */
    count = aio_get_all_queues_count( );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_START,
                  (int)p, 0, 0, 0, 0 );

    /*
     * cancel async IO requests on the todo work queue and wait for those
     * already active to complete.
     */
    error = do_aio_cancel( p, 0, 0, TRUE, TRUE );
    if ( error == AIO_NOTCANCELED ) {
        /*
         * AIO_NOTCANCELED is returned when we find an aio request for this process
         * on the active async IO queue.  Active requests cannot be cancelled so we
         * must wait for them to complete.  We will get a special wake up call on
         * our channel used to sleep for ALL active requests to complete.  This sleep
         * channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used when we must wait for all
         * active aio requests.
         */

        KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit_sleep)) | DBG_FUNC_NONE,
                      (int)p, 0, 0, 0, 0 );

        tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_exit", 0 );

    /* release all aio resources used by this process */
    entryp = TAILQ_FIRST( &p->aio_doneq );
    while ( entryp != NULL ) {
        aio_workq_entry         *next_entryp;

        next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
        TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link );
        aio_anchor.aio_done_count--;

        /* we cannot free requests that are still completing */
        if ( (entryp->flags & AIO_COMPLETION) == 0 ) {
            my_map = entryp->aio_map;
            entryp->aio_map = VM_MAP_NULL;

            aio_free_request( entryp, my_map );

            /* need to start over since aio_doneq may have been */
            /* changed while we were away. */
            entryp = TAILQ_FIRST( &p->aio_doneq );

        /* tell completion code to free this request */
        entryp->flags |= AIO_DO_FREE;
        entryp = next_entryp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_END,
                  (int)p, 0, 0, 0, 0 );

/*
 * do_aio_cancel - cancel async IO requests (if possible).  We get called by
 * aio_cancel, close, and at exit.
 * There are three modes of operation: 1) cancel all async IOs for a process -
 * fd is 0 and aiocbp is NULL 2) cancel all async IOs for file descriptor - fd
 * is > 0 and aiocbp is NULL 3) cancel one async IO associated with the given
 * aiocbp.
 * Returns -1 if no matches were found, AIO_CANCELED when we cancelled all
 * target async IO requests, AIO_NOTCANCELED if we could not cancel all
 * target async IO requests, and AIO_ALLDONE if all target async IO requests
 * were already complete.
 * WARNING - do not dereference aiocbp in this routine, it may point to user
 * land data that has not been copied in (when called from aio_cancel()).
 */
do_aio_cancel(proc_t p, int fd, user_addr_t aiocbp,
              boolean_t wait_for_completion, boolean_t disable_notification )
    aio_workq_entry         *entryp;

    /* look for a match on our queue of async todo work. */
    entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
    while ( entryp != NULL ) {
        aio_workq_entry         *next_entryp;

        next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
        if ( p == entryp->procp ) {
            if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
                 (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
                 (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
                /* we found a match so we remove the entry from the */
                /* todo work queue and place it on the done queue */
                TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
                aio_anchor.aio_async_workq_count--;
                entryp->errorval = ECANCELED;
                entryp->returnval = -1;
                if ( disable_notification )
                    entryp->flags |= AIO_DISABLE;  /* flag for special completion processing */
                result = AIO_CANCELED;

                KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_async_workq)) | DBG_FUNC_NONE,
                              (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

                TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
                aio_anchor.aio_done_count++;

                entryp->flags |= AIO_COMPLETION;

                /* do completion processing for this request */
                do_aio_completion( entryp );

                entryp->flags &= ~AIO_COMPLETION;
                if ( (entryp->flags & AIO_DO_FREE) != 0 ) {
                    my_map = entryp->aio_map;
                    entryp->aio_map = VM_MAP_NULL;

                    aio_free_request( entryp, my_map );

                if ( aiocbp != USER_ADDR_NULL ) {

                /* need to start over since aio_async_workq may have been */
                /* changed while we were away doing completion processing. */
                entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );

        entryp = next_entryp;

    /*
     * look for a match on our queue of synchronous todo work.  This will
     * be a rare occurrence but could happen if a process is terminated while
     * processing a lio_listio call.
     */
    entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
    while ( entryp != NULL ) {
        aio_workq_entry         *next_entryp;

        next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
        if ( p == entryp->procp ) {
            if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
                 (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
                 (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
                /* we found a match so we remove the entry from the */
                /* todo work queue and place it on the done queue */
                TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
                aio_anchor.lio_sync_workq_count--;
                entryp->errorval = ECANCELED;
                entryp->returnval = -1;
                if ( disable_notification )
                    entryp->flags |= AIO_DISABLE;  /* flag for special completion processing */
                result = AIO_CANCELED;

                KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_sync_workq)) | DBG_FUNC_NONE,
                              (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

                TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
                aio_anchor.aio_done_count++;

                if ( aiocbp != USER_ADDR_NULL ) {

        entryp = next_entryp;

    /*
     * look for a match on our queue of active async IO requests and
     * return AIO_NOTCANCELED result.
     */
    TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
        if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
             (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
             (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
            result = AIO_NOTCANCELED;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_activeq)) | DBG_FUNC_NONE,
                          (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

            if ( wait_for_completion )
                entryp->flags |= AIO_WAITING;  /* flag for special completion processing */
            if ( disable_notification )
                entryp->flags |= AIO_DISABLE;  /* flag for special completion processing */
            if ( aiocbp != USER_ADDR_NULL ) {

    /*
     * if we didn't find any matches on the todo or active queues then look for a
     * match on our queue of async IO requests that have completed and if found
     * return AIO_ALLDONE result.
     */
    if ( result == -1 ) {
        TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
            if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
                 (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
                 (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
                result = AIO_ALLDONE;

                KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_doneq)) | DBG_FUNC_NONE,
                              (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

                if ( aiocbp != USER_ADDR_NULL ) {

} /* do_aio_cancel */
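
/*
 * For reference, the three calling modes described above as they actually
 * appear elsewhere in this file:
 *
 *	do_aio_cancel( p, uap->fd, uap->aiocbp, FALSE, FALSE );   from aio_cancel()
 *	do_aio_cancel( p, fd, 0, TRUE, FALSE );                   from _aio_close()
 *	do_aio_cancel( p, 0, 0, TRUE, TRUE );                     from _aio_exit()
 */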

/*
 * aio_suspend - suspend the calling thread until at least one of the async
 * IO operations referenced by uap->aiocblist has completed, until a signal
 * interrupts the function, or uap->timeoutp time interval (optional) has
 * passed.
 * Returns 0 if one or more async IOs have completed else -1 and errno is
 * set appropriately - EAGAIN if timeout elapses or EINTR if an interrupt
 * occurs.
 */
aio_suspend(proc_t p, struct aio_suspend_args *uap, int *retval )
    __pthread_testcancel(1);
    return(aio_suspend_nocancel(p, (struct aio_suspend_nocancel_args *)uap, retval));

aio_suspend_nocancel(proc_t p, struct aio_suspend_nocancel_args *uap, int *retval )
    struct user_timespec    ts;
    aio_workq_entry         *entryp;
    user_addr_t             *aiocbpp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_START,
                  (int)p, uap->nent, 0, 0, 0 );

    /* quick check to see if there are any async IO requests queued up */
    count = aio_get_all_queues_count( );
        goto ExitThisRoutine;

    if ( uap->nent < 1 || uap->nent > aio_max_requests_per_process ) {
        goto ExitThisRoutine;

    if ( uap->timeoutp != USER_ADDR_NULL ) {
        if ( proc_is64bit(p) ) {
            error = copyin( uap->timeoutp, &ts, sizeof(ts) );
            struct timespec temp;
            error = copyin( uap->timeoutp, &temp, sizeof(temp) );
                ts.tv_sec = temp.tv_sec;
                ts.tv_nsec = temp.tv_nsec;
            goto ExitThisRoutine;

        if ( ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000 ) {
            goto ExitThisRoutine;

        nanoseconds_to_absolutetime( (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec,
                                     &abstime );
        clock_absolutetime_interval_to_deadline( abstime, &abstime );

    /* we reserve enough space for largest possible pointer size */
    MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
    if ( aiocbpp == NULL ) {
        goto ExitThisRoutine;

    /* copyin our aiocb pointers from list */
    error = copyin( uap->aiocblist, aiocbpp,
                    proc_is64bit(p) ? (uap->nent * sizeof(user_addr_t))
                                    : (uap->nent * sizeof(uintptr_t)) );
        goto ExitThisRoutine;

    /* we depend on a list of user_addr_t's so we need to munge and expand */
    /* when these pointers came from a 32-bit process */
    if ( !proc_is64bit(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) {
        /* position to the last entry and work back from there */
        uintptr_t       *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1);
        user_addr_t     *my_addrp = aiocbpp + (uap->nent - 1);
        for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) {
            *my_addrp = (user_addr_t) (*my_ptrp);

    /* check list of aio requests to see if any have completed */
check_for_our_aiocbp:
    for ( i = 0; i < uap->nent; i++ ) {
        /* NULL elements are legal so check for 'em */
        aiocbp = *(aiocbpp + i);
        if ( aiocbp == USER_ADDR_NULL )

        /* return immediately if any aio request in the list is done */
        TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
            if ( entryp->uaiocbp == aiocbp ) {
                goto ExitThisRoutine;
    } /* for ( ; i < uap->nent; ) */

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend_sleep)) | DBG_FUNC_NONE,
                  (int)p, uap->nent, 0, 0, 0 );

    /*
     * wait for an async IO to complete or a signal fires or timeout expires.
     * we return EAGAIN (35) for timeout expiration and EINTR (4) when a signal
     * interrupts us.  If an async IO completes before a signal fires or our
     * timeout expires, we get a wakeup call from aio_work_thread().
     */
    assert_wait_deadline( (event_t) &p->AIO_SUSPEND_SLEEP_CHAN, THREAD_ABORTSAFE, abstime );

    error = thread_block( THREAD_CONTINUE_NULL );

    if ( error == THREAD_AWAKENED ) {
        /*
         * got our wakeup call from aio_work_thread().
         * Since we can get a wakeup on this channel from another thread in the
         * same process we head back up to make sure this is for the correct aiocbp.
         * If it is the correct aiocbp we will return from where we do the check
         * (see entryp->uaiocbp == aiocbp after check_for_our_aiocbp label)
         * else we will fall out and just sleep again.
         */
        goto check_for_our_aiocbp;
    else if ( error == THREAD_TIMED_OUT ) {
        /* our timeout expired */
        /* we were interrupted */

    if ( aiocbpp != NULL )
        FREE( aiocbpp, M_TEMP );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_END,
                  (int)p, uap->nent, error, 0, 0 );
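
/*
 * Illustrative userland usage of aio_suspend (not part of this kernel file):
 * a minimal sketch, assuming "cb" was submitted with aio_read() or aio_write().
 * A NULL timeout would block until completion or a signal.
 *
 *	const struct aiocb	*list[1] = { &cb };
 *	struct timespec		ts = { 1, 0 };		// one second timeout
 *
 *	if ( aio_suspend( list, 1, &ts ) == 0 ) {
 *		// at least one request in the list completed
 *		if ( aio_error( &cb ) != EINPROGRESS )
 *			(void) aio_return( &cb );
 *	} else {
 *		// -1 with errno EAGAIN (timeout expired) or EINTR (signal)
 *	}
 */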

/*
 * aio_write - asynchronously write uap->aiocbp->aio_nbytes bytes to the
 * file descriptor (uap->aiocbp->aio_fildes) from the buffer
 * (uap->aiocbp->aio_buf).
 */
aio_write(proc_t p, struct aio_write_args *uap, int *retval )
    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    error = aio_queue_async_request( p, uap->aiocbp, AIO_WRITE );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );

/*
 * lio_listio - initiate a list of IO requests.  We process the list of aiocbs
 * either synchronously (mode == LIO_WAIT) or asynchronously (mode == LIO_NOWAIT).
 * The caller gets error and return status for each aiocb in the list via aio_error
 * and aio_return.  We must keep completed requests until released by the
 * aio_return call.
 */
lio_listio(proc_t p, struct lio_listio_args *uap, int *retval )
    aio_workq_entry     * *entryp_listp;
    user_addr_t         *aiocbpp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START,
                  (int)p, uap->nent, uap->mode, 0, 0 );

    entryp_listp = NULL;

    if ( !(uap->mode == LIO_NOWAIT || uap->mode == LIO_WAIT) ) {
        call_result = EINVAL;

    if ( uap->nent < 1 || uap->nent > AIO_LISTIO_MAX ) {
        call_result = EINVAL;

    /*
     * we use group_tag to mark IO requests for delayed completion processing
     * which means we wait until all IO requests in the group have completed
     * before we either return to the caller when mode is LIO_WAIT or signal
     * user when mode is LIO_NOWAIT.
     */
    group_tag = random();

    /*
     * allocate a list of aio_workq_entry pointers that we will use to queue
     * up all our requests at once while holding our lock.
     */
    MALLOC( entryp_listp, void *, (uap->nent * sizeof(aio_workq_entry *)), M_TEMP, M_WAITOK );
    if ( entryp_listp == NULL ) {
        call_result = EAGAIN;

    /* we reserve enough space for largest possible pointer size */
    MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
    if ( aiocbpp == NULL ) {
        call_result = EAGAIN;

    /* copyin our aiocb pointers from list */
    result = copyin( uap->aiocblist, aiocbpp,
                     IS_64BIT_PROCESS(p) ? (uap->nent * sizeof(user_addr_t))
                                         : (uap->nent * sizeof(uintptr_t)) );
    if ( result != 0 ) {
        call_result = EAGAIN;

    /* we depend on a list of user_addr_t's so we need to munge and expand */
    /* when these pointers came from a 32-bit process */
    if ( !IS_64BIT_PROCESS(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) {
        /* position to the last entry and work back from there */
        uintptr_t       *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1);
        user_addr_t     *my_addrp = aiocbpp + (uap->nent - 1);
        for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) {
            *my_addrp = (user_addr_t) (*my_ptrp);

    /* process list of aio requests */
    for ( i = 0; i < uap->nent; i++ ) {
        user_addr_t my_aiocbp;

        *(entryp_listp + i) = NULL;
        my_aiocbp = *(aiocbpp + i);

        /* NULL elements are legal so check for 'em */
        if ( my_aiocbp == USER_ADDR_NULL )

        if ( uap->mode == LIO_NOWAIT )
            result = lio_create_async_entry( p, my_aiocbp, uap->sigp,
                                             group_tag, (entryp_listp + i) );
            result = lio_create_sync_entry( p, my_aiocbp, group_tag,
                                            (entryp_listp + i) );

        if ( result != 0 && call_result == -1 )
            call_result = result;

    /*
     * we need to protect this section since we do not want any of these grouped
     * IO requests to begin until we have them all on the queue.
     */
    for ( i = 0; i < uap->nent; i++ ) {
        aio_workq_entry         *entryp;

        /* NULL elements are legal so check for 'em */
        entryp = *(entryp_listp + i);
        if ( entryp == NULL )

        /* check our aio limits to throttle bad or rude user land behavior */
        if ( aio_get_all_queues_count( ) >= aio_max_requests ||
             aio_get_process_count( entryp->procp ) >= aio_max_requests_per_process ||
             is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) {
            my_map = entryp->aio_map;
            entryp->aio_map = VM_MAP_NULL;
            if ( call_result == -1 )
                call_result = EAGAIN;
            aio_free_request( entryp, my_map );

        /* place the request on the appropriate queue */
        if ( uap->mode == LIO_NOWAIT ) {
            TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
            aio_anchor.aio_async_workq_count++;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE,
                          (int)p, (int)entryp->uaiocbp, 0, 0, 0 );

            TAILQ_INSERT_TAIL( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
            aio_anchor.lio_sync_workq_count++;

    if ( uap->mode == LIO_NOWAIT ) {
        /* caller does not want to wait so we'll fire off a worker thread and return */
        wakeup_one( (caddr_t) &aio_anchor.aio_async_workq );

        aio_workq_entry         *entryp;

        /*
         * mode is LIO_WAIT - handle the IO requests now.
         */
        entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
        while ( entryp != NULL ) {
            if ( p == entryp->procp && group_tag == entryp->group_tag ) {

                TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
                aio_anchor.lio_sync_workq_count--;

                if ( (entryp->flags & AIO_READ) != 0 ) {
                    error = do_aio_read( entryp );
                else if ( (entryp->flags & AIO_WRITE) != 0 ) {
                    error = do_aio_write( entryp );
                else if ( (entryp->flags & AIO_FSYNC) != 0 ) {
                    error = do_aio_fsync( entryp );
                    printf( "%s - unknown aio request - flags 0x%02X \n",
                            __FUNCTION__, entryp->flags );

                entryp->errorval = error;
                if ( error != 0 && call_result == -1 )

                /* we're done with the IO request so move it on the done queue */
                TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
                aio_anchor.aio_done_count++;
                p->aio_done_count++;

                /* need to start over since lio_sync_workq may have been changed while we */
                /* were away doing the IO. */
                entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
            } /* p == entryp->procp */

            entryp = TAILQ_NEXT( entryp, aio_workq_link );
        } /* while ( entryp != NULL ) */
    } /* uap->mode == LIO_WAIT */

    /* call_result == -1 means we had no trouble queueing up requests */
    if ( call_result == -1 ) {

    if ( entryp_listp != NULL )
        FREE( entryp_listp, M_TEMP );
    if ( aiocbpp != NULL )
        FREE( aiocbpp, M_TEMP );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_END,
                  (int)p, call_result, 0, 0, 0 );

    return( call_result );
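
/*
 * Illustrative userland usage of lio_listio (not part of this kernel file):
 * a minimal sketch, assuming "fd" is open for reading, that issues two reads
 * and waits for both (mode == LIO_WAIT).  With LIO_NOWAIT the call returns
 * immediately and the optional sigevent fires when the whole group is done.
 *
 *	char		buf0[512], buf1[512];
 *	struct aiocb	cb0, cb1;
 *	struct aiocb	*list[2] = { &cb0, &cb1 };
 *
 *	memset( &cb0, 0, sizeof(cb0) );
 *	cb0.aio_fildes = fd;
 *	cb0.aio_buf = buf0;
 *	cb0.aio_nbytes = sizeof(buf0);
 *	cb0.aio_offset = 0;
 *	cb0.aio_lio_opcode = LIO_READ;
 *
 *	cb1 = cb0;
 *	cb1.aio_buf = buf1;
 *	cb1.aio_offset = sizeof(buf0);
 *
 *	if ( lio_listio( LIO_WAIT, list, 2, NULL ) == 0 ) {
 *		ssize_t n0 = aio_return( &cb0 );	// release kernel resources
 *		ssize_t n1 = aio_return( &cb1 );
 *	}
 */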

/*
 * aio worker thread.  this is where all the real work gets done.
 * we get a wake up call on sleep channel &aio_anchor.aio_async_workq
 * after new work is queued up.
 */
aio_work_thread( void )
    aio_workq_entry         *entryp;

    entryp = aio_get_some_work();
    if ( entryp == NULL ) {
        /*
         * aio worker threads wait for some work to get queued up
         * by aio_queue_async_request.  Once some work gets queued
         * it will wake up one of these worker threads just before
         * returning to our caller in user land.
         */
        assert_wait( (event_t) &aio_anchor.aio_async_workq, THREAD_UNINT );

        thread_block( (thread_continue_t)aio_work_thread );

    vm_map_t            currentmap;
    vm_map_t            oldmap = VM_MAP_NULL;
    task_t              oldaiotask = TASK_NULL;
    struct uthread      *uthreadp = NULL;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_START,
                  (int)entryp->procp, (int)entryp->uaiocbp, entryp->flags, 0, 0 );

    /*
     * Assume the target's address space identity for the duration
     */
    currentmap = get_task_map( (current_proc())->task );
    if ( currentmap != entryp->aio_map ) {
        uthreadp = (struct uthread *) get_bsdthread_info(current_thread());
        oldaiotask = uthreadp->uu_aio_task;
        uthreadp->uu_aio_task = entryp->procp->task;
        oldmap = vm_map_switch( entryp->aio_map );

    if ( (entryp->flags & AIO_READ) != 0 ) {
        error = do_aio_read( entryp );
    else if ( (entryp->flags & AIO_WRITE) != 0 ) {
        error = do_aio_write( entryp );
    else if ( (entryp->flags & AIO_FSYNC) != 0 ) {
        error = do_aio_fsync( entryp );
        printf( "%s - unknown aio request - flags 0x%02X \n",
                __FUNCTION__, entryp->flags );

    entryp->errorval = error;
    if ( currentmap != entryp->aio_map ) {
        (void) vm_map_switch( oldmap );
        uthreadp->uu_aio_task = oldaiotask;

    /* we're done with the IO request so pop it off the active queue and */
    /* push it on the done queue */
    TAILQ_REMOVE( &entryp->procp->aio_activeq, entryp, aio_workq_link );
    aio_anchor.aio_active_count--;
    entryp->procp->aio_active_count--;
    TAILQ_INSERT_TAIL( &entryp->procp->aio_doneq, entryp, aio_workq_link );
    aio_anchor.aio_done_count++;
    entryp->procp->aio_done_count++;
    entryp->flags |= AIO_COMPLETION;

    /* remove our reference to the user land map. */
    if ( VM_MAP_NULL != entryp->aio_map ) {
        my_map = entryp->aio_map;
        entryp->aio_map = VM_MAP_NULL;
        AIO_UNLOCK;  /* must unlock before calling vm_map_deallocate() */
        vm_map_deallocate( my_map );

    do_aio_completion( entryp );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_END,
                  (int)entryp->procp, (int)entryp->uaiocbp, entryp->errorval,
                  entryp->returnval, 0 );

    entryp->flags &= ~AIO_COMPLETION;
    if ( (entryp->flags & AIO_DO_FREE) != 0 ) {
        my_map = entryp->aio_map;
        entryp->aio_map = VM_MAP_NULL;

        aio_free_request( entryp, my_map );

} /* aio_work_thread */

/*
 * aio_get_some_work - get the next async IO request that is ready to be executed.
 * aio_fsync complicates matters a bit since we cannot do the fsync until all async
 * IO requests at the time the aio_fsync call came in have completed.
 * NOTE - AIO_LOCK must be held by caller
 */
static aio_workq_entry *
aio_get_some_work( void )
    aio_workq_entry         *entryp;

    /* pop some work off the work queue and add to our active queue */
    for ( entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
          entryp != NULL;
          entryp = TAILQ_NEXT( entryp, aio_workq_link ) ) {

        if ( (entryp->flags & AIO_FSYNC) != 0 ) {
            /* leave aio_fsync calls on the work queue if there are IO */
            /* requests on the active queue for the same file descriptor. */
            if ( aio_delay_fsync_request( entryp ) ) {

                KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync_delay)) | DBG_FUNC_NONE,
                              (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

    if ( entryp != NULL ) {
        TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
        aio_anchor.aio_async_workq_count--;
        TAILQ_INSERT_TAIL( &entryp->procp->aio_activeq, entryp, aio_workq_link );
        aio_anchor.aio_active_count++;
        entryp->procp->aio_active_count++;

} /* aio_get_some_work */

/*
 * aio_delay_fsync_request - look to see if this aio_fsync request should be delayed at
 * this time.  Delay will happen when there are any active IOs for the same file
 * descriptor that were queued at the time the aio_fsync call was queued.
 * NOTE - AIO_LOCK must be held by caller
 */
aio_delay_fsync_request( aio_workq_entry *entryp )
    aio_workq_entry         *my_entryp;

    TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
        if ( my_entryp->fsyncp != USER_ADDR_NULL &&
             entryp->uaiocbp == my_entryp->fsyncp &&
             entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {

} /* aio_delay_fsync_request */

/*
 * aio_queue_async_request - queue up an async IO request on our work queue then
 * wake up one of our worker threads to do the actual work.  We get a reference
 * to our caller's user land map in order to keep it around while we are
 * processing the request.
 */
aio_queue_async_request(proc_t procp, user_addr_t aiocbp, int kindOfIO )
    aio_workq_entry         *entryp;

    entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
    if ( entryp == NULL ) {

    bzero( entryp, sizeof(*entryp) );

    /* fill in the rest of the aio_workq_entry */
    entryp->procp = procp;
    entryp->uaiocbp = aiocbp;
    entryp->flags |= kindOfIO;
    entryp->aio_map = VM_MAP_NULL;

    if ( !IS_64BIT_PROCESS(procp) ) {
        struct aiocb aiocb32;

        result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
            do_munge_aiocb( &aiocb32, &entryp->aiocb );

        result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

    if ( result != 0 ) {

    /* do some more validation on the aiocb and embedded file descriptor */
    result = aio_validate( entryp );

    /* get a reference to the user land map in order to keep it around */
    entryp->aio_map = get_task_map( procp->task );
    vm_map_reference( entryp->aio_map );

    if ( is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) {

    /* check our aio limits to throttle bad or rude user land behavior */
    if ( aio_get_all_queues_count( ) >= aio_max_requests ||
         aio_get_process_count( procp ) >= aio_max_requests_per_process ) {

    /*
     * aio_fsync calls sync up all async IO requests queued at the time
     * the aio_fsync call was made.  So we mark each currently queued async
     * IO with a matching file descriptor as must complete before we do the
     * fsync.  We set the fsyncp field of each matching async IO
     * request with the aiocb pointer passed in on the aio_fsync call to
     * know which IOs must complete before we process the aio_fsync call.
     */
    if ( (kindOfIO & AIO_FSYNC) != 0 )
        aio_mark_requests( entryp );

    /* queue up on our aio asynchronous work queue */
    TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
    aio_anchor.aio_async_workq_count++;

    wakeup_one( (caddr_t) &aio_anchor.aio_async_workq );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE,
                  (int)procp, (int)aiocbp, 0, 0, 0 );

    if ( entryp != NULL ) {
        /* this entry has not been queued up so no worries about unlocked */
        /* state and aio_map */
        aio_free_request( entryp, entryp->aio_map );

} /* aio_queue_async_request */

/*
 * lio_create_async_entry - allocate an aio_workq_entry and fill it in.
 * If all goes well return 0 and pass the aio_workq_entry pointer back to
 * our caller.  We get a reference to our caller's user land map in order to keep
 * it around while we are processing the request.
 * lio_listio calls behave differently at completion: they do completion notification
 * when all async IO requests have completed.  We use group_tag to tag IO requests
 * that behave in the delay notification manner.
 */
lio_create_async_entry(proc_t procp, user_addr_t aiocbp,
                       user_addr_t sigp, long group_tag,
                       aio_workq_entry **entrypp )
    aio_workq_entry         *entryp;

    entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
    if ( entryp == NULL ) {

    bzero( entryp, sizeof(*entryp) );

    /* fill in the rest of the aio_workq_entry */
    entryp->procp = procp;
    entryp->uaiocbp = aiocbp;
    entryp->flags |= AIO_LIO;
    entryp->group_tag = group_tag;
    entryp->aio_map = VM_MAP_NULL;

    if ( !IS_64BIT_PROCESS(procp) ) {
        struct aiocb aiocb32;

        result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
            do_munge_aiocb( &aiocb32, &entryp->aiocb );

        result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

    if ( result != 0 ) {

    /* look for lio_listio LIO_NOP requests and ignore them. */
    /* Not really an error, but we need to free our aio_workq_entry. */
    if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) {

    /* use sigevent passed in to lio_listio for each of our calls, but only */
    /* do completion notification after the last request completes. */
    if ( sigp != USER_ADDR_NULL ) {
        if ( !IS_64BIT_PROCESS(procp) ) {
            struct sigevent sigevent32;

            result = copyin( sigp, &sigevent32, sizeof(sigevent32) );
            if ( result == 0 ) {
                /* also need to munge aio_sigevent since it contains pointers */
                /* special case here.  since we do not know if sigev_value is an */
                /* int or a ptr we do NOT cast the ptr to a user_addr_t.  This */
                /* means if we send this info back to user space we need to remember */
                /* sigev_value was not expanded for the 32-bit case. */
                /* NOTE - this does NOT affect us since we don't support sigev_value */
                /* yet in the aio context. */
                entryp->aiocb.aio_sigevent.sigev_notify = sigevent32.sigev_notify;
                entryp->aiocb.aio_sigevent.sigev_signo = sigevent32.sigev_signo;
                entryp->aiocb.aio_sigevent.sigev_value.size_equivalent.sival_int =
                    sigevent32.sigev_value.sival_int;
                entryp->aiocb.aio_sigevent.sigev_notify_function =
                    CAST_USER_ADDR_T(sigevent32.sigev_notify_function);
                entryp->aiocb.aio_sigevent.sigev_notify_attributes =
                    CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes);

            result = copyin( sigp, &entryp->aiocb.aio_sigevent, sizeof(entryp->aiocb.aio_sigevent) );

        if ( result != 0 ) {

    /* do some more validation on the aiocb and embedded file descriptor */
    result = aio_validate( entryp );

    /* get a reference to the user land map in order to keep it around */
    entryp->aio_map = get_task_map( procp->task );
    vm_map_reference( entryp->aio_map );

    if ( entryp != NULL )
        zfree( aio_workq_zonep, entryp );

} /* lio_create_async_entry */

/*
 * aio_mark_requests - aio_fsync calls synchronize file data for all queued async IO
 * requests at the moment the aio_fsync call is queued.  We use aio_workq_entry.fsyncp
 * to mark each async IO that must complete before the fsync is done.  We use the uaiocbp
 * field from the aio_fsync call as the aio_workq_entry.fsyncp in marked requests.
 * NOTE - AIO_LOCK must be held by caller
 */
aio_mark_requests( aio_workq_entry *entryp )
    aio_workq_entry         *my_entryp;

    TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
        if ( entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {
            my_entryp->fsyncp = entryp->uaiocbp;

    TAILQ_FOREACH( my_entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( entryp->procp == my_entryp->procp &&
             entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {
            my_entryp->fsyncp = entryp->uaiocbp;

} /* aio_mark_requests */
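
/*
 * For example (summarizing the code above together with
 * aio_delay_fsync_request()): if a process queues aio_write() with aiocb W
 * on fd 5 and then aio_fsync() with aiocb S on fd 5, the marking above sets
 * the fsyncp field of W's aio_workq_entry to the user address of S.
 * aio_get_some_work() will then keep the fsync request on the work queue
 * until W is no longer active.
 */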

/*
 * lio_create_sync_entry - allocate an aio_workq_entry and fill it in.
 * If all goes well return 0 and pass the aio_workq_entry pointer back to
 * our caller.
 * lio_listio calls behave differently at completion: they do completion notification
 * when all async IO requests have completed.  We use group_tag to tag IO requests
 * that behave in the delay notification manner.
 */
lio_create_sync_entry(proc_t procp, user_addr_t aiocbp,
                      long group_tag, aio_workq_entry **entrypp )
    aio_workq_entry         *entryp;

    entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
    if ( entryp == NULL ) {

    bzero( entryp, sizeof(*entryp) );

    /* fill in the rest of the aio_workq_entry */
    entryp->procp = procp;
    entryp->uaiocbp = aiocbp;
    entryp->flags |= AIO_LIO;
    entryp->group_tag = group_tag;
    entryp->aio_map = VM_MAP_NULL;

    if ( !IS_64BIT_PROCESS(procp) ) {
        struct aiocb aiocb32;

        result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
            do_munge_aiocb( &aiocb32, &entryp->aiocb );

        result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

    if ( result != 0 ) {

    /* look for lio_listio LIO_NOP requests and ignore them. */
    /* Not really an error, but we need to free our aio_workq_entry. */
    if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) {

    result = aio_validate( entryp );
    if ( result != 0 ) {

    if ( entryp != NULL )
        zfree( aio_workq_zonep, entryp );

} /* lio_create_sync_entry */

/*
 * aio_free_request - remove our reference on the user land map and
 * free the work queue entry resources.
 * We are not holding the lock here; thus aio_map is passed in, having been
 * zeroed while we did hold the lock.
 */
aio_free_request( aio_workq_entry *entryp, vm_map_t the_map )
    /* remove our reference to the user land map. */
    if ( VM_MAP_NULL != the_map ) {
        vm_map_deallocate( the_map );

    zfree( aio_workq_zonep, entryp );

} /* aio_free_request */

/*
 * aio_validate - validate the aiocb passed in by one of the aio syscalls.
 */
aio_validate( aio_workq_entry *entryp )
    struct fileproc         *fp;

    if ( (entryp->flags & AIO_LIO) != 0 ) {
        if ( entryp->aiocb.aio_lio_opcode == LIO_READ )
            entryp->flags |= AIO_READ;
        else if ( entryp->aiocb.aio_lio_opcode == LIO_WRITE )
            entryp->flags |= AIO_WRITE;
        else if ( entryp->aiocb.aio_lio_opcode == LIO_NOP )

    if ( (entryp->flags & (AIO_WRITE | AIO_FSYNC)) != 0 ) {

    if ( (entryp->flags & (AIO_READ | AIO_WRITE)) != 0 ) {
        // LP64todo - does max value for aio_nbytes need to grow?
        if ( entryp->aiocb.aio_nbytes > INT_MAX ||
             entryp->aiocb.aio_buf == USER_ADDR_NULL ||
             entryp->aiocb.aio_offset < 0 )

    /*
     * validate aiocb.aio_sigevent.  at this point we only support sigev_notify
     * equal to SIGEV_SIGNAL or SIGEV_NONE.  this means sigev_value,
     * sigev_notify_function, and sigev_notify_attributes are ignored.
     */
    if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ) {
        /* make sure we have a valid signal number */
        signum = entryp->aiocb.aio_sigevent.sigev_signo;
        if ( signum <= 0 || signum >= NSIG ||
             signum == SIGKILL || signum == SIGSTOP )
    else if ( entryp->aiocb.aio_sigevent.sigev_notify != SIGEV_NONE )

    /*
     * validate the file descriptor and that the file was opened
     * for the appropriate read / write access.
     */
    proc_fdlock(entryp->procp);

    result = fp_lookup( entryp->procp, entryp->aiocb.aio_fildes, &fp, 1);
    if ( result == 0 ) {
        if ( (fp->f_fglob->fg_flag & flag) == 0 ) {
            /* we don't have read or write access */
        else if ( fp->f_fglob->fg_type != DTYPE_VNODE ) {
            /* this is not a file */
            fp->f_flags |= FP_AIOISSUED;

        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 1);

    proc_fdunlock(entryp->procp);

} /* aio_validate */
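
/*
 * For reference, a userland aiocb that satisfies the checks above (a sketch,
 * not part of this kernel file): a descriptor opened with the matching
 * access mode, a non-NULL buffer, aio_nbytes <= INT_MAX, a non-negative
 * offset, and either SIGEV_NONE or SIGEV_SIGNAL with a catchable signal.
 *
 *	struct aiocb cb;
 *
 *	memset( &cb, 0, sizeof(cb) );
 *	cb.aio_fildes = fd;			// opened with O_WRONLY or O_RDWR for aio_write
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = sizeof(buf);
 *	cb.aio_offset = 0;
 *	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
 *	cb.aio_sigevent.sigev_signo = SIGUSR1;	// not SIGKILL / SIGSTOP
 */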

/*
 * aio_get_process_count - runs through our queues that hold outstanding
 * async IO requests and totals up the number of requests for the given
 * process.
 * NOTE - caller must hold aio lock!
 */
aio_get_process_count(proc_t procp )
    aio_workq_entry         *entryp;

    /* begin with count of completed async IO requests for this process */
    count = procp->aio_done_count;

    /* add in count of active async IO requests for this process */
    count += procp->aio_active_count;

    /* look for matches on our queue of asynchronous todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( procp == entryp->procp ) {

    /* look for matches on our queue of synchronous todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
        if ( procp == entryp->procp ) {

} /* aio_get_process_count */

/*
 * aio_get_all_queues_count - get total number of entries on all aio work queues.
 * NOTE - caller must hold aio lock!
 */
aio_get_all_queues_count( void )
    count = aio_anchor.aio_async_workq_count;
    count += aio_anchor.lio_sync_workq_count;
    count += aio_anchor.aio_active_count;
    count += aio_anchor.aio_done_count;

} /* aio_get_all_queues_count */

/*
 * do_aio_completion.  Handle async IO completion.
 */
do_aio_completion( aio_workq_entry *entryp )
    /* signal user land process if appropriate */
    if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL &&
         (entryp->flags & AIO_DISABLE) == 0 ) {

        /*
         * if group_tag is non zero then make sure this is the last IO request
         * in the group before we signal.
         */
        if ( entryp->group_tag == 0 ||
             (entryp->group_tag != 0 && aio_last_group_io( entryp )) ) {
            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_sig)) | DBG_FUNC_NONE,
                          (int)entryp->procp, (int)entryp->uaiocbp,
                          entryp->aiocb.aio_sigevent.sigev_signo, 0, 0 );

            psignal( entryp->procp, entryp->aiocb.aio_sigevent.sigev_signo );

    /*
     * need to handle case where a process is trying to exit, exec, or close
     * and is currently waiting for active aio requests to complete.  If
     * AIO_WAITING is set then we need to look to see if there are any
     * other requests in the active queue for this process.  If there are
     * none then wakeup using the AIO_CLEANUP_SLEEP_CHAN tsleep channel.  If
     * there are some still active then do nothing - we only want to wakeup
     * when all active aio requests for the process are complete.
     */
    if ( (entryp->flags & AIO_WAITING) != 0 ) {
        int         active_requests;

        KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wait)) | DBG_FUNC_NONE,
                      (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

        active_requests = aio_active_requests_for_process( entryp->procp );
        if ( active_requests < 1 ) {
            /* no active aio requests for this process, continue exiting */
            wakeup_one( (caddr_t) &entryp->procp->AIO_CLEANUP_SLEEP_CHAN );

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE,
                          (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

    /*
     * aio_suspend case when a signal was not requested.  In that scenario we
     * are sleeping on the AIO_SUSPEND_SLEEP_CHAN channel.
     * NOTE - the assumption here is that this wakeup call is inexpensive.
     * we really only need to do this when an aio_suspend call is pending.
     * If we find the wakeup call should be avoided we could mark the
     * async IO requests given in the list provided by aio_suspend and only
     * call wakeup for them.  If we do mark them we should unmark them after
     * the aio_suspend wakes up.
     */
    wakeup_one( (caddr_t) &entryp->procp->AIO_SUSPEND_SLEEP_CHAN );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_suspend_wake)) | DBG_FUNC_NONE,
                  (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

} /* do_aio_completion */

/*
 * aio_last_group_io - checks to see if this is the last unfinished IO request
 * for the given group_tag.  Returns TRUE if there are no other active IO
 * requests for this group or FALSE if there are active IO requests.
 * NOTE - AIO_LOCK must be held by caller
 */
aio_last_group_io( aio_workq_entry *entryp )
    aio_workq_entry         *my_entryp;

    /* look for matches on our queue of active async IO requests */
    TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
        if ( my_entryp->group_tag == entryp->group_tag )

    /* look for matches on our queue of asynchronous todo work */
    TAILQ_FOREACH( my_entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( my_entryp->group_tag == entryp->group_tag )

    /* look for matches on our queue of synchronous todo work */
    TAILQ_FOREACH( my_entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
        if ( my_entryp->group_tag == entryp->group_tag )

} /* aio_last_group_io */

do_aio_read( aio_workq_entry *entryp )
    struct fileproc         *fp;
    struct vfs_context      context;

    if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp, 0)) )
    if ( (fp->f_fglob->fg_flag & FREAD) == 0 ) {
        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

    /*
     * Needs vfs_context_t from vfs_context_create() in entryp!
     */
    context.vc_thread = proc_thread(entryp->procp);  /* XXX */
    context.vc_ucred = fp->f_fglob->fg_cred;

    error = dofileread(&context, fp,
                       entryp->aiocb.aio_buf,
                       entryp->aiocb.aio_nbytes,
                       entryp->aiocb.aio_offset, FOF_OFFSET,
                       &entryp->returnval );
    fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

do_aio_write( aio_workq_entry *entryp )
    struct fileproc         *fp;
    struct vfs_context      context;

    if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp, 0)) )
    if ( (fp->f_fglob->fg_flag & FWRITE) == 0 ) {
        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

    /*
     * Needs vfs_context_t from vfs_context_create() in entryp!
     */
    context.vc_thread = proc_thread(entryp->procp);  /* XXX */
    context.vc_ucred = fp->f_fglob->fg_cred;

    /* NB: tell dofilewrite the offset, and to use the proc cred */
    error = dofilewrite(&context,
                        fp,
                        entryp->aiocb.aio_buf,
                        entryp->aiocb.aio_nbytes,
                        entryp->aiocb.aio_offset,
                        FOF_OFFSET | FOF_PCRED,
                        &entryp->returnval);

    fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

} /* do_aio_write */

/*
 * aio_active_requests_for_process - return number of active async IO
 * requests for the given process.
 * NOTE - caller must hold aio lock!
 */
aio_active_requests_for_process(proc_t procp )
    return( procp->aio_active_count );

} /* aio_active_requests_for_process */

do_aio_fsync( aio_workq_entry *entryp )
    struct vfs_context      context;
    struct fileproc         *fp;

    /*
     * NOTE - we will not support AIO_DSYNC until fdatasync() is supported.
     * AIO_DSYNC is caught before we queue up a request and flagged as an error.
     * The following was shamelessly extracted from fsync() implementation.
     */
    error = fp_getfvp( entryp->procp, entryp->aiocb.aio_fildes, &fp, &vp );
    if ( (error = vnode_getwithref(vp)) ) {
        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
        entryp->returnval = -1;

    context.vc_thread = current_thread();
    context.vc_ucred = fp->f_fglob->fg_cred;

    error = VNOP_FSYNC( vp, MNT_WAIT, &context );

    (void)vnode_put(vp);

    fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
    entryp->returnval = -1;

} /* do_aio_fsync */

/*
 * is_already_queued - runs through our queues to see if the given
 * aiocbp / process is there.  Returns TRUE if there is a match
 * on any of our aio queues.
 * NOTE - callers must hold aio lock!
 */
is_already_queued(proc_t procp,
                  user_addr_t aiocbp )
    aio_workq_entry         *entryp;

    /* look for matches on our queue of async IO requests that have completed */
    TAILQ_FOREACH( entryp, &procp->aio_doneq, aio_workq_link ) {
        if ( aiocbp == entryp->uaiocbp ) {
            goto ExitThisRoutine;

    /* look for matches on our queue of active async IO requests */
    TAILQ_FOREACH( entryp, &procp->aio_activeq, aio_workq_link ) {
        if ( aiocbp == entryp->uaiocbp ) {
            goto ExitThisRoutine;

    /* look for matches on our queue of asynchronous todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( procp == entryp->procp && aiocbp == entryp->uaiocbp ) {
            goto ExitThisRoutine;

    /* look for matches on our queue of synchronous todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
        if ( procp == entryp->procp && aiocbp == entryp->uaiocbp ) {
            goto ExitThisRoutine;

} /* is_already_queued */

/*
 * aio initialization
 */
__private_extern__ void
aio_init( void )
    aio_lock_grp_attr = lck_grp_attr_alloc_init();
    aio_lock_grp = lck_grp_alloc_init("aio", aio_lock_grp_attr);
    aio_lock_attr = lck_attr_alloc_init();

    aio_lock = lck_mtx_alloc_init(aio_lock_grp, aio_lock_attr);

    TAILQ_INIT( &aio_anchor.aio_async_workq );
    TAILQ_INIT( &aio_anchor.lio_sync_workq );
    aio_anchor.aio_async_workq_count = 0;
    aio_anchor.lio_sync_workq_count = 0;
    aio_anchor.aio_active_count = 0;
    aio_anchor.aio_done_count = 0;

    i = sizeof( aio_workq_entry );
    aio_workq_zonep = zinit( i, i * aio_max_requests, i * aio_max_requests, "aiowq" );

    _aio_create_worker_threads( aio_worker_threads );

/*
 * aio worker threads created here.
 */
__private_extern__ void
_aio_create_worker_threads( int num )
    /* create some worker threads to handle the async IO requests */
    for ( i = 0; i < num; i++ ) {
        myThread = kernel_thread( kernel_task, aio_work_thread );
        if ( THREAD_NULL == myThread ) {
            printf( "%s - failed to create a work thread \n", __FUNCTION__ );

} /* _aio_create_worker_threads */

/*
 * Return the current activation utask
 */
task_t
get_aiotask(void)
    return  ((struct uthread *)get_bsdthread_info(current_thread()))->uu_aio_task;

/*
 * In the case of an aiocb from a 32-bit process we need to expand some longs
 * and pointers to the correct sizes in order to let downstream code always
 * work on the same type of aiocb (in our case that is a user_aiocb).
 */
do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp )
    the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes;
    the_user_aiocbp->aio_offset = my_aiocbp->aio_offset;
    the_user_aiocbp->aio_buf = CAST_USER_ADDR_T(my_aiocbp->aio_buf);
    the_user_aiocbp->aio_nbytes = my_aiocbp->aio_nbytes;
    the_user_aiocbp->aio_reqprio = my_aiocbp->aio_reqprio;
    the_user_aiocbp->aio_lio_opcode = my_aiocbp->aio_lio_opcode;

    /* special case here.  since we do not know if sigev_value is an */
    /* int or a ptr we do NOT cast the ptr to a user_addr_t.  This */
    /* means if we send this info back to user space we need to remember */
    /* sigev_value was not expanded for the 32-bit case. */
    /* NOTE - this does NOT affect us since we don't support sigev_value */
    /* yet in the aio context. */
    the_user_aiocbp->aio_sigevent.sigev_notify = my_aiocbp->aio_sigevent.sigev_notify;
    the_user_aiocbp->aio_sigevent.sigev_signo = my_aiocbp->aio_sigevent.sigev_signo;
    the_user_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int =
        my_aiocbp->aio_sigevent.sigev_value.sival_int;
    the_user_aiocbp->aio_sigevent.sigev_notify_function =
        CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_function);
    the_user_aiocbp->aio_sigevent.sigev_notify_attributes =
        CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_attributes);