/*
 * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * todo:
 * 1) ramesh is looking into how to replace taking a reference on
 *    the user's map (vm_map_reference()) since it is believed that
 *    would not hold the process for us.
 * 2) david is looking into a way for us to set the priority of the
 *    worker threads to match that of the user's thread when the
 *    async IO was queued.
 */

/*
 * This file contains support for the POSIX 1003.1B AIO/LIO facility.
 */
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file_internal.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode_internal.h>
#include <sys/malloc.h>
#include <sys/mount_internal.h>
#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>

#include <sys/aio_kern.h>
#include <sys/sysproto.h>

#include <machine/limits.h>

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <kern/sched_prim.h>

#include <vm/vm_map.h>

#include <sys/kdebug.h>
#define AIO_work_queued					1
#define AIO_worker_wake					2
#define AIO_completion_sig				3
#define AIO_completion_cleanup_wait		4
#define AIO_completion_cleanup_wake		5
#define AIO_completion_suspend_wake		6
#define AIO_fsync_delay					7
#define AIO_cancel_async_workq			11
#define AIO_cancel_sync_workq			12
#define AIO_cancel_activeq				13
#define AIO_cancel_doneq				14
#define AIO_error_val					61
#define AIO_error_activeq				62
#define AIO_error_workq					63
#define AIO_return_val					71
#define AIO_return_activeq				72
#define AIO_return_workq				73
#define AIO_exit_sleep					91
#define AIO_close_sleep					101
#define AIO_suspend						110
#define AIO_suspend_sleep				111
#define AIO_worker_thread				120

#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
/*
 * aio requests queue up on the aio_async_workq or lio_sync_workq (for
 * lio_listio LIO_WAIT).  Requests then move to the per process aio_activeq
 * (proc.aio_activeq) when one of our worker threads starts the IO.
 * And finally, requests move to the per process aio_doneq (proc.aio_doneq)
 * when the IO request completes.  The request remains on aio_doneq until
 * the user process calls aio_return or the process exits; either way that is
 * our trigger to release aio resources.
 */
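/*
 * For reference, a minimal user-space sketch of the request lifecycle this
 * file implements (illustrative only; the fd/buffer names are hypothetical
 * and this block is never built as part of the kernel):
 */
#if 0	/* user-space example, not compiled here */
#include <aio.h>
#include <errno.h>
#include <sys/types.h>

static ssize_t
read_async( int fd, void *buf, size_t len )
{
    struct aiocb cb = { 0 };

    cb.aio_fildes = fd;
    cb.aio_buf = buf;
    cb.aio_nbytes = len;
    cb.aio_offset = 0;

    if ( aio_read( &cb ) != 0 )               /* request lands on aio_async_workq */
        return -1;
    while ( aio_error( &cb ) == EINPROGRESS )
        ;                                     /* still on the work or active queue */
    return aio_return( &cb );                 /* releases the aio_doneq entry */
}
#endif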
struct aio_anchor_cb
{
    int     aio_async_workq_count;  /* entries on aio_async_workq */
    int     lio_sync_workq_count;   /* entries on lio_sync_workq */
    int     aio_active_count;       /* entries on all active queues (proc.aio_activeq) */
    int     aio_done_count;         /* entries on all done queues (proc.aio_doneq) */
    TAILQ_HEAD( , aio_workq_entry ) aio_async_workq;
    TAILQ_HEAD( , aio_workq_entry ) lio_sync_workq;
};
typedef struct aio_anchor_cb aio_anchor_cb;
/*
 * Notes on aio sleep / wake channels.
 * We currently pick a couple of fields within the proc structure that give us
 * sleep channels that do not collide with any other kernel routines.
 * At this time, for binary compatibility reasons, we cannot create new proc fields.
 */
#define AIO_SUSPEND_SLEEP_CHAN  p_estcpu
#define AIO_CLEANUP_SLEEP_CHAN  p_pctcpu
/*
 * async IO locking macros used to protect critical sections.
 */
#define AIO_LOCK    lck_mtx_lock(aio_lock)
#define AIO_UNLOCK  lck_mtx_unlock(aio_lock)
static int          aio_active_requests_for_process( struct proc *procp );
static boolean_t    aio_delay_fsync_request( aio_workq_entry *entryp );
static int          aio_free_request( aio_workq_entry *entryp, vm_map_t the_map );
static int          aio_get_all_queues_count( void );
static int          aio_get_process_count( struct proc *procp );
static aio_workq_entry *  aio_get_some_work( void );
static boolean_t    aio_last_group_io( aio_workq_entry *entryp );
static void         aio_mark_requests( aio_workq_entry *entryp );
static int          aio_queue_async_request( struct proc *procp, user_addr_t aiocbp, int kindOfIO );
static int          aio_validate( aio_workq_entry *entryp );
static void         aio_work_thread( void );
static int          do_aio_cancel( struct proc *p, int fd, user_addr_t aiocbp,
                                   boolean_t wait_for_completion,
                                   boolean_t disable_notification );
static void         do_aio_completion( aio_workq_entry *entryp );
static int          do_aio_fsync( aio_workq_entry *entryp );
static int          do_aio_read( aio_workq_entry *entryp );
static int          do_aio_write( aio_workq_entry *entryp );
static void         do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp );
static boolean_t    is_already_queued( struct proc *procp,
                                       user_addr_t aiocbp );
static int          lio_create_async_entry( struct proc *procp, user_addr_t aiocbp,
                                            user_addr_t sigp, long group_tag,
                                            aio_workq_entry **entrypp );
static int          lio_create_sync_entry( struct proc *procp, user_addr_t aiocbp,
                                           long group_tag,
                                           aio_workq_entry **entrypp );
/*
 * EXTERNAL PROTOTYPES
 */

/* in ...bsd/kern/sys_generic.c */
extern int dofileread( struct proc *p, struct fileproc *fp, int fd,
                       user_addr_t bufp, user_size_t nbyte,
                       off_t offset, int flags, user_ssize_t *retval );
extern int dofilewrite( struct proc *p, struct fileproc *fp, int fd,
                        user_addr_t bufp, user_size_t nbyte, off_t offset,
                        int flags, user_ssize_t *retval );
/*
 * aio external global variables.
 */
extern int aio_max_requests;                /* AIO_MAX - configurable */
extern int aio_max_requests_per_process;    /* AIO_PROCESS_MAX - configurable */
extern int aio_worker_threads;              /* AIO_THREAD_COUNT - configurable */

/*
 * aio static variables.
 */
static aio_anchor_cb        aio_anchor;
static lck_mtx_t *          aio_lock;
static lck_grp_t *          aio_lock_grp;
static lck_attr_t *         aio_lock_attr;
static lck_grp_attr_t *     aio_lock_grp_attr;
static struct zone          *aio_workq_zonep;
/*
 * aio_cancel - attempt to cancel one or more async IO requests currently
 * outstanding against file descriptor uap->fd.  If uap->aiocbp is not
 * NULL then only one specific IO is cancelled (if possible).  If uap->aiocbp
 * is NULL then all outstanding async IO requests for the given file
 * descriptor are cancelled (if possible).
 */
int
aio_cancel( struct proc *p, struct aio_cancel_args *uap, int *retval )
{
    struct user_aiocb       my_aiocb;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    /* quick check to see if there are any async IO requests queued up */
    result = aio_get_all_queues_count( );

    if ( uap->aiocbp != USER_ADDR_NULL ) {
        if ( !IS_64BIT_PROCESS(p) ) {
            struct aiocb aiocb32;

            result = copyin( uap->aiocbp, &aiocb32, sizeof(aiocb32) );
                do_munge_aiocb( &aiocb32, &my_aiocb );
            result = copyin( uap->aiocbp, &my_aiocb, sizeof(my_aiocb) );

        /* NOTE - POSIX standard says a mismatch between the file */
        /* descriptor passed in and the file descriptor embedded in */
        /* the aiocb causes unspecified results.  We return EBADF in */
        /* that situation. */
        if ( uap->fd != my_aiocb.aio_fildes ) {

    result = do_aio_cancel( p, uap->fd, uap->aiocbp, FALSE, FALSE );

    if ( result != -1 ) {

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, result, 0, 0 );
/*
 * _aio_close - internal function used to clean up async IO requests for
 * a file descriptor that is closing.
 */
__private_extern__ void
_aio_close( struct proc *p, int fd )
{
    /* quick check to see if there are any async IO requests queued up */
    count = aio_get_all_queues_count( );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_START,
                  (int)p, fd, 0, 0, 0 );

    /* cancel all async IO requests on our todo queues for this file descriptor */
    error = do_aio_cancel( p, fd, 0, TRUE, FALSE );
    if ( error == AIO_NOTCANCELED ) {
        /*
         * AIO_NOTCANCELED is returned when we find an aio request for this process
         * and file descriptor on the active async IO queue.  Active requests cannot
         * be cancelled so we must wait for them to complete.  We will get a special
         * wake up call on our channel used to sleep for ALL active requests to
         * complete.  This sleep channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used
         * when we must wait for all active aio requests.
         */
        KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close_sleep)) | DBG_FUNC_NONE,
                      (int)p, fd, 0, 0, 0 );

        tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_close", 0 );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_END,
                  (int)p, fd, 0, 0, 0 );
/*
 * aio_error - return the error status associated with the async IO
 * request referred to by uap->aiocbp.  The error status is the errno
 * value that would be set by the corresponding IO request (read, write,
 * fdatasync, or sync).
 */
int
aio_error( struct proc *p, struct aio_error_args *uap, int *retval )
{
    aio_workq_entry     *entryp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    /* quick check to see if there are any async IO requests queued up */
    if ( aio_get_all_queues_count( ) < 1 ) {

    /* look for a match on our queue of async IO requests that have completed */
    TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
        if ( entryp->uaiocbp == uap->aiocbp ) {
            *retval = entryp->errorval;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_val)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    /* look for a match on our queue of active async IO requests */
    TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
        if ( entryp->uaiocbp == uap->aiocbp ) {
            *retval = EINPROGRESS;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_activeq)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    /* look for a match on our queue of todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) {
            *retval = EINPROGRESS;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_workq)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );
/*
 * aio_fsync - asynchronously force all IO operations associated
 * with the file indicated by the file descriptor (uap->aiocbp->aio_fildes) and
 * queued at the time of the call to the synchronized completion state.
 * NOTE - we do not support op O_DSYNC at this point since we do not support the
 * fdatasync() call yet.
 */
int
aio_fsync( struct proc *p, struct aio_fsync_args *uap, int *retval )
{
    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, uap->op, 0, 0 );

    /* 0 := O_SYNC for binary backward compatibility with Panther */
    if ( uap->op == O_SYNC || uap->op == 0 )
        fsync_kind = AIO_FSYNC;
#if 0 // we don't support fdatasync() call yet
    else if ( uap->op == O_DSYNC )
        fsync_kind = AIO_DSYNC;

    error = aio_queue_async_request( p, uap->aiocbp, fsync_kind );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );
/* aio_read - asynchronously read uap->aiocbp->aio_nbytes bytes from the
 * file descriptor (uap->aiocbp->aio_fildes) into the buffer
 * (uap->aiocbp->aio_buf).
 */
int
aio_read( struct proc *p, struct aio_read_args *uap, int *retval )
{
    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    error = aio_queue_async_request( p, uap->aiocbp, AIO_READ );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );
/*
 * aio_return - return the return status associated with the async IO
 * request referred to by uap->aiocbp.  The return status is the value
 * that would be returned by the corresponding IO request (read, write,
 * fdatasync, or sync).  This is where we release kernel resources
 * held for the async IO call associated with the given aiocb pointer.
 */
int
aio_return( struct proc *p, struct aio_return_args *uap, user_ssize_t *retval )
{
    aio_workq_entry     *entryp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    /* quick check to see if there are any async IO requests queued up */
    if ( aio_get_all_queues_count( ) < 1 ) {

    /* look for a match on our queue of async IO requests that have completed */
    TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
        if ( entryp->uaiocbp == uap->aiocbp ) {
            TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link );
            aio_anchor.aio_done_count--;

            *retval = entryp->returnval;

            /* we cannot free requests that are still completing */
            if ( (entryp->flags & AIO_COMPLETION) == 0 ) {
                my_map = entryp->aio_map;
                entryp->aio_map = VM_MAP_NULL;
                aio_free_request( entryp, my_map );

                /* tell completion code to free this request */
                entryp->flags |= AIO_DO_FREE;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_val)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    /* look for a match on our queue of active async IO requests */
    TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
        if ( entryp->uaiocbp == uap->aiocbp ) {
            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_activeq)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    /* look for a match on our queue of todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) {
            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_workq)) | DBG_FUNC_NONE,
                          (int)p, (int)uap->aiocbp, *retval, 0, 0 );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );
/*
 * _aio_exec - internal function used to clean up async IO requests for
 * a process that is going away due to exec().  We cancel any async IOs
 * we can and wait for those already active.  We also disable signaling
 * for cancelled or active aio requests that complete.
 * This routine MAY block!
 */
__private_extern__ void
_aio_exec( struct proc *p )
{
    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_START,
                  (int)p, 0, 0, 0, 0 );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_END,
                  (int)p, 0, 0, 0, 0 );
/*
 * _aio_exit - internal function used to clean up async IO requests for
 * a process that is terminating (via exit() or exec()).  We cancel any async IOs
 * we can and wait for those already active.  We also disable signaling
 * for cancelled or active aio requests that complete.  This routine MAY block!
 */
__private_extern__ void
_aio_exit( struct proc *p )
{
    aio_workq_entry     *entryp;

    /* quick check to see if there are any async IO requests queued up */
    count = aio_get_all_queues_count( );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_START,
                  (int)p, 0, 0, 0, 0 );

    /*
     * cancel async IO requests on the todo work queue and wait for those
     * already active to complete.
     */
    error = do_aio_cancel( p, 0, 0, TRUE, TRUE );
    if ( error == AIO_NOTCANCELED ) {
        /*
         * AIO_NOTCANCELED is returned when we find an aio request for this process
         * on the active async IO queue.  Active requests cannot be cancelled so we
         * must wait for them to complete.  We will get a special wake up call on
         * our channel used to sleep for ALL active requests to complete.  This sleep
         * channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used when we must wait for all
         * active aio requests.
         */
        KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit_sleep)) | DBG_FUNC_NONE,
                      (int)p, 0, 0, 0, 0 );

        tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_exit", 0 );

    /* release all aio resources used by this process */
    entryp = TAILQ_FIRST( &p->aio_doneq );
    while ( entryp != NULL ) {
        aio_workq_entry     *next_entryp;

        next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
        TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link );
        aio_anchor.aio_done_count--;

        /* we cannot free requests that are still completing */
        if ( (entryp->flags & AIO_COMPLETION) == 0 ) {
            my_map = entryp->aio_map;
            entryp->aio_map = VM_MAP_NULL;
            aio_free_request( entryp, my_map );

            /* need to start over since aio_doneq may have been */
            /* changed while we were away. */
            entryp = TAILQ_FIRST( &p->aio_doneq );

        /* tell completion code to free this request */
        entryp->flags |= AIO_DO_FREE;
        entryp = next_entryp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_END,
                  (int)p, 0, 0, 0, 0 );
/*
 * do_aio_cancel - cancel async IO requests (if possible).  We get called by
 * aio_cancel, close, and at exit.
 * There are three modes of operation: 1) cancel all async IOs for a process -
 * fd is 0 and aiocbp is NULL; 2) cancel all async IOs for a file descriptor -
 * fd is > 0 and aiocbp is NULL; 3) cancel one async IO associated with the
 * given aiocbp.
 * Returns -1 if no matches were found, AIO_CANCELED when we cancelled all
 * target async IO requests, AIO_NOTCANCELED if we could not cancel all
 * target async IO requests, and AIO_ALLDONE if all target async IO requests
 * were already complete.
 * WARNING - do not dereference aiocbp in this routine; it may point to user
 * land data that has not been copied in (when called from aio_cancel()).
 */
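/*
 * For example, the call shapes used elsewhere in this file map onto those
 * three modes as follows:
 *
 *   do_aio_cancel( p, 0, 0, TRUE, TRUE );                    from _aio_exit - everything for the process
 *   do_aio_cancel( p, fd, 0, TRUE, FALSE );                  from _aio_close - everything for one fd
 *   do_aio_cancel( p, uap->fd, uap->aiocbp, FALSE, FALSE );  from aio_cancel - one specific request
 */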
static int
do_aio_cancel( struct proc *p, int fd, user_addr_t aiocbp,
               boolean_t wait_for_completion, boolean_t disable_notification )
{
    aio_workq_entry     *entryp;

    /* look for a match on our queue of async todo work. */
    entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
    while ( entryp != NULL ) {
        aio_workq_entry     *next_entryp;

        next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
        if ( p == entryp->procp ) {
            if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
                 (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
                 (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
                /* we found a match so we remove the entry from the */
                /* todo work queue and place it on the done queue */
                TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
                aio_anchor.aio_async_workq_count--;
                entryp->errorval = ECANCELED;
                entryp->returnval = -1;
                if ( disable_notification )
                    entryp->flags |= AIO_DISABLE; /* flag for special completion processing */
                result = AIO_CANCELED;

                KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_async_workq)) | DBG_FUNC_NONE,
                              (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

                TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
                aio_anchor.aio_done_count++;

                entryp->flags |= AIO_COMPLETION;

                /* do completion processing for this request */
                do_aio_completion( entryp );

                entryp->flags &= ~AIO_COMPLETION;
                if ( (entryp->flags & AIO_DO_FREE) != 0 ) {
                    my_map = entryp->aio_map;
                    entryp->aio_map = VM_MAP_NULL;
                    aio_free_request( entryp, my_map );

                if ( aiocbp != USER_ADDR_NULL ) {

                /* need to start over since aio_async_workq may have been */
                /* changed while we were away doing completion processing. */
                entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );

        entryp = next_entryp;

    /*
     * look for a match on our queue of synchronous todo work.  This will
     * be a rare occurrence but could happen if a process is terminated while
     * processing a lio_listio call.
     */
    entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
    while ( entryp != NULL ) {
        aio_workq_entry     *next_entryp;

        next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
        if ( p == entryp->procp ) {
            if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
                 (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
                 (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
                /* we found a match so we remove the entry from the */
                /* todo work queue and place it on the done queue */
                TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
                aio_anchor.lio_sync_workq_count--;
                entryp->errorval = ECANCELED;
                entryp->returnval = -1;
                if ( disable_notification )
                    entryp->flags |= AIO_DISABLE; /* flag for special completion processing */
                result = AIO_CANCELED;

                KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_sync_workq)) | DBG_FUNC_NONE,
                              (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

                TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
                aio_anchor.aio_done_count++;

                if ( aiocbp != USER_ADDR_NULL ) {

        entryp = next_entryp;

    /*
     * look for a match on our queue of active async IO requests and
     * return AIO_NOTCANCELED result.
     */
    TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
        if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
             (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
             (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
            result = AIO_NOTCANCELED;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_activeq)) | DBG_FUNC_NONE,
                          (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

            if ( wait_for_completion )
                entryp->flags |= AIO_WAITING; /* flag for special completion processing */
            if ( disable_notification )
                entryp->flags |= AIO_DISABLE; /* flag for special completion processing */
            if ( aiocbp != USER_ADDR_NULL ) {

    /*
     * if we didn't find any matches on the todo or active queues then look for a
     * match on our queue of async IO requests that have completed and if found
     * return AIO_ALLDONE result.
     */
    if ( result == -1 ) {
        TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
            if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
                 (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
                 (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
                result = AIO_ALLDONE;

                KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_doneq)) | DBG_FUNC_NONE,
                              (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

                if ( aiocbp != USER_ADDR_NULL ) {

} /* do_aio_cancel */
/*
 * aio_suspend - suspend the calling thread until at least one of the async
 * IO operations referenced by uap->aiocblist has completed, until a signal
 * interrupts the function, or the uap->timeoutp time interval (optional) has
 * passed.
 * Returns 0 if one or more async IOs have completed, else -1 and errno is
 * set appropriately - EAGAIN if the timeout elapses or EINTR if an interrupt
 * is detected.
 */
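/*
 * Illustrative user-space use of aio_suspend with a timeout (a sketch only;
 * not part of this file and never built in the kernel):
 */
#if 0	/* user-space example, not compiled here */
#include <aio.h>
#include <time.h>

static int
wait_up_to_half_second( struct aiocb *cb )
{
    const struct aiocb *const list[1] = { cb };
    struct timespec ts = { 0, 500000000 };      /* 0.5 second */

    /* 0 when cb has completed; -1 with errno EAGAIN on timeout, EINTR on signal */
    return aio_suspend( list, 1, &ts );
}
#endif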
int
aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval )
{
    struct user_timespec    ts;
    aio_workq_entry         *entryp;
    user_addr_t             *aiocbpp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_START,
                  (int)p, uap->nent, 0, 0, 0 );

    /* quick check to see if there are any async IO requests queued up */
    count = aio_get_all_queues_count( );
        goto ExitThisRoutine;

    if ( uap->nent < 1 || uap->nent > aio_max_requests_per_process ) {
        goto ExitThisRoutine;

    if ( uap->timeoutp != USER_ADDR_NULL ) {
        if ( proc_is64bit(p) ) {
            error = copyin( uap->timeoutp, &ts, sizeof(ts) );
            struct timespec temp;
            error = copyin( uap->timeoutp, &temp, sizeof(temp) );
                ts.tv_sec = temp.tv_sec;
                ts.tv_nsec = temp.tv_nsec;
            goto ExitThisRoutine;

        if ( ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000 ) {
            goto ExitThisRoutine;

        nanoseconds_to_absolutetime( (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec,
                                     &abstime );
        clock_absolutetime_interval_to_deadline( abstime, &abstime );

    /* we reserve enough space for largest possible pointer size */
    MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
    if ( aiocbpp == NULL ) {
        goto ExitThisRoutine;

    /* copyin our aiocb pointers from list */
    error = copyin( uap->aiocblist, aiocbpp,
                    proc_is64bit(p) ? (uap->nent * sizeof(user_addr_t))
                                    : (uap->nent * sizeof(uintptr_t)) );
        goto ExitThisRoutine;

    /* we depend on a list of user_addr_t's so we need to munge and expand */
    /* when these pointers came from a 32-bit process */
    if ( !proc_is64bit(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) {
        /* position to the last entry and work back from there */
        uintptr_t   *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1);
        user_addr_t *my_addrp = aiocbpp + (uap->nent - 1);
        for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) {
            *my_addrp = (user_addr_t) (*my_ptrp);

    /* check list of aio requests to see if any have completed */
    for ( i = 0; i < uap->nent; i++ ) {
        /* NULL elements are legal so check for 'em */
        aiocbp = *(aiocbpp + i);
        if ( aiocbp == USER_ADDR_NULL )

        /* return immediately if any aio request in the list is done */
        TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
            if ( entryp->uaiocbp == aiocbp ) {
                goto ExitThisRoutine;
    } /* for ( ; i < uap->nent; ) */

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend_sleep)) | DBG_FUNC_NONE,
                  (int)p, uap->nent, 0, 0, 0 );

    /*
     * wait for an async IO to complete, a signal to fire, or the timeout to expire.
     * we return EAGAIN (35) for timeout expiration and EINTR (4) when a signal
     * interrupts us.  If an async IO completes before a signal fires or our
     * timeout expires, we get a wakeup call from aio_work_thread().
     */
    assert_wait_deadline( (event_t) &p->AIO_SUSPEND_SLEEP_CHAN, THREAD_ABORTSAFE, abstime );

    error = thread_block( THREAD_CONTINUE_NULL );

    if ( error == THREAD_AWAKENED ) {
        /* got our wakeup call from aio_work_thread() */
    else if ( error == THREAD_TIMED_OUT ) {
        /* our timeout expired */
        /* we were interrupted */

ExitThisRoutine:
    if ( aiocbpp != NULL )
        FREE( aiocbpp, M_TEMP );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_END,
                  (int)p, uap->nent, error, 0, 0 );
/* aio_write - asynchronously write uap->aiocbp->aio_nbytes bytes to the
 * file descriptor (uap->aiocbp->aio_fildes) from the buffer
 * (uap->aiocbp->aio_buf).
 */
int
aio_write( struct proc *p, struct aio_write_args *uap, int *retval )
{
    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_START,
                  (int)p, (int)uap->aiocbp, 0, 0, 0 );

    error = aio_queue_async_request( p, uap->aiocbp, AIO_WRITE );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_END,
                  (int)p, (int)uap->aiocbp, error, 0, 0 );
/*
 * lio_listio - initiate a list of IO requests.  We process the list of aiocbs
 * either synchronously (mode == LIO_WAIT) or asynchronously (mode == LIO_NOWAIT).
 * The caller gets error and return status for each aiocb in the list via aio_error
 * and aio_return.  We must keep completed requests until released by the
 * aio_return call.
 */
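/*
 * Illustrative user-space use of lio_listio in LIO_WAIT mode (a sketch only;
 * the fd/buffers are hypothetical and this block is never built in the kernel):
 */
#if 0	/* user-space example, not compiled here */
#include <aio.h>
#include <sys/types.h>

static int
read_two_chunks( int fd, char *buf_a, char *buf_b, size_t len )
{
    struct aiocb a = { 0 }, b = { 0 };
    struct aiocb *list[2] = { &a, &b };

    a.aio_fildes = fd;  a.aio_buf = buf_a;  a.aio_nbytes = len;
    a.aio_offset = 0;           a.aio_lio_opcode = LIO_READ;
    b.aio_fildes = fd;  b.aio_buf = buf_b;  b.aio_nbytes = len;
    b.aio_offset = (off_t)len;  b.aio_lio_opcode = LIO_READ;

    /* LIO_WAIT: lio_listio returns only after both requests have completed */
    if ( lio_listio( LIO_WAIT, list, 2, NULL ) != 0 )
        return -1;
    return ( aio_return( &a ) < 0 || aio_return( &b ) < 0 ) ? -1 : 0;
}
#endif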
int
lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval )
{
    aio_workq_entry     **entryp_listp;
    user_addr_t         *aiocbpp;

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START,
                  (int)p, uap->nent, uap->mode, 0, 0 );

    entryp_listp = NULL;

    if ( !(uap->mode == LIO_NOWAIT || uap->mode == LIO_WAIT) ) {
        call_result = EINVAL;

    if ( uap->nent < 1 || uap->nent > AIO_LISTIO_MAX ) {
        call_result = EINVAL;

    /*
     * we use group_tag to mark IO requests for delayed completion processing
     * which means we wait until all IO requests in the group have completed
     * before we either return to the caller when mode is LIO_WAIT or signal
     * the user when mode is LIO_NOWAIT.
     */
    group_tag = random();

    /*
     * allocate a list of aio_workq_entry pointers that we will use to queue
     * up all our requests at once while holding our lock.
     */
    MALLOC( entryp_listp, void *, (uap->nent * sizeof(aio_workq_entry *)), M_TEMP, M_WAITOK );
    if ( entryp_listp == NULL ) {
        call_result = EAGAIN;

    /* we reserve enough space for largest possible pointer size */
    MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
    if ( aiocbpp == NULL ) {
        call_result = EAGAIN;

    /* copyin our aiocb pointers from list */
    result = copyin( uap->aiocblist, aiocbpp,
                     IS_64BIT_PROCESS(p) ? (uap->nent * sizeof(user_addr_t))
                                         : (uap->nent * sizeof(uintptr_t)) );
    if ( result != 0 ) {
        call_result = EAGAIN;

    /* we depend on a list of user_addr_t's so we need to munge and expand */
    /* when these pointers came from a 32-bit process */
    if ( !IS_64BIT_PROCESS(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) {
        /* position to the last entry and work back from there */
        uintptr_t   *my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1);
        user_addr_t *my_addrp = aiocbpp + (uap->nent - 1);
        for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) {
            *my_addrp = (user_addr_t) (*my_ptrp);

    /* process list of aio requests */
    for ( i = 0; i < uap->nent; i++ ) {
        user_addr_t     my_aiocbp;

        *(entryp_listp + i) = NULL;
        my_aiocbp = *(aiocbpp + i);

        /* NULL elements are legal so check for 'em */
        if ( my_aiocbp == USER_ADDR_NULL )

        if ( uap->mode == LIO_NOWAIT )
            result = lio_create_async_entry( p, my_aiocbp, uap->sigp,
                                             group_tag, (entryp_listp + i) );
        else
            result = lio_create_sync_entry( p, my_aiocbp, group_tag,
                                            (entryp_listp + i) );

        if ( result != 0 && call_result == -1 )
            call_result = result;

    /*
     * we need to protect this section since we do not want any of these grouped
     * IO requests to begin until we have them all on the queue.
     */
    for ( i = 0; i < uap->nent; i++ ) {
        aio_workq_entry     *entryp;

        /* NULL elements are legal so check for 'em */
        entryp = *(entryp_listp + i);
        if ( entryp == NULL )

        /* check our aio limits to throttle bad or rude user land behavior */
        if ( aio_get_all_queues_count( ) >= aio_max_requests ||
             aio_get_process_count( entryp->procp ) >= aio_max_requests_per_process ||
             is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) {
            my_map = entryp->aio_map;
            entryp->aio_map = VM_MAP_NULL;
            if ( call_result == -1 )
                call_result = EAGAIN;
            aio_free_request( entryp, my_map );

        /* place the request on the appropriate queue */
        if ( uap->mode == LIO_NOWAIT ) {
            TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
            aio_anchor.aio_async_workq_count++;

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE,
                          (int)p, (int)entryp->uaiocbp, 0, 0, 0 );

            TAILQ_INSERT_TAIL( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
            aio_anchor.lio_sync_workq_count++;

    if ( uap->mode == LIO_NOWAIT ) {
        /* caller does not want to wait so we'll fire off a worker thread and return */
        wakeup_one( (caddr_t) &aio_anchor.aio_async_workq );
        aio_workq_entry     *entryp;

        /*
         * mode is LIO_WAIT - handle the IO requests now.
         */
        entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
        while ( entryp != NULL ) {
            if ( p == entryp->procp && group_tag == entryp->group_tag ) {
                TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
                aio_anchor.lio_sync_workq_count--;

                if ( (entryp->flags & AIO_READ) != 0 ) {
                    error = do_aio_read( entryp );
                else if ( (entryp->flags & AIO_WRITE) != 0 ) {
                    error = do_aio_write( entryp );
                else if ( (entryp->flags & AIO_FSYNC) != 0 ) {
                    error = do_aio_fsync( entryp );
                    printf( "%s - unknown aio request - flags 0x%02X \n",
                            __FUNCTION__, entryp->flags );

                entryp->errorval = error;
                if ( error != 0 && call_result == -1 )

                /* we're done with the IO request so move it to the done queue */
                TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
                aio_anchor.aio_done_count++;
                p->aio_done_count++;

                /* need to start over since lio_sync_workq may have been changed while we */
                /* were away doing the IO. */
                entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
            } /* p == entryp->procp */

            entryp = TAILQ_NEXT( entryp, aio_workq_link );
        } /* while ( entryp != NULL ) */
    } /* uap->mode == LIO_WAIT */

    /* call_result == -1 means we had no trouble queueing up requests */
    if ( call_result == -1 ) {

    if ( entryp_listp != NULL )
        FREE( entryp_listp, M_TEMP );
    if ( aiocbpp != NULL )
        FREE( aiocbpp, M_TEMP );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_END,
                  (int)p, call_result, 0, 0, 0 );

    return( call_result );
/*
 * aio worker thread.  this is where all the real work gets done.
 * we get a wake up call on sleep channel &aio_anchor.aio_async_workq
 * after new work is queued up.
 */
static void
aio_work_thread( void )
{
    aio_workq_entry     *entryp;

    entryp = aio_get_some_work();
    if ( entryp == NULL ) {
        /*
         * aio worker threads wait for some work to get queued up
         * by aio_queue_async_request.  Once some work gets queued
         * it will wake up one of these worker threads just before
         * returning to our caller in user land.
         */
        assert_wait( (event_t) &aio_anchor.aio_async_workq, THREAD_UNINT );

        thread_block( (thread_continue_t)aio_work_thread );

        vm_map_t        currentmap;
        vm_map_t        oldmap = VM_MAP_NULL;
        task_t          oldaiotask = TASK_NULL;
        struct uthread  *uthreadp = NULL;

        KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_START,
                      (int)entryp->procp, (int)entryp->uaiocbp, entryp->flags, 0, 0 );

        /*
         * Assume the target's address space identity for the duration
         * of the IO.
         */
        currentmap = get_task_map( (current_proc())->task );
        if ( currentmap != entryp->aio_map ) {
            uthreadp = (struct uthread *) get_bsdthread_info(current_thread());
            oldaiotask = uthreadp->uu_aio_task;
            uthreadp->uu_aio_task = entryp->procp->task;
            oldmap = vm_map_switch( entryp->aio_map );

        if ( (entryp->flags & AIO_READ) != 0 ) {
            error = do_aio_read( entryp );
        else if ( (entryp->flags & AIO_WRITE) != 0 ) {
            error = do_aio_write( entryp );
        else if ( (entryp->flags & AIO_FSYNC) != 0 ) {
            error = do_aio_fsync( entryp );
            printf( "%s - unknown aio request - flags 0x%02X \n",
                    __FUNCTION__, entryp->flags );

        entryp->errorval = error;
        if ( currentmap != entryp->aio_map ) {
            (void) vm_map_switch( oldmap );
            uthreadp->uu_aio_task = oldaiotask;

        /* we're done with the IO request so pop it off the active queue and */
        /* push it on the done queue */
        TAILQ_REMOVE( &entryp->procp->aio_activeq, entryp, aio_workq_link );
        aio_anchor.aio_active_count--;
        entryp->procp->aio_active_count--;
        TAILQ_INSERT_TAIL( &entryp->procp->aio_doneq, entryp, aio_workq_link );
        aio_anchor.aio_done_count++;
        entryp->procp->aio_done_count++;
        entryp->flags |= AIO_COMPLETION;

        /* remove our reference to the user land map. */
        if ( VM_MAP_NULL != entryp->aio_map ) {
            my_map = entryp->aio_map;
            entryp->aio_map = VM_MAP_NULL;
            AIO_UNLOCK;     /* must unlock before calling vm_map_deallocate() */
            vm_map_deallocate( my_map );

        do_aio_completion( entryp );

        KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_END,
                      (int)entryp->procp, (int)entryp->uaiocbp, entryp->errorval,
                      entryp->returnval, 0 );

        entryp->flags &= ~AIO_COMPLETION;
        if ( (entryp->flags & AIO_DO_FREE) != 0 ) {
            my_map = entryp->aio_map;
            entryp->aio_map = VM_MAP_NULL;
            aio_free_request( entryp, my_map );

} /* aio_work_thread */
/*
 * aio_get_some_work - get the next async IO request that is ready to be executed.
 * aio_fsync complicates matters a bit since we cannot do the fsync until all async
 * IO requests at the time the aio_fsync call came in have completed.
 * NOTE - AIO_LOCK must be held by caller
 */
static aio_workq_entry *
aio_get_some_work( void )
{
    aio_workq_entry     *entryp;

    /* pop some work off the work queue and add to our active queue */
    for ( entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
          entryp != NULL;
          entryp = TAILQ_NEXT( entryp, aio_workq_link ) ) {

        if ( (entryp->flags & AIO_FSYNC) != 0 ) {
            /* leave aio_fsync calls on the work queue if there are IO */
            /* requests on the active queue for the same file descriptor. */
            if ( aio_delay_fsync_request( entryp ) ) {
                KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync_delay)) | DBG_FUNC_NONE,
                              (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

    if ( entryp != NULL ) {
        TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
        aio_anchor.aio_async_workq_count--;
        TAILQ_INSERT_TAIL( &entryp->procp->aio_activeq, entryp, aio_workq_link );
        aio_anchor.aio_active_count++;
        entryp->procp->aio_active_count++;

} /* aio_get_some_work */
/*
 * aio_delay_fsync_request - look to see if this aio_fsync request should be delayed at
 * this time.  Delay will happen when there are any active IOs for the same file
 * descriptor that were queued at the time the aio_fsync call was queued.
 * NOTE - AIO_LOCK must be held by caller
 */
static boolean_t
aio_delay_fsync_request( aio_workq_entry *entryp )
{
    aio_workq_entry     *my_entryp;

    TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
        if ( my_entryp->fsyncp != USER_ADDR_NULL &&
             entryp->uaiocbp == my_entryp->fsyncp &&
             entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {

} /* aio_delay_fsync_request */
/*
 * aio_queue_async_request - queue up an async IO request on our work queue then
 * wake up one of our worker threads to do the actual work.  We get a reference
 * to our caller's user land map in order to keep it around while we are
 * processing the request.
 */
static int
aio_queue_async_request( struct proc *procp, user_addr_t aiocbp, int kindOfIO )
{
    aio_workq_entry     *entryp;

    entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
    if ( entryp == NULL ) {
    bzero( entryp, sizeof(*entryp) );

    /* fill in the rest of the aio_workq_entry */
    entryp->procp = procp;
    entryp->uaiocbp = aiocbp;
    entryp->flags |= kindOfIO;
    entryp->aio_map = VM_MAP_NULL;

    if ( !IS_64BIT_PROCESS(procp) ) {
        struct aiocb aiocb32;

        result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
            do_munge_aiocb( &aiocb32, &entryp->aiocb );
        result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

    if ( result != 0 ) {

    /* do some more validation on the aiocb and embedded file descriptor */
    result = aio_validate( entryp );

    /* get a reference to the user land map in order to keep it around */
    entryp->aio_map = get_task_map( procp->task );
    vm_map_reference( entryp->aio_map );

    if ( is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) {

    /* check our aio limits to throttle bad or rude user land behavior */
    if ( aio_get_all_queues_count( ) >= aio_max_requests ||
         aio_get_process_count( procp ) >= aio_max_requests_per_process ) {

    /*
     * aio_fsync calls sync up all async IO requests queued at the time
     * the aio_fsync call was made.  So we mark each currently queued async
     * IO with a matching file descriptor as one that must complete before
     * we do the fsync.  We set the fsyncp field of each matching async IO
     * request with the aiocb pointer passed in on the aio_fsync call so we
     * know which IOs must complete before we process the aio_fsync call.
     */
    if ( (kindOfIO & AIO_FSYNC) != 0 )
        aio_mark_requests( entryp );

    /* queue up on our aio asynchronous work queue */
    TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
    aio_anchor.aio_async_workq_count++;

    wakeup_one( (caddr_t) &aio_anchor.aio_async_workq );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE,
                  (int)procp, (int)aiocbp, 0, 0, 0 );

    if ( entryp != NULL ) {
        /* this entry has not been queued up so no worries about unlocked */
        /* state and aio_map */
        aio_free_request( entryp, entryp->aio_map );

} /* aio_queue_async_request */
/*
 * lio_create_async_entry - allocate an aio_workq_entry and fill it in.
 * If all goes well return 0 and pass the aio_workq_entry pointer back to
 * our caller.  We get a reference to our caller's user land map in order to keep
 * it around while we are processing the request.
 * lio_listio calls behave differently at completion: they do completion notification
 * when all async IO requests have completed.  We use group_tag to tag IO requests
 * that behave in the delay notification manner.
 */
static int
lio_create_async_entry( struct proc *procp, user_addr_t aiocbp,
                        user_addr_t sigp, long group_tag,
                        aio_workq_entry **entrypp )
{
    aio_workq_entry     *entryp;

    entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
    if ( entryp == NULL ) {
    bzero( entryp, sizeof(*entryp) );

    /* fill in the rest of the aio_workq_entry */
    entryp->procp = procp;
    entryp->uaiocbp = aiocbp;
    entryp->flags |= AIO_LIO;
    entryp->group_tag = group_tag;
    entryp->aio_map = VM_MAP_NULL;

    if ( !IS_64BIT_PROCESS(procp) ) {
        struct aiocb aiocb32;

        result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
            do_munge_aiocb( &aiocb32, &entryp->aiocb );
        result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

    if ( result != 0 ) {

    /* look for lio_listio LIO_NOP requests and ignore them. */
    /* Not really an error, but we need to free our aio_workq_entry. */
    if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) {

    /* use sigevent passed in to lio_listio for each of our calls, but only */
    /* do completion notification after the last request completes. */
    if ( sigp != USER_ADDR_NULL ) {
        if ( !IS_64BIT_PROCESS(procp) ) {
            struct sigevent sigevent32;

            result = copyin( sigp, &sigevent32, sizeof(sigevent32) );
            if ( result == 0 ) {
                /* also need to munge aio_sigevent since it contains pointers */
                /* special case here.  since we do not know if sigev_value is an */
                /* int or a ptr we do NOT cast the ptr to a user_addr_t.  This */
                /* means if we send this info back to user space we need to remember */
                /* sigev_value was not expanded for the 32-bit case. */
                /* NOTE - this does NOT affect us since we don't support sigev_value */
                /* yet in the aio context. */
                entryp->aiocb.aio_sigevent.sigev_notify = sigevent32.sigev_notify;
                entryp->aiocb.aio_sigevent.sigev_signo = sigevent32.sigev_signo;
                entryp->aiocb.aio_sigevent.sigev_value.size_equivalent.sival_int =
                    sigevent32.sigev_value.sival_int;
                entryp->aiocb.aio_sigevent.sigev_notify_function =
                    CAST_USER_ADDR_T(sigevent32.sigev_notify_function);
                entryp->aiocb.aio_sigevent.sigev_notify_attributes =
                    CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes);

            result = copyin( sigp, &entryp->aiocb.aio_sigevent, sizeof(entryp->aiocb.aio_sigevent) );

        if ( result != 0 ) {

    /* do some more validation on the aiocb and embedded file descriptor */
    result = aio_validate( entryp );

    /* get a reference to the user land map in order to keep it around */
    entryp->aio_map = get_task_map( procp->task );
    vm_map_reference( entryp->aio_map );

    if ( entryp != NULL )
        zfree( aio_workq_zonep, entryp );

} /* lio_create_async_entry */
/*
 * aio_mark_requests - aio_fsync calls synchronize file data for all queued async IO
 * requests at the moment the aio_fsync call is queued.  We use aio_workq_entry.fsyncp
 * to mark each async IO that must complete before the fsync is done.  We use the uaiocbp
 * field from the aio_fsync call as the aio_workq_entry.fsyncp in marked requests.
 * NOTE - AIO_LOCK must be held by caller
 */
static void
aio_mark_requests( aio_workq_entry *entryp )
{
    aio_workq_entry     *my_entryp;

    TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
        if ( entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {
            my_entryp->fsyncp = entryp->uaiocbp;

    TAILQ_FOREACH( my_entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( entryp->procp == my_entryp->procp &&
             entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {
            my_entryp->fsyncp = entryp->uaiocbp;

} /* aio_mark_requests */
/*
 * lio_create_sync_entry - allocate an aio_workq_entry and fill it in.
 * If all goes well return 0 and pass the aio_workq_entry pointer back to
 * our caller.
 * lio_listio calls behave differently at completion: they do completion notification
 * when all async IO requests have completed.  We use group_tag to tag IO requests
 * that behave in the delay notification manner.
 */
static int
lio_create_sync_entry( struct proc *procp, user_addr_t aiocbp,
                       long group_tag, aio_workq_entry **entrypp )
{
    aio_workq_entry     *entryp;

    entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
    if ( entryp == NULL ) {
    bzero( entryp, sizeof(*entryp) );

    /* fill in the rest of the aio_workq_entry */
    entryp->procp = procp;
    entryp->uaiocbp = aiocbp;
    entryp->flags |= AIO_LIO;
    entryp->group_tag = group_tag;
    entryp->aio_map = VM_MAP_NULL;

    if ( !IS_64BIT_PROCESS(procp) ) {
        struct aiocb aiocb32;

        result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
            do_munge_aiocb( &aiocb32, &entryp->aiocb );
        result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

    if ( result != 0 ) {

    /* look for lio_listio LIO_NOP requests and ignore them. */
    /* Not really an error, but we need to free our aio_workq_entry. */
    if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) {

    result = aio_validate( entryp );
    if ( result != 0 ) {

    if ( entryp != NULL )
        zfree( aio_workq_zonep, entryp );

} /* lio_create_sync_entry */
/*
 * aio_free_request - remove our reference on the user land map and
 * free the work queue entry resources.
 * We are not holding the lock here thus aio_map is passed in and
 * zeroed while we did have the lock.
 */
static int
aio_free_request( aio_workq_entry *entryp, vm_map_t the_map )
{
    /* remove our reference to the user land map. */
    if ( VM_MAP_NULL != the_map ) {
        vm_map_deallocate( the_map );

    zfree( aio_workq_zonep, entryp );

} /* aio_free_request */
/* aio_validate - validate the aiocb passed in by one of the aio syscalls.
 */
static int
aio_validate( aio_workq_entry *entryp )
{
    struct fileproc     *fp;

    if ( (entryp->flags & AIO_LIO) != 0 ) {
        if ( entryp->aiocb.aio_lio_opcode == LIO_READ )
            entryp->flags |= AIO_READ;
        else if ( entryp->aiocb.aio_lio_opcode == LIO_WRITE )
            entryp->flags |= AIO_WRITE;
        else if ( entryp->aiocb.aio_lio_opcode == LIO_NOP )

    if ( (entryp->flags & (AIO_WRITE | AIO_FSYNC)) != 0 ) {

    if ( (entryp->flags & (AIO_READ | AIO_WRITE)) != 0 ) {
        // LP64todo - does max value for aio_nbytes need to grow?
        if ( entryp->aiocb.aio_nbytes > INT_MAX ||
             entryp->aiocb.aio_buf == USER_ADDR_NULL ||
             entryp->aiocb.aio_offset < 0 )

    /* validate aiocb.aio_sigevent.  at this point we only support sigev_notify
     * equal to SIGEV_SIGNAL or SIGEV_NONE.  this means sigev_value,
     * sigev_notify_function, and sigev_notify_attributes are ignored.
     */
    if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ) {
        /* make sure we have a valid signal number */
        signum = entryp->aiocb.aio_sigevent.sigev_signo;
        if ( signum <= 0 || signum >= NSIG ||
             signum == SIGKILL || signum == SIGSTOP )
    else if ( entryp->aiocb.aio_sigevent.sigev_notify != SIGEV_NONE )

    /* validate the file descriptor and that the file was opened
     * for the appropriate read / write access.
     */
    proc_fdlock(entryp->procp);

    result = fp_lookup( entryp->procp, entryp->aiocb.aio_fildes, &fp, 1);
    if ( result == 0 ) {
        if ( (fp->f_fglob->fg_flag & flag) == 0 ) {
            /* we don't have read or write access */
        else if ( fp->f_fglob->fg_type != DTYPE_VNODE ) {
            /* this is not a file */
            fp->f_flags |= FP_AIOISSUED;

        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 1);

    proc_fdunlock(entryp->procp);

} /* aio_validate */
/*
 * aio_get_process_count - runs through our queues that hold outstanding
 * async IO requests and totals up the number of requests for the given
 * process.
 * NOTE - caller must hold aio lock!
 */
static int
aio_get_process_count( struct proc *procp )
{
    aio_workq_entry     *entryp;

    /* begin with count of completed async IO requests for this process */
    count = procp->aio_done_count;

    /* add in count of active async IO requests for this process */
    count += procp->aio_active_count;

    /* look for matches on our queue of asynchronous todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( procp == entryp->procp ) {

    /* look for matches on our queue of synchronous todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
        if ( procp == entryp->procp ) {

} /* aio_get_process_count */
/*
 * aio_get_all_queues_count - get total number of entries on all aio work queues.
 * NOTE - caller must hold aio lock!
 */
static int
aio_get_all_queues_count( void )
{
    count = aio_anchor.aio_async_workq_count;
    count += aio_anchor.lio_sync_workq_count;
    count += aio_anchor.aio_active_count;
    count += aio_anchor.aio_done_count;

} /* aio_get_all_queues_count */
/*
 * do_aio_completion.  Handle async IO completion.
 */
static void
do_aio_completion( aio_workq_entry *entryp )
{
    /* signal user land process if appropriate */
    if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL &&
         (entryp->flags & AIO_DISABLE) == 0 ) {
        /*
         * if group_tag is non zero then make sure this is the last IO request
         * in the group before we signal.
         */
        if ( entryp->group_tag == 0 ||
             (entryp->group_tag != 0 && aio_last_group_io( entryp )) ) {
            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_sig)) | DBG_FUNC_NONE,
                          (int)entryp->procp, (int)entryp->uaiocbp,
                          entryp->aiocb.aio_sigevent.sigev_signo, 0, 0 );

            psignal( entryp->procp, entryp->aiocb.aio_sigevent.sigev_signo );

    /*
     * need to handle case where a process is trying to exit, exec, or close
     * and is currently waiting for active aio requests to complete.  If
     * AIO_WAITING is set then we need to look to see if there are any
     * other requests in the active queue for this process.  If there are
     * none then wakeup using the AIO_CLEANUP_SLEEP_CHAN tsleep channel.  If
     * there are some still active then do nothing - we only want to wakeup
     * when all active aio requests for the process are complete.
     */
    if ( (entryp->flags & AIO_WAITING) != 0 ) {
        int     active_requests;

        KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wait)) | DBG_FUNC_NONE,
                      (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

        active_requests = aio_active_requests_for_process( entryp->procp );

        if ( active_requests < 1 ) {
            /* no active aio requests for this process, continue exiting */
            wakeup_one( (caddr_t) &entryp->procp->AIO_CLEANUP_SLEEP_CHAN );

            KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE,
                          (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

    /*
     * aio_suspend case when a signal was not requested.  In that scenario we
     * are sleeping on the AIO_SUSPEND_SLEEP_CHAN channel.
     * NOTE - the assumption here is that this wakeup call is inexpensive.
     * we really only need to do this when an aio_suspend call is pending.
     * If we find the wakeup call should be avoided we could mark the
     * async IO requests given in the list provided by aio_suspend and only
     * call wakeup for them.  If we do mark them we should unmark them after
     * the aio_suspend wakes up.
     */
    wakeup_one( (caddr_t) &entryp->procp->AIO_SUSPEND_SLEEP_CHAN );

    KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_suspend_wake)) | DBG_FUNC_NONE,
                  (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

} /* do_aio_completion */
/*
 * aio_last_group_io - checks to see if this is the last unfinished IO request
 * for the given group_tag.  Returns TRUE if there are no other active IO
 * requests for this group or FALSE if there are active IO requests.
 * NOTE - AIO_LOCK must be held by caller
 */
static boolean_t
aio_last_group_io( aio_workq_entry *entryp )
{
    aio_workq_entry     *my_entryp;

    /* look for matches on our queue of active async IO requests */
    TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
        if ( my_entryp->group_tag == entryp->group_tag )

    /* look for matches on our queue of asynchronous todo work */
    TAILQ_FOREACH( my_entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( my_entryp->group_tag == entryp->group_tag )

    /* look for matches on our queue of synchronous todo work */
    TAILQ_FOREACH( my_entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
        if ( my_entryp->group_tag == entryp->group_tag )

} /* aio_last_group_io */
static int
do_aio_read( aio_workq_entry *entryp )
{
    struct fileproc     *fp;

    if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp, 0)) )

    if ( (fp->f_fglob->fg_flag & FREAD) == 0 ) {
        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

        error = dofileread( entryp->procp, fp, entryp->aiocb.aio_fildes,
                            entryp->aiocb.aio_buf,
                            entryp->aiocb.aio_nbytes,
                            entryp->aiocb.aio_offset, FOF_OFFSET,
                            &entryp->returnval );
        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
static int
do_aio_write( aio_workq_entry *entryp )
{
    struct fileproc     *fp;

    if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp, 0)) )

    if ( (fp->f_fglob->fg_flag & FWRITE) == 0 ) {
        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

        error = dofilewrite( entryp->procp, fp, entryp->aiocb.aio_fildes,
                             entryp->aiocb.aio_buf,
                             entryp->aiocb.aio_nbytes,
                             entryp->aiocb.aio_offset, FOF_OFFSET,
                             &entryp->returnval );
        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

} /* do_aio_write */
/*
 * aio_active_requests_for_process - return number of active async IO
 * requests for the given process.
 * NOTE - caller must hold aio lock!
 */
static int
aio_active_requests_for_process( struct proc *procp )
{
    return( procp->aio_active_count );

} /* aio_active_requests_for_process */
static int
do_aio_fsync( aio_workq_entry *entryp )
{
    struct vfs_context  context;
    struct fileproc     *fp;

    /*
     * NOTE - we will not support AIO_DSYNC until fdatasync() is supported.
     * AIO_DSYNC is caught before we queue up a request and flagged as an error.
     * The following was shamelessly extracted from the fsync() implementation.
     */
    error = fp_getfvp( entryp->procp, entryp->aiocb.aio_fildes, &fp, &vp );

    if ( (error = vnode_getwithref(vp)) ) {
        fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
        entryp->returnval = -1;

    context.vc_proc = entryp->procp;
    context.vc_ucred = fp->f_fglob->fg_cred;

    error = VNOP_FSYNC( vp, MNT_WAIT, &context );

    (void)vnode_put(vp);

    fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

        entryp->returnval = -1;

} /* do_aio_fsync */
/*
 * is_already_queued - runs through our queues to see if the given
 * aiocbp / process is there.  Returns TRUE if there is a match
 * on any of our aio queues.
 * NOTE - callers must hold aio lock!
 */
static boolean_t
is_already_queued( struct proc *procp,
                   user_addr_t aiocbp )
{
    aio_workq_entry     *entryp;

    /* look for matches on our queue of async IO requests that have completed */
    TAILQ_FOREACH( entryp, &procp->aio_doneq, aio_workq_link ) {
        if ( aiocbp == entryp->uaiocbp ) {
            goto ExitThisRoutine;

    /* look for matches on our queue of active async IO requests */
    TAILQ_FOREACH( entryp, &procp->aio_activeq, aio_workq_link ) {
        if ( aiocbp == entryp->uaiocbp ) {
            goto ExitThisRoutine;

    /* look for matches on our queue of asynchronous todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
        if ( procp == entryp->procp && aiocbp == entryp->uaiocbp ) {
            goto ExitThisRoutine;

    /* look for matches on our queue of synchronous todo work */
    TAILQ_FOREACH( entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
        if ( procp == entryp->procp && aiocbp == entryp->uaiocbp ) {
            goto ExitThisRoutine;

} /* is_already_queued */
/*
 * aio initialization
 */
__private_extern__ void
aio_init( void )
{
    aio_lock_grp_attr = lck_grp_attr_alloc_init();
    lck_grp_attr_setstat(aio_lock_grp_attr);
    aio_lock_grp = lck_grp_alloc_init("aio", aio_lock_grp_attr);
    aio_lock_attr = lck_attr_alloc_init();
    //lck_attr_setdebug(aio_lock_attr);

    aio_lock = lck_mtx_alloc_init(aio_lock_grp, aio_lock_attr);

    TAILQ_INIT( &aio_anchor.aio_async_workq );
    TAILQ_INIT( &aio_anchor.lio_sync_workq );
    aio_anchor.aio_async_workq_count = 0;
    aio_anchor.lio_sync_workq_count = 0;
    aio_anchor.aio_active_count = 0;
    aio_anchor.aio_done_count = 0;

    i = sizeof( aio_workq_entry );
    aio_workq_zonep = zinit( i, i * aio_max_requests, i * aio_max_requests, "aiowq" );

    _aio_create_worker_threads( aio_worker_threads );
/*
 * aio worker threads created here.
 */
__private_extern__ void
_aio_create_worker_threads( int num )
{
    /* create some worker threads to handle the async IO requests */
    for ( i = 0; i < num; i++ ) {
        myThread = kernel_thread( kernel_task, aio_work_thread );
        if ( THREAD_NULL == myThread ) {
            printf( "%s - failed to create a work thread \n", __FUNCTION__ );

} /* _aio_create_worker_threads */
/*
 * Return the current activation utask
 */
task_t
get_aiotask( void )
{
    return ((struct uthread *)get_bsdthread_info(current_thread()))->uu_aio_task;
/*
 * In the case of an aiocb from a
 * 32-bit process we need to expand some longs and pointers to the correct
 * sizes in order to let downstream code always work on the same type of
 * aiocb (in our case that is a user_aiocb).
 */
static void
do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp )
{
    the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes;
    the_user_aiocbp->aio_offset = my_aiocbp->aio_offset;
    the_user_aiocbp->aio_buf = CAST_USER_ADDR_T(my_aiocbp->aio_buf);
    the_user_aiocbp->aio_nbytes = my_aiocbp->aio_nbytes;
    the_user_aiocbp->aio_reqprio = my_aiocbp->aio_reqprio;
    the_user_aiocbp->aio_lio_opcode = my_aiocbp->aio_lio_opcode;

    /* special case here.  since we do not know if sigev_value is an */
    /* int or a ptr we do NOT cast the ptr to a user_addr_t.  This */
    /* means if we send this info back to user space we need to remember */
    /* sigev_value was not expanded for the 32-bit case. */
    /* NOTE - this does NOT affect us since we don't support sigev_value */
    /* yet in the aio context. */
    the_user_aiocbp->aio_sigevent.sigev_notify = my_aiocbp->aio_sigevent.sigev_notify;
    the_user_aiocbp->aio_sigevent.sigev_signo = my_aiocbp->aio_sigevent.sigev_signo;
    the_user_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int =
        my_aiocbp->aio_sigevent.sigev_value.sival_int;
    the_user_aiocbp->aio_sigevent.sigev_notify_function =
        CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_function);
    the_user_aiocbp->aio_sigevent.sigev_notify_attributes =
        CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_attributes);