/*
 * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*
 * todo:
 * 1) ramesh is looking into how to replace taking a reference on
 *    the user's map (vm_map_reference()) since it is believed that
 *    would not hold the process for us.
 * 2) david is looking into a way for us to set the priority of the
 *    worker threads to match that of the user's thread when the
 *    async IO was queued.
 */

/*
 * This file contains support for the POSIX 1003.1B AIO/LIO facility.
 */
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file_internal.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode_internal.h>
#include <sys/malloc.h>
#include <sys/mount_internal.h>
#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>

#include <sys/aio_kern.h>
#include <sys/sysproto.h>

#include <machine/limits.h>

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <kern/sched_prim.h>

#include <vm/vm_map.h>

#include <sys/kdebug.h>
#define AIO_work_queued					1
#define AIO_worker_wake					2
#define AIO_completion_sig				3
#define AIO_completion_cleanup_wait		4
#define AIO_completion_cleanup_wake		5
#define AIO_completion_suspend_wake		6
#define AIO_fsync_delay					7

#define AIO_cancel_async_workq			11
#define AIO_cancel_sync_workq			12
#define AIO_cancel_activeq				13
#define AIO_cancel_doneq				14

#define AIO_error_val					61
#define AIO_error_activeq				62
#define AIO_error_workq					63

#define AIO_return_val					71
#define AIO_return_activeq				72
#define AIO_return_workq				73

#define AIO_exit_sleep					91
#define AIO_close						100
#define AIO_close_sleep					101
#define AIO_suspend						110
#define AIO_suspend_sleep				111
#define AIO_worker_thread				120

#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
/*
 * aio requests queue up on the aio_async_workq or lio_sync_workq (for
 * lio_listio LIO_WAIT).  Requests then move to the per process aio_activeq
 * (proc.aio_activeq) when one of our worker threads starts the IO.
 * And finally, requests move to the per process aio_doneq (proc.aio_doneq)
 * when the IO request completes.  The request remains on aio_doneq until
 * the user process calls aio_return or the process exits; either way, that is our
 * trigger to release aio resources.
 */
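/*
 * Illustrative sketch (not part of the original source): the queue hand-off
 * described above, reduced to plain <sys/queue.h> TAILQ operations on a
 * hypothetical entry type.  All names here are placeholders, not kernel symbols.
 */
#if 0
#include <sys/queue.h>

struct example_entry {
	int				id;
	TAILQ_ENTRY(example_entry)	link;
};
TAILQ_HEAD(example_queue, example_entry);

static void
example_move_to_done( struct example_queue *workq, struct example_queue *doneq )
{
	struct example_entry	*ep;

	/* pop the next request off the work queue ... */
	ep = TAILQ_FIRST( workq );
	if ( ep != NULL ) {
		TAILQ_REMOVE( workq, ep, link );

		/* ... and park it on the done queue until it is reaped */
		TAILQ_INSERT_TAIL( doneq, ep, link );
	}
}
#endif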
struct aio_anchor_cb
{
	int		aio_async_workq_count;	/* entries on aio_async_workq */
	int		lio_sync_workq_count;	/* entries on lio_sync_workq */
	int		aio_active_count;	/* entries on all active queues (proc.aio_activeq) */
	int		aio_done_count;		/* entries on all done queues (proc.aio_doneq) */
	TAILQ_HEAD( , aio_workq_entry )	aio_async_workq;
	TAILQ_HEAD( , aio_workq_entry )	lio_sync_workq;
};
typedef struct aio_anchor_cb aio_anchor_cb;
135 * Notes on aio sleep / wake channels.
136 * We currently pick a couple fields within the proc structure that will allow
137 * us sleep channels that currently do not collide with any other kernel routines.
138 * At this time, for binary compatibility reasons, we cannot create new proc fields.
140 #define AIO_SUSPEND_SLEEP_CHAN p_estcpu
141 #define AIO_CLEANUP_SLEEP_CHAN p_pctcpu
/*
 * async IO locking macros used to protect critical sections.
 */
#define AIO_LOCK	lck_mtx_lock(aio_lock)
#define AIO_UNLOCK	lck_mtx_unlock(aio_lock)
/*
 * LOCAL PROTOTYPES
 */
static int		aio_active_requests_for_process( struct proc *procp );
static boolean_t	aio_delay_fsync_request( aio_workq_entry *entryp );
static int		aio_free_request( aio_workq_entry *entryp, vm_map_t the_map );
static int		aio_get_all_queues_count( void );
static int		aio_get_process_count( struct proc *procp );
static aio_workq_entry *	aio_get_some_work( void );
static boolean_t	aio_last_group_io( aio_workq_entry *entryp );
static void		aio_mark_requests( aio_workq_entry *entryp );
static int		aio_queue_async_request( struct proc *procp,
						 user_addr_t aiocbp,
						 int kindOfIO );
static int		aio_validate( aio_workq_entry *entryp );
static void		aio_work_thread( void );
static int		do_aio_cancel( struct proc *p,
				       int fd,
				       user_addr_t aiocbp,
				       boolean_t wait_for_completion,
				       boolean_t disable_notification );
static void		do_aio_completion( aio_workq_entry *entryp );
static int		do_aio_fsync( aio_workq_entry *entryp );
static int		do_aio_read( aio_workq_entry *entryp );
static int		do_aio_write( aio_workq_entry *entryp );
static void		do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp );
static boolean_t	is_already_queued( struct proc *procp,
					   user_addr_t aiocbp );
static int		lio_create_async_entry( struct proc *procp,
						user_addr_t aiocbp,
						user_addr_t sigp,
						long group_tag,
						aio_workq_entry **entrypp );
static int		lio_create_sync_entry( struct proc *procp,
					       user_addr_t aiocbp,
					       long group_tag,
					       aio_workq_entry **entrypp );
/*
 * EXTERNAL PROTOTYPES
 */

/* in ...bsd/kern/sys_generic.c */
extern int dofileread( struct proc *p, struct fileproc *fp, int fd,
		       user_addr_t bufp, user_size_t nbyte,
		       off_t offset, int flags, user_ssize_t *retval );
extern int dofilewrite( struct proc *p, struct fileproc *fp, int fd,
			user_addr_t bufp, user_size_t nbyte, off_t offset,
			int flags, user_ssize_t *retval );

/*
 * aio external global variables.
 */
extern int aio_max_requests;			/* AIO_MAX - configurable */
extern int aio_max_requests_per_process;	/* AIO_PROCESS_MAX - configurable */
extern int aio_worker_threads;			/* AIO_THREAD_COUNT - configurable */
/*
 * aio static variables.
 */
static aio_anchor_cb		aio_anchor;
static lck_mtx_t *		aio_lock;
static lck_grp_t *		aio_lock_grp;
static lck_attr_t *		aio_lock_attr;
static lck_grp_attr_t *		aio_lock_grp_attr;
static struct zone		*aio_workq_zonep;
/*
 * aio_cancel - attempt to cancel one or more async IO requests currently
 * outstanding against file descriptor uap->fd.  If uap->aiocbp is not
 * NULL then only one specific IO is cancelled (if possible).  If uap->aiocbp
 * is NULL then all outstanding async IO requests for the given file
 * descriptor are cancelled (if possible).
 */
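/*
 * Illustrative user-space sketch (not part of this kernel source): how the
 * corresponding libc call is typically used.  fd and cb are assumed to belong
 * to a request that was queued earlier.
 */
#if 0
#include <aio.h>
#include <stdio.h>

static void
example_cancel( int fd, struct aiocb *cb )
{
	switch ( aio_cancel( fd, cb ) ) {
	case AIO_CANCELED:
		printf( "request cancelled\n" );
		break;
	case AIO_NOTCANCELED:
		printf( "request already in progress; wait for it\n" );
		break;
	case AIO_ALLDONE:
		printf( "request had already completed\n" );
		break;
	default:
		perror( "aio_cancel" );
	}
}
#endif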
aio_cancel( struct proc *p, struct aio_cancel_args *uap, int *retval )
	struct user_aiocb		my_aiocb;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_START,
		      (int)p, (int)uap->aiocbp, 0, 0, 0 );

	/* quick check to see if there are any async IO requests queued up */
	result = aio_get_all_queues_count( );

	if ( uap->aiocbp != USER_ADDR_NULL ) {
		if ( !IS_64BIT_PROCESS(p) ) {
			struct aiocb		aiocb32;

			result = copyin( uap->aiocbp, &aiocb32, sizeof(aiocb32) );
			do_munge_aiocb( &aiocb32, &my_aiocb );
		result = copyin( uap->aiocbp, &my_aiocb, sizeof(my_aiocb) );

		/* NOTE - POSIX standard says a mismatch between the file */
		/* descriptor passed in and the file descriptor embedded in */
		/* the aiocb causes unspecified results.  We return EBADF in */
		/* that situation. */
		if ( uap->fd != my_aiocb.aio_fildes ) {

	result = do_aio_cancel( p, uap->fd, uap->aiocbp, FALSE, FALSE );

	if ( result != -1 ) {

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_END,
		      (int)p, (int)uap->aiocbp, result, 0, 0 );
/*
 * _aio_close - internal function used to clean up async IO requests for
 * a file descriptor that is closing.
 */
__private_extern__ void
_aio_close( struct proc *p, int fd )
	/* quick check to see if there are any async IO requests queued up */
	count = aio_get_all_queues_count( );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_START,
		      (int)p, fd, 0, 0, 0 );

	/* cancel all async IO requests on our todo queues for this file descriptor */
	error = do_aio_cancel( p, fd, 0, TRUE, FALSE );
	if ( error == AIO_NOTCANCELED ) {
		/*
		 * AIO_NOTCANCELED is returned when we find an aio request for this process
		 * and file descriptor on the active async IO queue.  Active requests cannot
		 * be cancelled so we must wait for them to complete.  We will get a special
		 * wake up call on our channel used to sleep for ALL active requests to
		 * complete.  This sleep channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used
		 * when we must wait for all active aio requests.
		 */
		KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close_sleep)) | DBG_FUNC_NONE,
			      (int)p, fd, 0, 0, 0 );

		tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_close", 0 );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_END,
		      (int)p, fd, 0, 0, 0 );
/*
 * aio_error - return the error status associated with the async IO
 * request referred to by uap->aiocbp.  The error status is the errno
 * value that would be set by the corresponding IO request (read, write,
 * fdatasync, or sync).
 */
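/*
 * Illustrative user-space sketch (not part of this kernel source): polling a
 * previously queued request with aio_error() until it leaves EINPROGRESS, then
 * reaping the result with aio_return().  cb is assumed to have already been
 * handed to aio_read()/aio_write().
 */
#if 0
#include <aio.h>
#include <errno.h>

static ssize_t
example_wait_done( struct aiocb *cb )
{
	int	err;

	/* EINPROGRESS means the request is still on the work or active queue */
	while ( (err = aio_error( cb )) == EINPROGRESS )
		;

	if ( err != 0 ) {
		errno = err;		/* the request failed with this errno */
		return -1;
	}
	return aio_return( cb );	/* releases the kernel resources */
}
#endif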
aio_error( struct proc *p, struct aio_error_args *uap, int *retval )
	aio_workq_entry		*entryp;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_START,
		      (int)p, (int)uap->aiocbp, 0, 0, 0 );

	/* quick check to see if there are any async IO requests queued up */
	if ( aio_get_all_queues_count( ) < 1 ) {

	/* look for a match on our queue of async IO requests that have completed */
	TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
		if ( entryp->uaiocbp == uap->aiocbp ) {
			*retval = entryp->errorval;

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_val)) | DBG_FUNC_NONE,
				      (int)p, (int)uap->aiocbp, *retval, 0, 0 );

	/* look for a match on our queue of active async IO requests */
	TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
		if ( entryp->uaiocbp == uap->aiocbp ) {
			*retval = EINPROGRESS;

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_activeq)) | DBG_FUNC_NONE,
				      (int)p, (int)uap->aiocbp, *retval, 0, 0 );

	/* look for a match on our queue of todo work */
	TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) {
			*retval = EINPROGRESS;

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error_workq)) | DBG_FUNC_NONE,
				      (int)p, (int)uap->aiocbp, *retval, 0, 0 );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_END,
		      (int)p, (int)uap->aiocbp, error, 0, 0 );
/*
 * aio_fsync - asynchronously force all IO operations associated
 * with the file indicated by the file descriptor (uap->aiocbp->aio_fildes) and
 * queued at the time of the call to the synchronized completion state.
 * NOTE - we do not support op O_DSYNC at this point since we do not support the
 * fdatasync() call.
 */
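/*
 * Illustrative user-space sketch (not part of this kernel source): asking for
 * an asynchronous fsync of everything already queued against fd.  Only O_SYNC
 * is accepted here; O_DSYNC is rejected until fdatasync() exists.
 */
#if 0
#include <aio.h>
#include <fcntl.h>
#include <string.h>

static int
example_fsync_async( int fd )
{
	/* static so the aiocb stays valid while the kernel works on it */
	static struct aiocb	cb;

	memset( &cb, 0, sizeof(cb) );
	cb.aio_fildes = fd;

	/* queues the fsync; completion is observed via aio_error()/aio_return() */
	return aio_fsync( O_SYNC, &cb );
}
#endif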
aio_fsync( struct proc *p, struct aio_fsync_args *uap, int *retval )
	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_START,
		      (int)p, (int)uap->aiocbp, uap->op, 0, 0 );

	/* 0 := O_SYNC for binary backward compatibility with Panther */
	if (uap->op == O_SYNC || uap->op == 0)
		fsync_kind = AIO_FSYNC;
#if 0 // we don't support fdatasync() call yet
	else if ( uap->op == O_DSYNC )
		fsync_kind = AIO_DSYNC;
#endif

	error = aio_queue_async_request( p, uap->aiocbp, fsync_kind );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_END,
		      (int)p, (int)uap->aiocbp, error, 0, 0 );
/* aio_read - asynchronously read uap->aiocbp->aio_nbytes bytes from the
 * file descriptor (uap->aiocbp->aio_fildes) into the buffer
 * (uap->aiocbp->aio_buf).
 */
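/*
 * Illustrative user-space sketch (not part of this kernel source): queueing an
 * asynchronous read.  fd, buf, nbytes and offset are assumptions supplied by
 * the caller, and cb must stay valid until the request is reaped.
 */
#if 0
#include <aio.h>
#include <string.h>

static int
example_read_async( int fd, void *buf, size_t nbytes, off_t offset, struct aiocb *cb )
{
	memset( cb, 0, sizeof(*cb) );
	cb->aio_fildes = fd;		/* must match the fd validated by the kernel */
	cb->aio_buf    = buf;
	cb->aio_nbytes = nbytes;
	cb->aio_offset = offset;

	/* returns 0 once the request is queued; the result comes via aio_return() */
	return aio_read( cb );
}
#endif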
aio_read( struct proc *p, struct aio_read_args *uap, int *retval )
	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_START,
		      (int)p, (int)uap->aiocbp, 0, 0, 0 );

	error = aio_queue_async_request( p, uap->aiocbp, AIO_READ );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_END,
		      (int)p, (int)uap->aiocbp, error, 0, 0 );
/*
 * aio_return - return the return status associated with the async IO
 * request referred to by uap->aiocbp.  The return status is the value
 * that would be returned by the corresponding IO request (read, write,
 * fdatasync, or sync).  This is where we release the kernel resources
 * held for the async IO call associated with the given aiocb pointer.
 */
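/*
 * Illustrative user-space sketch (not part of this kernel source): reaping a
 * completed request.  aio_return() should be called exactly once per aiocb; it
 * is the user-level trigger for releasing the kernel's per-request resources.
 */
#if 0
#include <aio.h>
#include <errno.h>
#include <stdio.h>

static void
example_reap( struct aiocb *cb )
{
	if ( aio_error( cb ) == EINPROGRESS ) {
		printf( "still in progress - aio_return() would be premature\n" );
		return;
	}

	/* value the plain read()/write()/fsync() would have returned */
	ssize_t nbytes = aio_return( cb );
	if ( nbytes == -1 )
		perror( "aio request" );
	else
		printf( "transferred %zd bytes\n", nbytes );
}
#endif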
aio_return( struct proc *p, struct aio_return_args *uap, user_ssize_t *retval )
	aio_workq_entry		*entryp;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_START,
		      (int)p, (int)uap->aiocbp, 0, 0, 0 );

	/* quick check to see if there are any async IO requests queued up */
	if ( aio_get_all_queues_count( ) < 1 ) {

	/* look for a match on our queue of async IO requests that have completed */
	TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
		if ( entryp->uaiocbp == uap->aiocbp ) {
			TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link );
			aio_anchor.aio_done_count--;

			*retval = entryp->returnval;

			/* we cannot free requests that are still completing */
			if ( (entryp->flags & AIO_COMPLETION) == 0 ) {
				my_map = entryp->aio_map;
				entryp->aio_map = VM_MAP_NULL;

				aio_free_request( entryp, my_map );

			/* tell completion code to free this request */
			entryp->flags |= AIO_DO_FREE;

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_val)) | DBG_FUNC_NONE,
				      (int)p, (int)uap->aiocbp, *retval, 0, 0 );

	/* look for a match on our queue of active async IO requests */
	TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
		if ( entryp->uaiocbp == uap->aiocbp ) {

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_activeq)) | DBG_FUNC_NONE,
				      (int)p, (int)uap->aiocbp, *retval, 0, 0 );

	/* look for a match on our queue of todo work */
	TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( p == entryp->procp && entryp->uaiocbp == uap->aiocbp ) {

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return_workq)) | DBG_FUNC_NONE,
				      (int)p, (int)uap->aiocbp, *retval, 0, 0 );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_END,
		      (int)p, (int)uap->aiocbp, error, 0, 0 );
/*
 * _aio_exec - internal function used to clean up async IO requests for
 * a process that is going away due to exec().  We cancel any async IOs
 * we can and wait for those already active.  We also disable signaling
 * for cancelled or active aio requests that complete.
 * This routine MAY block!
 */
__private_extern__ void
_aio_exec( struct proc *p )
	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_START,
		      (int)p, 0, 0, 0, 0 );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_END,
		      (int)p, 0, 0, 0, 0 );
/*
 * _aio_exit - internal function used to clean up async IO requests for
 * a process that is terminating (via exit() or exec()).  We cancel any async IOs
 * we can and wait for those already active.  We also disable signaling
 * for cancelled or active aio requests that complete.  This routine MAY block!
 */
__private_extern__ void
_aio_exit( struct proc *p )
	aio_workq_entry		*entryp;

	/* quick check to see if there are any async IO requests queued up */
	count = aio_get_all_queues_count( );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_START,
		      (int)p, 0, 0, 0, 0 );

	/*
	 * cancel async IO requests on the todo work queue and wait for those
	 * already active to complete.
	 */
	error = do_aio_cancel( p, 0, 0, TRUE, TRUE );
	if ( error == AIO_NOTCANCELED ) {
		/*
		 * AIO_NOTCANCELED is returned when we find an aio request for this process
		 * on the active async IO queue.  Active requests cannot be cancelled so we
		 * must wait for them to complete.  We will get a special wake up call on
		 * our channel used to sleep for ALL active requests to complete.  This sleep
		 * channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used when we must wait for all
		 * active aio requests.
		 */
		KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit_sleep)) | DBG_FUNC_NONE,
			      (int)p, 0, 0, 0, 0 );

		tsleep( &p->AIO_CLEANUP_SLEEP_CHAN, PRIBIO, "aio_exit", 0 );

	/* release all aio resources used by this process */
	entryp = TAILQ_FIRST( &p->aio_doneq );
	while ( entryp != NULL ) {
		aio_workq_entry		*next_entryp;

		next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
		TAILQ_REMOVE( &p->aio_doneq, entryp, aio_workq_link );
		aio_anchor.aio_done_count--;

		/* we cannot free requests that are still completing */
		if ( (entryp->flags & AIO_COMPLETION) == 0 ) {
			my_map = entryp->aio_map;
			entryp->aio_map = VM_MAP_NULL;

			aio_free_request( entryp, my_map );

			/* need to start over since aio_doneq may have been */
			/* changed while we were away. */
			entryp = TAILQ_FIRST( &p->aio_doneq );

		/* tell completion code to free this request */
		entryp->flags |= AIO_DO_FREE;
		entryp = next_entryp;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_END,
		      (int)p, 0, 0, 0, 0 );
/*
 * do_aio_cancel - cancel async IO requests (if possible).  We get called by
 * aio_cancel, close, and at exit.
 * There are three modes of operation: 1) cancel all async IOs for a process -
 * fd is 0 and aiocbp is NULL 2) cancel all async IOs for a file descriptor - fd
 * is > 0 and aiocbp is NULL 3) cancel one async IO associated with the given
 * aiocbp.
 * Returns -1 if no matches were found, AIO_CANCELED when we cancelled all
 * target async IO requests, AIO_NOTCANCELED if we could not cancel all
 * target async IO requests, and AIO_ALLDONE if all target async IO requests
 * were already complete.
 * WARNING - do not dereference aiocbp in this routine, it may point to user
 * land data that has not been copied in (when called from aio_cancel()).
 */
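/*
 * Illustrative restatement (not additional kernel logic): the same match rule
 * is applied in each of the three passes below.  An entry is a cancellation
 * target when the caller asked for everything, for that exact aiocb, or for
 * that file descriptor.  The helper name is hypothetical.
 */
#if 0
static int
example_is_cancel_target( int fd, user_addr_t aiocbp, aio_workq_entry *entryp )
{
	return ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
		 (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
		 (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) );
}
#endif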
do_aio_cancel( struct proc *p, int fd, user_addr_t aiocbp,
	       boolean_t wait_for_completion, boolean_t disable_notification )
	aio_workq_entry		*entryp;

	/* look for a match on our queue of async todo work. */
	entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
	while ( entryp != NULL ) {
		aio_workq_entry		*next_entryp;

		next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
		if ( p == entryp->procp ) {
			if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
			     (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
			     (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
				/* we found a match so we remove the entry from the */
				/* todo work queue and place it on the done queue */
				TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
				aio_anchor.aio_async_workq_count--;
				entryp->errorval = ECANCELED;
				entryp->returnval = -1;
				if ( disable_notification )
					entryp->flags |= AIO_DISABLE;	/* flag for special completion processing */
				result = AIO_CANCELED;

				KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_async_workq)) | DBG_FUNC_NONE,
					      (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

				TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
				aio_anchor.aio_done_count++;
				entryp->flags |= AIO_COMPLETION;

				/* do completion processing for this request */
				do_aio_completion( entryp );

				entryp->flags &= ~AIO_COMPLETION;
				if ( (entryp->flags & AIO_DO_FREE) != 0 ) {
					my_map = entryp->aio_map;
					entryp->aio_map = VM_MAP_NULL;
					aio_free_request( entryp, my_map );

				if ( aiocbp != USER_ADDR_NULL ) {

				/* need to start over since aio_async_workq may have been */
				/* changed while we were away doing completion processing. */
				entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
		entryp = next_entryp;

	/*
	 * look for a match on our queue of synchronous todo work.  This will
	 * be a rare occurrence but could happen if a process is terminated while
	 * processing a lio_listio call.
	 */
	entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
	while ( entryp != NULL ) {
		aio_workq_entry		*next_entryp;

		next_entryp = TAILQ_NEXT( entryp, aio_workq_link );
		if ( p == entryp->procp ) {
			if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
			     (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
			     (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
				/* we found a match so we remove the entry from the */
				/* todo work queue and place it on the done queue */
				TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
				aio_anchor.lio_sync_workq_count--;
				entryp->errorval = ECANCELED;
				entryp->returnval = -1;
				if ( disable_notification )
					entryp->flags |= AIO_DISABLE;	/* flag for special completion processing */
				result = AIO_CANCELED;

				KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_sync_workq)) | DBG_FUNC_NONE,
					      (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

				TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
				aio_anchor.aio_done_count++;

				if ( aiocbp != USER_ADDR_NULL ) {
		entryp = next_entryp;

	/*
	 * look for a match on our queue of active async IO requests and
	 * return AIO_NOTCANCELED result.
	 */
	TAILQ_FOREACH( entryp, &p->aio_activeq, aio_workq_link ) {
		if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
		     (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
		     (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
			result = AIO_NOTCANCELED;

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_activeq)) | DBG_FUNC_NONE,
				      (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

			if ( wait_for_completion )
				entryp->flags |= AIO_WAITING;	/* flag for special completion processing */
			if ( disable_notification )
				entryp->flags |= AIO_DISABLE;	/* flag for special completion processing */
			if ( aiocbp != USER_ADDR_NULL ) {

	/*
	 * if we didn't find any matches on the todo or active queues then look for a
	 * match on our queue of async IO requests that have completed and if found
	 * return AIO_ALLDONE result.
	 */
	if ( result == -1 ) {
		TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
			if ( (aiocbp == USER_ADDR_NULL && fd == 0) ||
			     (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
			     (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes) ) {
				result = AIO_ALLDONE;

				KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_doneq)) | DBG_FUNC_NONE,
					      (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0 );

				if ( aiocbp != USER_ADDR_NULL ) {

} /* do_aio_cancel */
/*
 * aio_suspend - suspend the calling thread until at least one of the async
 * IO operations referenced by uap->aiocblist has completed, until a signal
 * interrupts the function, or uap->timeoutp time interval (optional) has
 * passed.
 * Returns 0 if one or more async IOs have completed, else -1 and errno is
 * set appropriately - EAGAIN if the timeout elapses or EINTR if an interrupt
 * occurs.
 */
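/*
 * Illustrative user-space sketch (not part of this kernel source): waiting up
 * to one second for either of two queued requests to finish.  cb1 and cb2 are
 * assumed to have been handed to aio_read()/aio_write() already.
 */
#if 0
#include <aio.h>
#include <errno.h>
#include <stdio.h>

static void
example_suspend( struct aiocb *cb1, struct aiocb *cb2 )
{
	const struct aiocb *const	list[2] = { cb1, cb2 };
	struct timespec			ts = { 1, 0 };	/* 1 second, 0 nanoseconds */

	if ( aio_suspend( list, 2, &ts ) == 0 )
		printf( "at least one request is done\n" );
	else if ( errno == EAGAIN )
		printf( "timed out\n" );
	else if ( errno == EINTR )
		printf( "interrupted by a signal\n" );
}
#endif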
aio_suspend( struct proc *p, struct aio_suspend_args *uap, int *retval )
	struct user_timespec	ts;
	aio_workq_entry		*entryp;
	user_addr_t		*aiocbpp;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_START,
		      (int)p, uap->nent, 0, 0, 0 );

	/* quick check to see if there are any async IO requests queued up */
	count = aio_get_all_queues_count( );
		goto ExitThisRoutine;

	if ( uap->nent < 1 || uap->nent > aio_max_requests_per_process ) {
		goto ExitThisRoutine;

	if ( uap->timeoutp != USER_ADDR_NULL ) {
		if ( proc_is64bit(p) ) {
			error = copyin( uap->timeoutp, &ts, sizeof(ts) );
			struct timespec		temp;
			error = copyin( uap->timeoutp, &temp, sizeof(temp) );
			ts.tv_sec = temp.tv_sec;
			ts.tv_nsec = temp.tv_nsec;
			goto ExitThisRoutine;

		if ( ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000 ) {
			goto ExitThisRoutine;

		nanoseconds_to_absolutetime( (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec,
					     &abstime );
		clock_absolutetime_interval_to_deadline( abstime, &abstime );

	/* we reserve enough space for largest possible pointer size */
	MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
	if ( aiocbpp == NULL ) {
		goto ExitThisRoutine;

	/* copyin our aiocb pointers from list */
	error = copyin( uap->aiocblist, aiocbpp,
			proc_is64bit(p) ? (uap->nent * sizeof(user_addr_t))
					: (uap->nent * sizeof(uintptr_t)) );
		goto ExitThisRoutine;

	/* we depend on a list of user_addr_t's so we need to munge and expand */
	/* when these pointers came from a 32-bit process */
	if ( !proc_is64bit(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) {
		/* position to the last entry and work back from there */
		uintptr_t	*my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1);
		user_addr_t	*my_addrp = aiocbpp + (uap->nent - 1);
		for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) {
			*my_addrp = (user_addr_t) (*my_ptrp);

	/* check list of aio requests to see if any have completed */
	for ( i = 0; i < uap->nent; i++ ) {
		/* NULL elements are legal so check for 'em */
		aiocbp = *(aiocbpp + i);
		if ( aiocbp == USER_ADDR_NULL )

		/* return immediately if any aio request in the list is done */
		TAILQ_FOREACH( entryp, &p->aio_doneq, aio_workq_link ) {
			if ( entryp->uaiocbp == aiocbp ) {
				goto ExitThisRoutine;
	} /* for ( ; i < uap->nent; ) */

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend_sleep)) | DBG_FUNC_NONE,
		      (int)p, uap->nent, 0, 0, 0 );

	/*
	 * wait for an async IO to complete or a signal fires or timeout expires.
	 * we return EAGAIN (35) for timeout expiration and EINTR (4) when a signal
	 * interrupts us.  If an async IO completes before a signal fires or our
	 * timeout expires, we get a wakeup call from aio_work_thread().
	 */
	assert_wait_deadline( (event_t) &p->AIO_SUSPEND_SLEEP_CHAN, THREAD_ABORTSAFE, abstime );

	error = thread_block( THREAD_CONTINUE_NULL );

	if ( error == THREAD_AWAKENED ) {
		/* got our wakeup call from aio_work_thread() */
	else if ( error == THREAD_TIMED_OUT ) {
		/* our timeout expired */
		/* we were interrupted */

	if ( aiocbpp != NULL )
		FREE( aiocbpp, M_TEMP );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_END,
		      (int)p, uap->nent, error, 0, 0 );
/* aio_write - asynchronously write uap->aiocbp->aio_nbytes bytes to the
 * file descriptor (uap->aiocbp->aio_fildes) from the buffer
 * (uap->aiocbp->aio_buf).
 */
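/*
 * Illustrative user-space sketch (not part of this kernel source): queueing an
 * asynchronous write and asking for a SIGUSR1 on completion (the SIGEV_SIGNAL
 * path handled by do_aio_completion below).  fd, buf, nbytes and offset are
 * assumptions supplied by the caller.
 */
#if 0
#include <aio.h>
#include <signal.h>
#include <string.h>

static int
example_write_async( int fd, const void *buf, size_t nbytes, off_t offset, struct aiocb *cb )
{
	memset( cb, 0, sizeof(*cb) );
	cb->aio_fildes = fd;
	cb->aio_buf    = (volatile void *)buf;
	cb->aio_nbytes = nbytes;
	cb->aio_offset = offset;

	/* deliver a signal when the request completes */
	cb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
	cb->aio_sigevent.sigev_signo  = SIGUSR1;

	return aio_write( cb );
}
#endif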
aio_write( struct proc *p, struct aio_write_args *uap, int *retval )
	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_START,
		      (int)p, (int)uap->aiocbp, 0, 0, 0 );

	error = aio_queue_async_request( p, uap->aiocbp, AIO_WRITE );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_END,
		      (int)p, (int)uap->aiocbp, error, 0, 0 );
/*
 * lio_listio - initiate a list of IO requests.  We process the list of aiocbs
 * either synchronously (mode == LIO_WAIT) or asynchronously (mode == LIO_NOWAIT).
 * The caller gets error and return status for each aiocb in the list via aio_error
 * and aio_return.  We must keep completed requests until released by the
 * aio_return call.
 */
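/*
 * Illustrative user-space sketch (not part of this kernel source): submitting a
 * read and a write as one batch and blocking until both finish (LIO_WAIT).
 * fd, inbuf, outbuf and nbytes are assumptions supplied by the caller.
 */
#if 0
#include <aio.h>
#include <string.h>

static int
example_listio( int fd, void *inbuf, const void *outbuf, size_t nbytes )
{
	struct aiocb		rd, wr;
	struct aiocb *const	list[2] = { &rd, &wr };

	memset( &rd, 0, sizeof(rd) );
	rd.aio_fildes     = fd;
	rd.aio_buf        = inbuf;
	rd.aio_nbytes     = nbytes;
	rd.aio_offset     = 0;
	rd.aio_lio_opcode = LIO_READ;

	memset( &wr, 0, sizeof(wr) );
	wr.aio_fildes     = fd;
	wr.aio_buf        = (volatile void *)outbuf;
	wr.aio_nbytes     = nbytes;
	wr.aio_offset     = (off_t)nbytes;
	wr.aio_lio_opcode = LIO_WRITE;

	/* returns once every request in the list has completed */
	return lio_listio( LIO_WAIT, list, 2, NULL );
}
#endif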
lio_listio( struct proc *p, struct lio_listio_args *uap, int *retval )
	aio_workq_entry		* *entryp_listp;
	user_addr_t		*aiocbpp;

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START,
		      (int)p, uap->nent, uap->mode, 0, 0 );

	entryp_listp = NULL;

	if ( !(uap->mode == LIO_NOWAIT || uap->mode == LIO_WAIT) ) {
		call_result = EINVAL;

	if ( uap->nent < 1 || uap->nent > AIO_LISTIO_MAX ) {
		call_result = EINVAL;

	/*
	 * we use group_tag to mark IO requests for delayed completion processing
	 * which means we wait until all IO requests in the group have completed
	 * before we either return to the caller when mode is LIO_WAIT or signal
	 * user when mode is LIO_NOWAIT.
	 */
	group_tag = random();

	/*
	 * allocate a list of aio_workq_entry pointers that we will use to queue
	 * up all our requests at once while holding our lock.
	 */
	MALLOC( entryp_listp, void *, (uap->nent * sizeof(aio_workq_entry *)), M_TEMP, M_WAITOK );
	if ( entryp_listp == NULL ) {
		call_result = EAGAIN;

	/* we reserve enough space for largest possible pointer size */
	MALLOC( aiocbpp, user_addr_t *, (uap->nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
	if ( aiocbpp == NULL ) {
		call_result = EAGAIN;

	/* copyin our aiocb pointers from list */
	result = copyin( uap->aiocblist, aiocbpp,
			 IS_64BIT_PROCESS(p) ? (uap->nent * sizeof(user_addr_t))
					     : (uap->nent * sizeof(uintptr_t)) );
	if ( result != 0 ) {
		call_result = EAGAIN;

	/* we depend on a list of user_addr_t's so we need to munge and expand */
	/* when these pointers came from a 32-bit process */
	if ( !IS_64BIT_PROCESS(p) && sizeof(uintptr_t) < sizeof(user_addr_t) ) {
		/* position to the last entry and work back from there */
		uintptr_t	*my_ptrp = ((uintptr_t *)aiocbpp) + (uap->nent - 1);
		user_addr_t	*my_addrp = aiocbpp + (uap->nent - 1);
		for (i = 0; i < uap->nent; i++, my_ptrp--, my_addrp--) {
			*my_addrp = (user_addr_t) (*my_ptrp);

	/* process list of aio requests */
	for ( i = 0; i < uap->nent; i++ ) {
		user_addr_t		my_aiocbp;

		*(entryp_listp + i) = NULL;
		my_aiocbp = *(aiocbpp + i);

		/* NULL elements are legal so check for 'em */
		if ( my_aiocbp == USER_ADDR_NULL )

		if ( uap->mode == LIO_NOWAIT )
			result = lio_create_async_entry( p, my_aiocbp, uap->sigp,
							 group_tag, (entryp_listp + i) );
			result = lio_create_sync_entry( p, my_aiocbp, group_tag,
							(entryp_listp + i) );

		if ( result != 0 && call_result == -1 )
			call_result = result;

	/*
	 * we need to protect this section since we do not want any of these grouped
	 * IO requests to begin until we have them all on the queue.
	 */
	for ( i = 0; i < uap->nent; i++ ) {
		aio_workq_entry		*entryp;

		/* NULL elements are legal so check for 'em */
		entryp = *(entryp_listp + i);
		if ( entryp == NULL )

		/* check our aio limits to throttle bad or rude user land behavior */
		if ( aio_get_all_queues_count( ) >= aio_max_requests ||
		     aio_get_process_count( entryp->procp ) >= aio_max_requests_per_process ||
		     is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) {
			my_map = entryp->aio_map;
			entryp->aio_map = VM_MAP_NULL;
			if ( call_result == -1 )
				call_result = EAGAIN;
			aio_free_request( entryp, my_map );

		/* place the request on the appropriate queue */
		if ( uap->mode == LIO_NOWAIT ) {
			TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
			aio_anchor.aio_async_workq_count++;

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE,
				      (int)p, (int)entryp->uaiocbp, 0, 0, 0 );
			TAILQ_INSERT_TAIL( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
			aio_anchor.lio_sync_workq_count++;

	if ( uap->mode == LIO_NOWAIT ) {
		/* caller does not want to wait so we'll fire off a worker thread and return */
		wakeup_one( (caddr_t) &aio_anchor.aio_async_workq );
		aio_workq_entry		*entryp;

		/*
		 * mode is LIO_WAIT - handle the IO requests now.
		 */
		entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
		while ( entryp != NULL ) {
			if ( p == entryp->procp && group_tag == entryp->group_tag ) {

				TAILQ_REMOVE( &aio_anchor.lio_sync_workq, entryp, aio_workq_link );
				aio_anchor.lio_sync_workq_count--;

				if ( (entryp->flags & AIO_READ) != 0 ) {
					error = do_aio_read( entryp );
				else if ( (entryp->flags & AIO_WRITE) != 0 ) {
					error = do_aio_write( entryp );
				else if ( (entryp->flags & AIO_FSYNC) != 0 ) {
					error = do_aio_fsync( entryp );
					printf( "%s - unknown aio request - flags 0x%02X \n",
						__FUNCTION__, entryp->flags );

				entryp->errorval = error;
				if ( error != 0 && call_result == -1 )

				/* we're done with the IO request so move it on the done queue */
				TAILQ_INSERT_TAIL( &p->aio_doneq, entryp, aio_workq_link );
				aio_anchor.aio_done_count++;
				p->aio_done_count++;

				/* need to start over since lio_sync_workq may have been changed while we */
				/* were away doing the IO. */
				entryp = TAILQ_FIRST( &aio_anchor.lio_sync_workq );
			} /* p == entryp->procp */

			entryp = TAILQ_NEXT( entryp, aio_workq_link );
		} /* while ( entryp != NULL ) */
	} /* uap->mode == LIO_WAIT */

	/* call_result == -1 means we had no trouble queueing up requests */
	if ( call_result == -1 ) {

	if ( entryp_listp != NULL )
		FREE( entryp_listp, M_TEMP );
	if ( aiocbpp != NULL )
		FREE( aiocbpp, M_TEMP );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_END,
		      (int)p, call_result, 0, 0, 0 );

	return( call_result );
/*
 * aio worker thread.  this is where all the real work gets done.
 * we get a wake up call on sleep channel &aio_anchor.aio_async_workq
 * after new work is queued up.
 */
aio_work_thread( void )
	aio_workq_entry		*entryp;

	entryp = aio_get_some_work();
	if ( entryp == NULL ) {
		/*
		 * aio worker threads wait for some work to get queued up
		 * by aio_queue_async_request.  Once some work gets queued
		 * it will wake up one of these worker threads just before
		 * returning to our caller in user land.
		 */
		assert_wait( (event_t) &aio_anchor.aio_async_workq, THREAD_UNINT );

		thread_block( (thread_continue_t)aio_work_thread );

		vm_map_t 		currentmap;
		vm_map_t 		oldmap = VM_MAP_NULL;
		task_t			oldaiotask = TASK_NULL;
		struct uthread		*uthreadp = NULL;

		KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_START,
			      (int)entryp->procp, (int)entryp->uaiocbp, entryp->flags, 0, 0 );

		/*
		 * Assume the target's address space identity for the duration
		 * of the IO.
		 */
		currentmap = get_task_map( (current_proc())->task );
		if ( currentmap != entryp->aio_map ) {
			uthreadp = (struct uthread *) get_bsdthread_info(current_thread());
			oldaiotask = uthreadp->uu_aio_task;
			uthreadp->uu_aio_task = entryp->procp->task;
			oldmap = vm_map_switch( entryp->aio_map );

		if ( (entryp->flags & AIO_READ) != 0 ) {
			error = do_aio_read( entryp );
		else if ( (entryp->flags & AIO_WRITE) != 0 ) {
			error = do_aio_write( entryp );
		else if ( (entryp->flags & AIO_FSYNC) != 0 ) {
			error = do_aio_fsync( entryp );
			printf( "%s - unknown aio request - flags 0x%02X \n",
				__FUNCTION__, entryp->flags );

		entryp->errorval = error;
		if ( currentmap != entryp->aio_map ) {
			(void) vm_map_switch( oldmap );
			uthreadp->uu_aio_task = oldaiotask;

		/* we're done with the IO request so pop it off the active queue and */
		/* push it on the done queue */
		TAILQ_REMOVE( &entryp->procp->aio_activeq, entryp, aio_workq_link );
		aio_anchor.aio_active_count--;
		entryp->procp->aio_active_count--;
		TAILQ_INSERT_TAIL( &entryp->procp->aio_doneq, entryp, aio_workq_link );
		aio_anchor.aio_done_count++;
		entryp->procp->aio_done_count++;
		entryp->flags |= AIO_COMPLETION;

		/* remove our reference to the user land map. */
		if ( VM_MAP_NULL != entryp->aio_map ) {
			my_map = entryp->aio_map;
			entryp->aio_map = VM_MAP_NULL;
			AIO_UNLOCK;	/* must unlock before calling vm_map_deallocate() */
			vm_map_deallocate( my_map );

		do_aio_completion( entryp );

		KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_END,
			      (int)entryp->procp, (int)entryp->uaiocbp, entryp->errorval,
			      entryp->returnval, 0 );

		entryp->flags &= ~AIO_COMPLETION;
		if ( (entryp->flags & AIO_DO_FREE) != 0 ) {
			my_map = entryp->aio_map;
			entryp->aio_map = VM_MAP_NULL;
			aio_free_request( entryp, my_map );

} /* aio_work_thread */
/*
 * aio_get_some_work - get the next async IO request that is ready to be executed.
 * aio_fsync complicates matters a bit since we cannot do the fsync until all async
 * IO requests at the time the aio_fsync call came in have completed.
 * NOTE - AIO_LOCK must be held by caller
 */
static aio_workq_entry *
aio_get_some_work( void )
	aio_workq_entry		*entryp;

	/* pop some work off the work queue and add to our active queue */
	for ( entryp = TAILQ_FIRST( &aio_anchor.aio_async_workq );
	      entryp = TAILQ_NEXT( entryp, aio_workq_link ) ) {

		if ( (entryp->flags & AIO_FSYNC) != 0 ) {
			/* leave aio_fsync calls on the work queue if there are IO */
			/* requests on the active queue for the same file descriptor. */
			if ( aio_delay_fsync_request( entryp ) ) {

				KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync_delay)) | DBG_FUNC_NONE,
					      (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

	if ( entryp != NULL ) {
		TAILQ_REMOVE( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
		aio_anchor.aio_async_workq_count--;
		TAILQ_INSERT_TAIL( &entryp->procp->aio_activeq, entryp, aio_workq_link );
		aio_anchor.aio_active_count++;
		entryp->procp->aio_active_count++;

} /* aio_get_some_work */
/*
 * aio_delay_fsync_request - look to see if this aio_fsync request should be delayed at
 * this time.  Delay will happen when there are any active IOs for the same file
 * descriptor that were queued at the time the aio_fsync call was queued.
 * NOTE - AIO_LOCK must be held by caller
 */
aio_delay_fsync_request( aio_workq_entry *entryp )
	aio_workq_entry		*my_entryp;

	TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
		if ( my_entryp->fsyncp != USER_ADDR_NULL &&
		     entryp->uaiocbp == my_entryp->fsyncp &&
		     entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {

} /* aio_delay_fsync_request */
/*
 * aio_queue_async_request - queue up an async IO request on our work queue then
 * wake up one of our worker threads to do the actual work.  We get a reference
 * to our caller's user land map in order to keep it around while we are
 * processing the request.
 */
aio_queue_async_request( struct proc *procp, user_addr_t aiocbp, int kindOfIO )
	aio_workq_entry		*entryp;

	entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
	if ( entryp == NULL ) {
	bzero( entryp, sizeof(*entryp) );

	/* fill in the rest of the aio_workq_entry */
	entryp->procp = procp;
	entryp->uaiocbp = aiocbp;
	entryp->flags |= kindOfIO;
	entryp->aio_map = VM_MAP_NULL;

	if ( !IS_64BIT_PROCESS(procp) ) {
		struct aiocb		aiocb32;

		result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
		do_munge_aiocb( &aiocb32, &entryp->aiocb );
		result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

	if ( result != 0 ) {

	/* do some more validation on the aiocb and embedded file descriptor */
	result = aio_validate( entryp );

	/* get a reference to the user land map in order to keep it around */
	entryp->aio_map = get_task_map( procp->task );
	vm_map_reference( entryp->aio_map );

	if ( is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE ) {

	/* check our aio limits to throttle bad or rude user land behavior */
	if ( aio_get_all_queues_count( ) >= aio_max_requests ||
	     aio_get_process_count( procp ) >= aio_max_requests_per_process ) {

	/*
	 * aio_fsync calls sync up all async IO requests queued at the time
	 * the aio_fsync call was made.  So we mark each currently queued async
	 * IO with a matching file descriptor as must complete before we do the
	 * fsync.  We set the fsyncp field of each matching async IO
	 * request with the aiocb pointer passed in on the aio_fsync call to
	 * know which IOs must complete before we process the aio_fsync call.
	 */
	if ( (kindOfIO & AIO_FSYNC) != 0 )
		aio_mark_requests( entryp );

	/* queue up on our aio asynchronous work queue */
	TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
	aio_anchor.aio_async_workq_count++;

	wakeup_one( (caddr_t) &aio_anchor.aio_async_workq );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_NONE,
		      (int)procp, (int)aiocbp, 0, 0, 0 );

	if ( entryp != NULL ) {
		/* this entry has not been queued up so no worries about unlocked */
		/* state and aio_map */
		aio_free_request( entryp, entryp->aio_map );

} /* aio_queue_async_request */
/*
 * lio_create_async_entry - allocate an aio_workq_entry and fill it in.
 * If all goes well return 0 and pass the aio_workq_entry pointer back to
 * our caller.  We get a reference to our caller's user land map in order to keep
 * it around while we are processing the request.
 * lio_listio calls behave differently at completion: they do completion notification
 * when all async IO requests have completed.  We use group_tag to tag IO requests
 * that behave in the delayed-notification manner.
 */
lio_create_async_entry( struct proc *procp, user_addr_t aiocbp,
			user_addr_t sigp, long group_tag,
			aio_workq_entry **entrypp )
	aio_workq_entry		*entryp;

	entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
	if ( entryp == NULL ) {
	bzero( entryp, sizeof(*entryp) );

	/* fill in the rest of the aio_workq_entry */
	entryp->procp = procp;
	entryp->uaiocbp = aiocbp;
	entryp->flags |= AIO_LIO;
	entryp->group_tag = group_tag;
	entryp->aio_map = VM_MAP_NULL;

	if ( !IS_64BIT_PROCESS(procp) ) {
		struct aiocb		aiocb32;

		result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
		do_munge_aiocb( &aiocb32, &entryp->aiocb );
		result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

	if ( result != 0 ) {

	/* look for lio_listio LIO_NOP requests and ignore them. */
	/* Not really an error, but we need to free our aio_workq_entry. */
	if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) {

	/* use sigevent passed in to lio_listio for each of our calls, but only */
	/* do completion notification after the last request completes. */
	if ( sigp != USER_ADDR_NULL ) {
		if ( !IS_64BIT_PROCESS(procp) ) {
			struct sigevent		sigevent32;

			result = copyin( sigp, &sigevent32, sizeof(sigevent32) );
			if ( result == 0 ) {
				/* also need to munge aio_sigevent since it contains pointers */
				/* special case here.  since we do not know if sigev_value is an */
				/* int or a ptr we do NOT cast the ptr to a user_addr_t.   This  */
				/* means if we send this info back to user space we need to remember */
				/* sigev_value was not expanded for the 32-bit case.  */
				/* NOTE - this does NOT affect us since we don't support sigev_value */
				/* yet in the aio context.  */
				entryp->aiocb.aio_sigevent.sigev_notify = sigevent32.sigev_notify;
				entryp->aiocb.aio_sigevent.sigev_signo = sigevent32.sigev_signo;
				entryp->aiocb.aio_sigevent.sigev_value.size_equivalent.sival_int =
					sigevent32.sigev_value.sival_int;
				entryp->aiocb.aio_sigevent.sigev_notify_function =
					CAST_USER_ADDR_T(sigevent32.sigev_notify_function);
				entryp->aiocb.aio_sigevent.sigev_notify_attributes =
					CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes);
			result = copyin( sigp, &entryp->aiocb.aio_sigevent, sizeof(entryp->aiocb.aio_sigevent) );
		if ( result != 0 ) {

	/* do some more validation on the aiocb and embedded file descriptor */
	result = aio_validate( entryp );

	/* get a reference to the user land map in order to keep it around */
	entryp->aio_map = get_task_map( procp->task );
	vm_map_reference( entryp->aio_map );

	if ( entryp != NULL )
		zfree( aio_workq_zonep, entryp );

} /* lio_create_async_entry */
/*
 * aio_mark_requests - aio_fsync calls synchronize file data for all queued async IO
 * requests at the moment the aio_fsync call is queued.  We use aio_workq_entry.fsyncp
 * to mark each async IO that must complete before the fsync is done.  We use the uaiocbp
 * field from the aio_fsync call as the aio_workq_entry.fsyncp in marked requests.
 * NOTE - AIO_LOCK must be held by caller
 */
aio_mark_requests( aio_workq_entry *entryp )
	aio_workq_entry		*my_entryp;

	TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
		if ( entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {
			my_entryp->fsyncp = entryp->uaiocbp;

	TAILQ_FOREACH( my_entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( entryp->procp == my_entryp->procp &&
		     entryp->aiocb.aio_fildes == my_entryp->aiocb.aio_fildes ) {
			my_entryp->fsyncp = entryp->uaiocbp;

} /* aio_mark_requests */
/*
 * lio_create_sync_entry - allocate an aio_workq_entry and fill it in.
 * If all goes well return 0 and pass the aio_workq_entry pointer back to
 * our caller.
 * lio_listio calls behave differently at completion: they do completion notification
 * when all async IO requests have completed.  We use group_tag to tag IO requests
 * that behave in the delayed-notification manner.
 */
lio_create_sync_entry( struct proc *procp, user_addr_t aiocbp,
		       long group_tag, aio_workq_entry **entrypp )
	aio_workq_entry		*entryp;

	entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
	if ( entryp == NULL ) {
	bzero( entryp, sizeof(*entryp) );

	/* fill in the rest of the aio_workq_entry */
	entryp->procp = procp;
	entryp->uaiocbp = aiocbp;
	entryp->flags |= AIO_LIO;
	entryp->group_tag = group_tag;
	entryp->aio_map = VM_MAP_NULL;

	if ( !IS_64BIT_PROCESS(procp) ) {
		struct aiocb		aiocb32;

		result = copyin( aiocbp, &aiocb32, sizeof(aiocb32) );
		do_munge_aiocb( &aiocb32, &entryp->aiocb );
		result = copyin( aiocbp, &entryp->aiocb, sizeof(entryp->aiocb) );

	if ( result != 0 ) {

	/* look for lio_listio LIO_NOP requests and ignore them. */
	/* Not really an error, but we need to free our aio_workq_entry. */
	if ( entryp->aiocb.aio_lio_opcode == LIO_NOP ) {

	result = aio_validate( entryp );
	if ( result != 0 ) {

	if ( entryp != NULL )
		zfree( aio_workq_zonep, entryp );

} /* lio_create_sync_entry */
/*
 * aio_free_request - remove our reference on the user land map and
 * free the work queue entry resources.
 * We are not holding the lock here thus aio_map is passed in and
 * zeroed while we did have the lock.
 */
aio_free_request( aio_workq_entry *entryp, vm_map_t the_map )
	/* remove our reference to the user land map. */
	if ( VM_MAP_NULL != the_map ) {
		vm_map_deallocate( the_map );

	zfree( aio_workq_zonep, entryp );

} /* aio_free_request */
/* aio_validate - validate the aiocb passed in by one of the aio syscalls.
 */
aio_validate( aio_workq_entry *entryp )
	struct fileproc 		*fp;

	if ( (entryp->flags & AIO_LIO) != 0 ) {
		if ( entryp->aiocb.aio_lio_opcode == LIO_READ )
			entryp->flags |= AIO_READ;
		else if ( entryp->aiocb.aio_lio_opcode == LIO_WRITE )
			entryp->flags |= AIO_WRITE;
		else if ( entryp->aiocb.aio_lio_opcode == LIO_NOP )

	if ( (entryp->flags & (AIO_WRITE | AIO_FSYNC)) != 0 ) {

	if ( (entryp->flags & (AIO_READ | AIO_WRITE)) != 0 ) {
		// LP64todo - does max value for aio_nbytes need to grow?
		if ( entryp->aiocb.aio_nbytes > INT_MAX ||
		     entryp->aiocb.aio_buf == USER_ADDR_NULL ||
		     entryp->aiocb.aio_offset < 0 )

	/* validate aiocb.aio_sigevent.  at this point we only support sigev_notify
	 * equal to SIGEV_SIGNAL or SIGEV_NONE.  this means sigev_value,
	 * sigev_notify_function, and sigev_notify_attributes are ignored.
	 */
	if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ) {
		/* make sure we have a valid signal number */
		signum = entryp->aiocb.aio_sigevent.sigev_signo;
		if ( signum <= 0 || signum >= NSIG ||
		     signum == SIGKILL || signum == SIGSTOP )
	else if ( entryp->aiocb.aio_sigevent.sigev_notify != SIGEV_NONE )

	/* validate the file descriptor and that the file was opened
	 * for the appropriate read / write access.
	 */
	proc_fdlock(entryp->procp);

	result = fp_lookup( entryp->procp, entryp->aiocb.aio_fildes, &fp, 1);
	if ( result == 0 ) {
		if ( (fp->f_fglob->fg_flag & flag) == 0 ) {
			/* we don't have read or write access */
		else if ( fp->f_fglob->fg_type != DTYPE_VNODE ) {
			/* this is not a file */
			fp->f_flags |= FP_AIOISSUED;

		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 1);

	proc_fdunlock(entryp->procp);

} /* aio_validate */
/*
 * aio_get_process_count - runs through our queues that hold outstanding
 * async IO requests and totals up the number of requests for the given
 * process.
 * NOTE - caller must hold aio lock!
 */
aio_get_process_count( struct proc *procp )
	aio_workq_entry		*entryp;

	/* begin with count of completed async IO requests for this process */
	count = procp->aio_done_count;

	/* add in count of active async IO requests for this process */
	count += procp->aio_active_count;

	/* look for matches on our queue of asynchronous todo work */
	TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( procp == entryp->procp ) {

	/* look for matches on our queue of synchronous todo work */
	TAILQ_FOREACH( entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
		if ( procp == entryp->procp ) {

} /* aio_get_process_count */
/*
 * aio_get_all_queues_count - get total number of entries on all aio work queues.
 * NOTE - caller must hold aio lock!
 */
aio_get_all_queues_count( void )
	count = aio_anchor.aio_async_workq_count;
	count += aio_anchor.lio_sync_workq_count;
	count += aio_anchor.aio_active_count;
	count += aio_anchor.aio_done_count;

} /* aio_get_all_queues_count */
/*
 * do_aio_completion.  Handle async IO completion.
 */
do_aio_completion( aio_workq_entry *entryp )
	/* signal user land process if appropriate */
	if ( entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL &&
	     (entryp->flags & AIO_DISABLE) == 0 ) {

		/*
		 * if group_tag is non zero then make sure this is the last IO request
		 * in the group before we signal.
		 */
		if ( entryp->group_tag == 0 ||
		     (entryp->group_tag != 0 && aio_last_group_io( entryp )) ) {
			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_sig)) | DBG_FUNC_NONE,
				      (int)entryp->procp, (int)entryp->uaiocbp,
				      entryp->aiocb.aio_sigevent.sigev_signo, 0, 0 );

			psignal( entryp->procp, entryp->aiocb.aio_sigevent.sigev_signo );

	/*
	 * need to handle case where a process is trying to exit, exec, or close
	 * and is currently waiting for active aio requests to complete.  If
	 * AIO_WAITING is set then we need to look to see if there are any
	 * other requests in the active queue for this process.  If there are
	 * none then wakeup using the AIO_CLEANUP_SLEEP_CHAN tsleep channel.  If
	 * there are some still active then do nothing - we only want to wakeup
	 * when all active aio requests for the process are complete.
	 */
	if ( (entryp->flags & AIO_WAITING) != 0 ) {
		int		active_requests;

		KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wait)) | DBG_FUNC_NONE,
			      (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

		active_requests = aio_active_requests_for_process( entryp->procp );

		if ( active_requests < 1 ) {
			/* no active aio requests for this process, continue exiting */
			wakeup_one( (caddr_t) &entryp->procp->AIO_CLEANUP_SLEEP_CHAN );

			KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE,
				      (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

	/*
	 * aio_suspend case when a signal was not requested.  In that scenario we
	 * are sleeping on the AIO_SUSPEND_SLEEP_CHAN channel.
	 * NOTE - the assumption here is that this wakeup call is inexpensive.
	 * we really only need to do this when an aio_suspend call is pending.
	 * If we find the wakeup call should be avoided we could mark the
	 * async IO requests given in the list provided by aio_suspend and only
	 * call wakeup for them.  If we do mark them we should unmark them after
	 * the aio_suspend wakes up.
	 */
	wakeup_one( (caddr_t) &entryp->procp->AIO_SUSPEND_SLEEP_CHAN );

	KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_suspend_wake)) | DBG_FUNC_NONE,
		      (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );

} /* do_aio_completion */
/*
 * aio_last_group_io - checks to see if this is the last unfinished IO request
 * for the given group_tag.  Returns TRUE if there are no other active IO
 * requests for this group or FALSE if there are active IO requests.
 * NOTE - AIO_LOCK must be held by caller
 */
aio_last_group_io( aio_workq_entry *entryp )
	aio_workq_entry		*my_entryp;

	/* look for matches on our queue of active async IO requests */
	TAILQ_FOREACH( my_entryp, &entryp->procp->aio_activeq, aio_workq_link ) {
		if ( my_entryp->group_tag == entryp->group_tag )

	/* look for matches on our queue of asynchronous todo work */
	TAILQ_FOREACH( my_entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( my_entryp->group_tag == entryp->group_tag )

	/* look for matches on our queue of synchronous todo work */
	TAILQ_FOREACH( my_entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
		if ( my_entryp->group_tag == entryp->group_tag )

} /* aio_last_group_io */
do_aio_read( aio_workq_entry *entryp )
	struct fileproc		*fp;

	if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp, 0)) )
	if ( (fp->f_fglob->fg_flag & FREAD) == 0 ) {
		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

		error = dofileread( entryp->procp, fp, entryp->aiocb.aio_fildes,
				    entryp->aiocb.aio_buf,
				    entryp->aiocb.aio_nbytes,
				    entryp->aiocb.aio_offset, FOF_OFFSET,
				    &entryp->returnval );
		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
do_aio_write( aio_workq_entry *entryp )
	struct fileproc 		*fp;

	if ( (error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp, 0)) )
	if ( (fp->f_fglob->fg_flag & FWRITE) == 0 ) {
		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

		/* NB: tell dofilewrite the offset, and to use the proc cred */
		error = dofilewrite( entryp->procp,
				     entryp->aiocb.aio_fildes,
				     entryp->aiocb.aio_buf,
				     entryp->aiocb.aio_nbytes,
				     entryp->aiocb.aio_offset,
				     FOF_OFFSET | FOF_PCRED,
				     &entryp->returnval );

		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

} /* do_aio_write */
/*
 * aio_active_requests_for_process - return number of active async IO
 * requests for the given process.
 * NOTE - caller must hold aio lock!
 */
aio_active_requests_for_process( struct proc *procp )
	return( procp->aio_active_count );

} /* aio_active_requests_for_process */
do_aio_fsync( aio_workq_entry *entryp )
	struct vfs_context 	context;
	struct fileproc 	*fp;

	/*
	 * NOTE - we will not support AIO_DSYNC until fdatasync() is supported.
	 * AIO_DSYNC is caught before we queue up a request and flagged as an error.
	 * The following was shamelessly extracted from fsync() implementation.
	 */
	error = fp_getfvp( entryp->procp, entryp->aiocb.aio_fildes, &fp, &vp );

	if ( (error = vnode_getwithref(vp)) ) {
		fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
		entryp->returnval = -1;

	context.vc_proc = entryp->procp;
	context.vc_ucred = fp->f_fglob->fg_cred;

	error = VNOP_FSYNC( vp, MNT_WAIT, &context );

	(void)vnode_put(vp);

	fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);

	entryp->returnval = -1;

} /* do_aio_fsync */
/*
 * is_already_queued - runs through our queues to see if the given
 * aiocbp / process is there.  Returns TRUE if there is a match
 * on any of our aio queues.
 * NOTE - callers must hold aio lock!
 */
is_already_queued( struct proc *procp,
		   user_addr_t aiocbp )
	aio_workq_entry		*entryp;

	/* look for matches on our queue of async IO requests that have completed */
	TAILQ_FOREACH( entryp, &procp->aio_doneq, aio_workq_link ) {
		if ( aiocbp == entryp->uaiocbp ) {
			goto ExitThisRoutine;

	/* look for matches on our queue of active async IO requests */
	TAILQ_FOREACH( entryp, &procp->aio_activeq, aio_workq_link ) {
		if ( aiocbp == entryp->uaiocbp ) {
			goto ExitThisRoutine;

	/* look for matches on our queue of asynchronous todo work */
	TAILQ_FOREACH( entryp, &aio_anchor.aio_async_workq, aio_workq_link ) {
		if ( procp == entryp->procp && aiocbp == entryp->uaiocbp ) {
			goto ExitThisRoutine;

	/* look for matches on our queue of synchronous todo work */
	TAILQ_FOREACH( entryp, &aio_anchor.lio_sync_workq, aio_workq_link ) {
		if ( procp == entryp->procp && aiocbp == entryp->uaiocbp ) {
			goto ExitThisRoutine;

} /* is_already_queued */
/*
 * aio initialization
 */
__private_extern__ void
	aio_lock_grp_attr = lck_grp_attr_alloc_init();
	aio_lock_grp = lck_grp_alloc_init("aio", aio_lock_grp_attr);
	aio_lock_attr = lck_attr_alloc_init();

	aio_lock = lck_mtx_alloc_init(aio_lock_grp, aio_lock_attr);

	TAILQ_INIT( &aio_anchor.aio_async_workq );
	TAILQ_INIT( &aio_anchor.lio_sync_workq );
	aio_anchor.aio_async_workq_count = 0;
	aio_anchor.lio_sync_workq_count = 0;
	aio_anchor.aio_active_count = 0;
	aio_anchor.aio_done_count = 0;

	i = sizeof( aio_workq_entry );
	aio_workq_zonep = zinit( i, i * aio_max_requests, i * aio_max_requests, "aiowq" );

	_aio_create_worker_threads( aio_worker_threads );
/*
 * aio worker threads created here.
 */
__private_extern__ void
_aio_create_worker_threads( int num )
	/* create some worker threads to handle the async IO requests */
	for ( i = 0; i < num; i++ ) {
		myThread = kernel_thread( kernel_task, aio_work_thread );
		if ( THREAD_NULL == myThread ) {
			printf( "%s - failed to create a work thread \n", __FUNCTION__ );

} /* _aio_create_worker_threads */
/*
 * Return the current activation utask
 */
	return ((struct uthread *)get_bsdthread_info(current_thread()))->uu_aio_task;
/*
 * In the case of an aiocb from a
 * 32-bit process we need to expand some longs and pointers to the correct
 * sizes in order to let downstream code always work on the same type of
 * aiocb (in our case that is a user_aiocb)
 */
do_munge_aiocb( struct aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp )
	the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes;
	the_user_aiocbp->aio_offset = my_aiocbp->aio_offset;
	the_user_aiocbp->aio_buf = CAST_USER_ADDR_T(my_aiocbp->aio_buf);
	the_user_aiocbp->aio_nbytes = my_aiocbp->aio_nbytes;
	the_user_aiocbp->aio_reqprio = my_aiocbp->aio_reqprio;
	the_user_aiocbp->aio_lio_opcode = my_aiocbp->aio_lio_opcode;

	/* special case here.  since we do not know if sigev_value is an */
	/* int or a ptr we do NOT cast the ptr to a user_addr_t.   This  */
	/* means if we send this info back to user space we need to remember */
	/* sigev_value was not expanded for the 32-bit case.  */
	/* NOTE - this does NOT affect us since we don't support sigev_value */
	/* yet in the aio context.  */
	the_user_aiocbp->aio_sigevent.sigev_notify = my_aiocbp->aio_sigevent.sigev_notify;
	the_user_aiocbp->aio_sigevent.sigev_signo = my_aiocbp->aio_sigevent.sigev_signo;
	the_user_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int =
		my_aiocbp->aio_sigevent.sigev_value.sival_int;
	the_user_aiocbp->aio_sigevent.sigev_notify_function =
		CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_function);
	the_user_aiocbp->aio_sigevent.sigev_notify_attributes =
		CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_attributes);