/* [apple/xnu.git] bsd/kern/kern_aio.c (xnu-4903.270.47) */
/*
 * Copyright (c) 2003-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */


/*
 * todo:
 * 1) ramesh is looking into how to replace taking a reference on
 *    the user's map (vm_map_reference()) since it is believed that
 *    would not hold the process for us.
 * 2) david is looking into a way for us to set the priority of the
 *    worker threads to match that of the user's thread when the
 *    async IO was queued.
 */


/*
 * This file contains support for the POSIX 1003.1B AIO/LIO facility.
 */

#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file_internal.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode_internal.h>
#include <sys/malloc.h>
#include <sys/mount_internal.h>
#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/user.h>

#include <sys/aio_kern.h>
#include <sys/sysproto.h>

#include <machine/limits.h>

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/waitq.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <kern/sched_prim.h>

#include <vm/vm_map.h>

#include <libkern/OSAtomic.h>

#include <sys/kdebug.h>
#define AIO_work_queued                 1
#define AIO_worker_wake                 2
#define AIO_completion_sig              3
#define AIO_completion_cleanup_wait     4
#define AIO_completion_cleanup_wake     5
#define AIO_completion_suspend_wake     6
#define AIO_fsync_delay                 7
#define AIO_cancel                      10
#define AIO_cancel_async_workq          11
#define AIO_cancel_sync_workq           12
#define AIO_cancel_activeq              13
#define AIO_cancel_doneq                14
#define AIO_fsync                       20
#define AIO_read                        30
#define AIO_write                       40
#define AIO_listio                      50
#define AIO_error                       60
#define AIO_error_val                   61
#define AIO_error_activeq               62
#define AIO_error_workq                 63
#define AIO_return                      70
#define AIO_return_val                  71
#define AIO_return_activeq              72
#define AIO_return_workq                73
#define AIO_exec                        80
#define AIO_exit                        90
#define AIO_exit_sleep                  91
#define AIO_close                       100
#define AIO_close_sleep                 101
#define AIO_suspend                     110
#define AIO_suspend_sleep               111
#define AIO_worker_thread               120

#if 0
#undef KERNEL_DEBUG
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#endif

/*
 * aio requests queue up on the aio_async_workq or lio_sync_workq (for
 * lio_listio LIO_WAIT).  Requests then move to the per process aio_activeq
 * (proc.aio_activeq) when one of our worker threads starts the IO.
 * And finally, requests move to the per process aio_doneq (proc.aio_doneq)
 * when the IO request completes.  The request remains on aio_doneq until
 * the user process calls aio_return or the process exits; either way, that
 * is our trigger to release aio resources.
 */
typedef struct aio_workq {
    TAILQ_HEAD(, aio_workq_entry) aioq_entries;
    int            aioq_count;
    lck_mtx_t      aioq_mtx;
    struct waitq   aioq_waitq;
} *aio_workq_t;

#define AIO_NUM_WORK_QUEUES 1
struct aio_anchor_cb {
    volatile int32_t aio_inflight_count; /* entries that have been taken from a workq */
    volatile int32_t aio_done_count;     /* entries on all done queues (proc.aio_doneq) */
    volatile int32_t aio_total_count;    /* total extant entries */

    /* Hash table of queues here */
    int              aio_num_workqs;
    struct aio_workq aio_async_workqs[AIO_NUM_WORK_QUEUES];
};
typedef struct aio_anchor_cb aio_anchor_cb;

struct aio_lio_context {
    int io_waiter;
    int io_issued;
    int io_completed;
};
typedef struct aio_lio_context aio_lio_context;


/*
 * Notes on aio sleep / wake channels.
 * We currently pick a couple of fields within the proc structure that give us
 * sleep channels that do not collide with any other kernel routines.
 * At this time, for binary compatibility reasons, we cannot create new proc fields.
 */
#define AIO_SUSPEND_SLEEP_CHAN  p_aio_active_count
#define AIO_CLEANUP_SLEEP_CHAN  p_aio_total_count

#define ASSERT_AIO_FROM_PROC(aiop, theproc) \
    if ((aiop)->procp != (theproc)) { \
        panic("AIO on a proc list that does not belong to that proc.\n"); \
    }

/*
 * LOCAL PROTOTYPES
 */
static void aio_proc_lock(proc_t procp);
static void aio_proc_lock_spin(proc_t procp);
static void aio_proc_unlock(proc_t procp);
static lck_mtx_t* aio_proc_mutex(proc_t procp);
static void aio_proc_move_done_locked(proc_t procp, aio_workq_entry *entryp);
static void aio_proc_remove_done_locked(proc_t procp, aio_workq_entry *entryp);
static int aio_get_process_count(proc_t procp);
static int aio_active_requests_for_process(proc_t procp);
static int aio_proc_active_requests_for_file(proc_t procp, int fd);
static boolean_t is_already_queued(proc_t procp, user_addr_t aiocbp);
static boolean_t should_cancel(aio_workq_entry *entryp, user_addr_t aiocbp, int fd);

static void aio_entry_lock(aio_workq_entry *entryp);
static void aio_entry_lock_spin(aio_workq_entry *entryp);
static aio_workq_t aio_entry_workq(aio_workq_entry *entryp);
static lck_mtx_t* aio_entry_mutex(__unused aio_workq_entry *entryp);
static void aio_workq_remove_entry_locked(aio_workq_t queue, aio_workq_entry *entryp);
static void aio_workq_add_entry_locked(aio_workq_t queue, aio_workq_entry *entryp);
static void aio_entry_ref_locked(aio_workq_entry *entryp);
static void aio_entry_unref_locked(aio_workq_entry *entryp);
static void aio_entry_ref(aio_workq_entry *entryp);
static void aio_entry_unref(aio_workq_entry *entryp);
static void aio_entry_update_for_cancel(aio_workq_entry *entryp, boolean_t cancelled,
    int wait_for_completion, boolean_t disable_notification);
static int aio_entry_try_workq_remove(aio_workq_entry *entryp);
static boolean_t aio_delay_fsync_request(aio_workq_entry *entryp);
static int aio_free_request(aio_workq_entry *entryp);

static void aio_workq_init(aio_workq_t wq);
static void aio_workq_lock_spin(aio_workq_t wq);
static void aio_workq_unlock(aio_workq_t wq);
static lck_mtx_t* aio_workq_mutex(aio_workq_t wq);

static void aio_work_thread(void);
static aio_workq_entry *aio_get_some_work(void);

static int aio_get_all_queues_count(void);
static int aio_queue_async_request(proc_t procp, user_addr_t aiocbp, int kindOfIO);
static int aio_validate(aio_workq_entry *entryp);
static int aio_increment_total_count(void);
static int aio_decrement_total_count(void);

static int do_aio_cancel_locked(proc_t p, int fd, user_addr_t aiocbp, int wait_for_completion, boolean_t disable_notification);
static void do_aio_completion(aio_workq_entry *entryp);
static int do_aio_fsync(aio_workq_entry *entryp);
static int do_aio_read(aio_workq_entry *entryp);
static int do_aio_write(aio_workq_entry *entryp);
static void do_munge_aiocb_user32_to_user(struct user32_aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp);
static void do_munge_aiocb_user64_to_user(struct user64_aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp);
static int lio_create_entry(proc_t procp,
    user_addr_t aiocbp,
    void *group_tag,
    aio_workq_entry **entrypp);
static aio_workq_entry *aio_create_queue_entry(proc_t procp,
    user_addr_t aiocbp,
    void *group_tag,
    int kindOfIO);
static user_addr_t *aio_copy_in_list(proc_t procp, user_addr_t aiocblist, int nent);
static void free_lio_context(aio_lio_context* context);
static void aio_enqueue_work(proc_t procp, aio_workq_entry *entryp, int proc_locked);

#define ASSERT_AIO_PROC_LOCK_OWNED(p)   lck_mtx_assert(aio_proc_mutex((p)), LCK_MTX_ASSERT_OWNED)
#define ASSERT_AIO_WORKQ_LOCK_OWNED(q)  lck_mtx_assert(aio_workq_mutex((q)), LCK_MTX_ASSERT_OWNED)
#define ASSERT_AIO_ENTRY_LOCK_OWNED(e)  lck_mtx_assert(aio_entry_mutex((e)), LCK_MTX_ASSERT_OWNED)

/*
 * EXTERNAL PROTOTYPES
 */

/* in ...bsd/kern/sys_generic.c */
extern int dofileread(vfs_context_t ctx, struct fileproc *fp,
    user_addr_t bufp, user_size_t nbyte,
    off_t offset, int flags, user_ssize_t *retval);
extern int dofilewrite(vfs_context_t ctx, struct fileproc *fp,
    user_addr_t bufp, user_size_t nbyte, off_t offset,
    int flags, user_ssize_t *retval);
#if DEBUG
static uint32_t lio_contexts_alloced = 0;
#endif /* DEBUG */

/*
 * aio external global variables.
 */
extern int aio_max_requests;             /* AIO_MAX - configurable */
extern int aio_max_requests_per_process; /* AIO_PROCESS_MAX - configurable */
extern int aio_worker_threads;           /* AIO_THREAD_COUNT - configurable */


/*
 * aio static variables.
 */
static aio_anchor_cb   aio_anchor;
static lck_grp_t      *aio_proc_lock_grp;
static lck_grp_t      *aio_entry_lock_grp;
static lck_grp_t      *aio_queue_lock_grp;
static lck_attr_t     *aio_lock_attr;
static lck_grp_attr_t *aio_lock_grp_attr;
static struct zone    *aio_workq_zonep;
static lck_mtx_t       aio_entry_mtx;
static lck_mtx_t       aio_proc_mtx;

static void
aio_entry_lock(__unused aio_workq_entry *entryp)
{
    lck_mtx_lock(&aio_entry_mtx);
}

static void
aio_entry_lock_spin(__unused aio_workq_entry *entryp)
{
    lck_mtx_lock_spin(&aio_entry_mtx);
}

static void
aio_entry_unlock(__unused aio_workq_entry *entryp)
{
    lck_mtx_unlock(&aio_entry_mtx);
}

/* Hash */
static aio_workq_t
aio_entry_workq(__unused aio_workq_entry *entryp)
{
    return &aio_anchor.aio_async_workqs[0];
}

static lck_mtx_t*
aio_entry_mutex(__unused aio_workq_entry *entryp)
{
    return &aio_entry_mtx;
}

static void
aio_workq_init(aio_workq_t wq)
{
    TAILQ_INIT(&wq->aioq_entries);
    wq->aioq_count = 0;
    lck_mtx_init(&wq->aioq_mtx, aio_queue_lock_grp, aio_lock_attr);
    waitq_init(&wq->aioq_waitq, SYNC_POLICY_FIFO);
}


/*
 * Can be passed a queue which is locked spin.
 */
static void
aio_workq_remove_entry_locked(aio_workq_t queue, aio_workq_entry *entryp)
{
    ASSERT_AIO_WORKQ_LOCK_OWNED(queue);

    if (entryp->aio_workq_link.tqe_prev == NULL) {
        panic("Trying to remove an entry from a work queue, but it is not on a queue\n");
    }

    TAILQ_REMOVE(&queue->aioq_entries, entryp, aio_workq_link);
    queue->aioq_count--;
    entryp->aio_workq_link.tqe_prev = NULL; /* Not on a workq */

    if (queue->aioq_count < 0) {
        panic("Negative count on a queue.\n");
    }
}

static void
aio_workq_add_entry_locked(aio_workq_t queue, aio_workq_entry *entryp)
{
    ASSERT_AIO_WORKQ_LOCK_OWNED(queue);

    TAILQ_INSERT_TAIL(&queue->aioq_entries, entryp, aio_workq_link);
    if (queue->aioq_count < 0) {
        panic("Negative count on a queue.\n");
    }
    queue->aioq_count++;
}

static void
aio_proc_lock(proc_t procp)
{
    lck_mtx_lock(aio_proc_mutex(procp));
}

static void
aio_proc_lock_spin(proc_t procp)
{
    lck_mtx_lock_spin(aio_proc_mutex(procp));
}

static void
aio_proc_move_done_locked(proc_t procp, aio_workq_entry *entryp)
{
    ASSERT_AIO_PROC_LOCK_OWNED(procp);

    TAILQ_REMOVE(&procp->p_aio_activeq, entryp, aio_proc_link);
    TAILQ_INSERT_TAIL(&procp->p_aio_doneq, entryp, aio_proc_link);
    procp->p_aio_active_count--;
    OSIncrementAtomic(&aio_anchor.aio_done_count);
}

static void
aio_proc_remove_done_locked(proc_t procp, aio_workq_entry *entryp)
{
    TAILQ_REMOVE(&procp->p_aio_doneq, entryp, aio_proc_link);
    OSDecrementAtomic(&aio_anchor.aio_done_count);
    aio_decrement_total_count();
    procp->p_aio_total_count--;
}

static void
aio_proc_unlock(proc_t procp)
{
    lck_mtx_unlock(aio_proc_mutex(procp));
}

static lck_mtx_t*
aio_proc_mutex(proc_t procp)
{
    return &procp->p_mlock;
}

static void
aio_entry_ref_locked(aio_workq_entry *entryp)
{
    ASSERT_AIO_ENTRY_LOCK_OWNED(entryp);

    if (entryp->aio_refcount < 0) {
        panic("AIO workq entry with a negative refcount.\n");
    }
    entryp->aio_refcount++;
}


/* Drop a reference; any freeing is handled by aio_entry_unref() */
static void
aio_entry_unref_locked(aio_workq_entry *entryp)
{
    ASSERT_AIO_ENTRY_LOCK_OWNED(entryp);

    entryp->aio_refcount--;
    if (entryp->aio_refcount < 0) {
        panic("AIO workq entry with a negative refcount.\n");
    }
}

static void
aio_entry_ref(aio_workq_entry *entryp)
{
    aio_entry_lock_spin(entryp);
    aio_entry_ref_locked(entryp);
    aio_entry_unlock(entryp);
}
static void
aio_entry_unref(aio_workq_entry *entryp)
{
    aio_entry_lock_spin(entryp);
    aio_entry_unref_locked(entryp);

    if ((entryp->aio_refcount == 0) && ((entryp->flags & AIO_DO_FREE) != 0)) {
        aio_entry_unlock(entryp);
        aio_free_request(entryp);
    } else {
        aio_entry_unlock(entryp);
    }

    return;
}

static void
aio_entry_update_for_cancel(aio_workq_entry *entryp, boolean_t cancelled, int wait_for_completion, boolean_t disable_notification)
{
    aio_entry_lock_spin(entryp);

    if (cancelled) {
        aio_entry_ref_locked(entryp);
        entryp->errorval = ECANCELED;
        entryp->returnval = -1;
    }

    if (wait_for_completion) {
        entryp->flags |= wait_for_completion; /* flag for special completion processing */
    }

    if (disable_notification) {
        entryp->flags |= AIO_DISABLE; /* Don't want a signal */
    }

    aio_entry_unlock(entryp);
}

static int
aio_entry_try_workq_remove(aio_workq_entry *entryp)
{
    /* Can only be cancelled if it's still on a work queue */
    if (entryp->aio_workq_link.tqe_prev != NULL) {
        aio_workq_t queue;

        /* Will have to check again under the lock */
        queue = aio_entry_workq(entryp);
        aio_workq_lock_spin(queue);
        if (entryp->aio_workq_link.tqe_prev != NULL) {
            aio_workq_remove_entry_locked(queue, entryp);
            aio_workq_unlock(queue);
            return 1;
        } else {
            aio_workq_unlock(queue);
        }
    }

    return 0;
}

static void
aio_workq_lock_spin(aio_workq_t wq)
{
    lck_mtx_lock_spin(aio_workq_mutex(wq));
}

static void
aio_workq_unlock(aio_workq_t wq)
{
    lck_mtx_unlock(aio_workq_mutex(wq));
}

static lck_mtx_t*
aio_workq_mutex(aio_workq_t wq)
{
    return &wq->aioq_mtx;
}

/*
 * aio_cancel - attempt to cancel one or more async IO requests currently
 * outstanding against file descriptor uap->fd.  If uap->aiocbp is not
 * NULL then only one specific IO is cancelled (if possible).  If uap->aiocbp
 * is NULL then all outstanding async IO requests for the given file
 * descriptor are cancelled (if possible).
 */
int
aio_cancel(proc_t p, struct aio_cancel_args *uap, int *retval)
{
    struct user_aiocb my_aiocb;
    int result;

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_START,
        (int)p, (int)uap->aiocbp, 0, 0, 0);

    /* quick check to see if there are any async IO requests queued up */
    if (aio_get_all_queues_count() < 1) {
        result = 0;
        *retval = AIO_ALLDONE;
        goto ExitRoutine;
    }

    *retval = -1;
    if (uap->aiocbp != USER_ADDR_NULL) {
        if (proc_is64bit(p)) {
            struct user64_aiocb aiocb64;

            result = copyin(uap->aiocbp, &aiocb64, sizeof(aiocb64));
            if (result == 0) {
                do_munge_aiocb_user64_to_user(&aiocb64, &my_aiocb);
            }
        } else {
            struct user32_aiocb aiocb32;

            result = copyin(uap->aiocbp, &aiocb32, sizeof(aiocb32));
            if (result == 0) {
                do_munge_aiocb_user32_to_user(&aiocb32, &my_aiocb);
            }
        }

        if (result != 0) {
            result = EAGAIN;
            goto ExitRoutine;
        }

        /* NOTE - POSIX standard says a mismatch between the file */
        /* descriptor passed in and the file descriptor embedded in */
        /* the aiocb causes unspecified results.  We return EBADF in */
        /* that situation. */
        if (uap->fd != my_aiocb.aio_fildes) {
            result = EBADF;
            goto ExitRoutine;
        }
    }

    aio_proc_lock(p);
    result = do_aio_cancel_locked(p, uap->fd, uap->aiocbp, 0, FALSE);
    ASSERT_AIO_PROC_LOCK_OWNED(p);
    aio_proc_unlock(p);

    if (result != -1) {
        *retval = result;
        result = 0;
        goto ExitRoutine;
    }

    result = EBADF;

ExitRoutine:
    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel)) | DBG_FUNC_END,
        (int)p, (int)uap->aiocbp, result, 0, 0);

    return result;
} /* aio_cancel */
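
/*
 * Illustrative sketch (not part of the kernel build): how the two aio_cancel()
 * modes described above look from user space.  The descriptor-wide form passes
 * a NULL aiocb pointer; the single-request form passes the same aiocb that was
 * handed to aio_read()/aio_write().  Names such as "fd" and "cb" are
 * hypothetical.
 *
 *	#include <aio.h>
 *
 *	struct aiocb cb;                // previously queued with aio_read(&cb)
 *
 *	int r1 = aio_cancel(fd, &cb);   // cancel just this request
 *	int r2 = aio_cancel(fd, NULL);  // cancel everything outstanding on fd
 *
 *	// r1/r2 are AIO_CANCELED, AIO_NOTCANCELED, or AIO_ALLDONE; a mismatch
 *	// between fd and cb.aio_fildes fails with EBADF, per the check above.
 */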


/*
 * _aio_close - internal function used to clean up async IO requests for
 * a file descriptor that is closing.
 * THIS MAY BLOCK.
 */
__private_extern__ void
_aio_close(proc_t p, int fd)
{
    int error;

    /* quick check to see if there are any async IO requests queued up */
    if (aio_get_all_queues_count() < 1) {
        return;
    }

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_START,
        (int)p, fd, 0, 0, 0);

    /* cancel all async IO requests on our todo queues for this file descriptor */
    aio_proc_lock(p);
    error = do_aio_cancel_locked(p, fd, 0, AIO_CLOSE_WAIT, FALSE);
    ASSERT_AIO_PROC_LOCK_OWNED(p);
    if (error == AIO_NOTCANCELED) {
        /*
         * AIO_NOTCANCELED is returned when we find an aio request for this process
         * and file descriptor on the active async IO queue.  Active requests cannot
         * be cancelled so we must wait for them to complete.  We will get a special
         * wake up call on our channel used to sleep for ALL active requests to
         * complete.  This sleep channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used
         * when we must wait for all active aio requests.
         */

        KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_close_sleep)) | DBG_FUNC_NONE,
            (int)p, fd, 0, 0, 0);

        while (aio_proc_active_requests_for_file(p, fd) > 0) {
            msleep(&p->AIO_CLEANUP_SLEEP_CHAN, aio_proc_mutex(p), PRIBIO, "aio_close", 0);
        }
    }

    aio_proc_unlock(p);

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_close)) | DBG_FUNC_END,
        (int)p, fd, 0, 0, 0);

    return;
} /* _aio_close */

/*
 * aio_error - return the error status associated with the async IO
 * request referred to by uap->aiocbp.  The error status is the errno
 * value that would be set by the corresponding IO request (read, write,
 * fdatasync, or sync).
 */
int
aio_error(proc_t p, struct aio_error_args *uap, int *retval)
{
    aio_workq_entry *entryp;
    int error;

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_START,
        (int)p, (int)uap->aiocbp, 0, 0, 0);

    /* see if there are any aios to check */
    if (aio_get_all_queues_count() < 1) {
        return EINVAL;
    }

    aio_proc_lock(p);

    /* look for a match on our queue of async IO requests that have completed */
    TAILQ_FOREACH(entryp, &p->p_aio_doneq, aio_proc_link) {
        if (entryp->uaiocbp == uap->aiocbp) {
            ASSERT_AIO_FROM_PROC(entryp, p);

            aio_entry_lock_spin(entryp);
            *retval = entryp->errorval;
            error = 0;
            aio_entry_unlock(entryp);
            KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_error_val)) | DBG_FUNC_NONE,
                (int)p, (int)uap->aiocbp, *retval, 0, 0);
            goto ExitRoutine;
        }
    }

    /* look for a match on our queue of active async IO requests */
    TAILQ_FOREACH(entryp, &p->p_aio_activeq, aio_proc_link) {
        if (entryp->uaiocbp == uap->aiocbp) {
            ASSERT_AIO_FROM_PROC(entryp, p);
            *retval = EINPROGRESS;
            error = 0;
            KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_error_activeq)) | DBG_FUNC_NONE,
                (int)p, (int)uap->aiocbp, *retval, 0, 0);
            goto ExitRoutine;
        }
    }

    error = EINVAL;

ExitRoutine:
    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_error)) | DBG_FUNC_END,
        (int)p, (int)uap->aiocbp, error, 0, 0);
    aio_proc_unlock(p);

    return error;
} /* aio_error */


/*
 * aio_fsync - asynchronously force all IO operations associated
 * with the file indicated by the file descriptor (uap->aiocbp->aio_fildes) and
 * queued at the time of the call to the synchronized completion state.
 * NOTE - we do not support op O_DSYNC at this point since we do not support the
 * fdatasync() call.
 */
int
aio_fsync(proc_t p, struct aio_fsync_args *uap, int *retval)
{
    int error;
    int fsync_kind;

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_START,
        (int)p, (int)uap->aiocbp, uap->op, 0, 0);

    *retval = 0;
    /* 0 := O_SYNC for binary backward compatibility with Panther */
    if (uap->op == O_SYNC || uap->op == 0) {
        fsync_kind = AIO_FSYNC;
    } else if (uap->op == O_DSYNC) {
        fsync_kind = AIO_DSYNC;
    } else {
        *retval = -1;
        error = EINVAL;
        goto ExitRoutine;
    }

    error = aio_queue_async_request(p, uap->aiocbp, fsync_kind);
    if (error != 0) {
        *retval = -1;
    }

ExitRoutine:
    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync)) | DBG_FUNC_END,
        (int)p, (int)uap->aiocbp, error, 0, 0);

    return error;
} /* aio_fsync */
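
/*
 * Illustrative sketch (not part of the kernel build): user-space use of the
 * op argument handled above.  O_SYNC (or the legacy 0) maps to AIO_FSYNC and
 * O_DSYNC to AIO_DSYNC; any other value fails with EINVAL, and the header
 * comment notes that O_DSYNC is not really supported since fdatasync() is
 * absent.  Only aio_fildes needs to be filled in; "fd" is hypothetical.
 *
 *	#include <aio.h>
 *	#include <fcntl.h>
 *
 *	struct aiocb cb = { .aio_fildes = fd };
 *
 *	if (aio_fsync(O_SYNC, &cb) == -1)
 *		perror("aio_fsync");
 *	// completion is observed later via aio_error(&cb) / aio_return(&cb)
 */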


/* aio_read - asynchronously read uap->aiocbp->aio_nbytes bytes from the
 * file descriptor (uap->aiocbp->aio_fildes) into the buffer
 * (uap->aiocbp->aio_buf).
 */
int
aio_read(proc_t p, struct aio_read_args *uap, int *retval)
{
    int error;

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_START,
        (int)p, (int)uap->aiocbp, 0, 0, 0);

    *retval = 0;

    error = aio_queue_async_request(p, uap->aiocbp, AIO_READ);
    if (error != 0) {
        *retval = -1;
    }

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_read)) | DBG_FUNC_END,
        (int)p, (int)uap->aiocbp, error, 0, 0);

    return error;
} /* aio_read */


/*
 * aio_return - return the return status associated with the async IO
 * request referred to by uap->aiocbp.  The return status is the value
 * that would be returned by the corresponding IO request (read, write,
 * fdatasync, or sync).  This is where we release the kernel resources
 * held for the async IO call associated with the given aiocb pointer.
 */
int
aio_return(proc_t p, struct aio_return_args *uap, user_ssize_t *retval)
{
    aio_workq_entry *entryp;
    int error;
    boolean_t proc_lock_held = FALSE;

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_START,
        (int)p, (int)uap->aiocbp, 0, 0, 0);

    /* See if there are any entries to check */
    if (aio_get_all_queues_count() < 1) {
        error = EINVAL;
        goto ExitRoutine;
    }

    aio_proc_lock(p);
    proc_lock_held = TRUE;
    *retval = 0;

    /* look for a match on our queue of async IO requests that have completed */
    TAILQ_FOREACH(entryp, &p->p_aio_doneq, aio_proc_link) {
        ASSERT_AIO_FROM_PROC(entryp, p);
        if (entryp->uaiocbp == uap->aiocbp) {
            /* Done and valid for aio_return(), pull it off the list */
            aio_proc_remove_done_locked(p, entryp);

            /* Drop the proc lock, but keep the entry locked */
            aio_entry_lock(entryp);
            aio_proc_unlock(p);
            proc_lock_held = FALSE;

            *retval = entryp->returnval;
            error = 0;

            /* No references and off all lists, safe to free */
            if (entryp->aio_refcount == 0) {
                aio_entry_unlock(entryp);
                aio_free_request(entryp);
            } else {
                /* Whoever has the refcount will have to free it */
                entryp->flags |= AIO_DO_FREE;
                aio_entry_unlock(entryp);
            }


            KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_return_val)) | DBG_FUNC_NONE,
                (int)p, (int)uap->aiocbp, *retval, 0, 0);
            goto ExitRoutine;
        }
    }

    /* look for a match on our queue of active async IO requests */
    TAILQ_FOREACH(entryp, &p->p_aio_activeq, aio_proc_link) {
        ASSERT_AIO_FROM_PROC(entryp, p);
        if (entryp->uaiocbp == uap->aiocbp) {
            error = EINPROGRESS;
            KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_return_activeq)) | DBG_FUNC_NONE,
                (int)p, (int)uap->aiocbp, *retval, 0, 0);
            goto ExitRoutine;
        }
    }

    error = EINVAL;

ExitRoutine:
    if (proc_lock_held) {
        aio_proc_unlock(p);
    }
    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_return)) | DBG_FUNC_END,
        (int)p, (int)uap->aiocbp, error, 0, 0);

    return error;
} /* aio_return */


/*
 * _aio_exec - internal function used to clean up async IO requests for
 * a process that is going away due to exec().  We cancel any async IOs
 * we can and wait for those already active.  We also disable signaling
 * for cancelled or active aio requests that complete.
 * This routine MAY block!
 */
__private_extern__ void
_aio_exec(proc_t p)
{
    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_START,
        (int)p, 0, 0, 0, 0);

    _aio_exit(p);

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_exec)) | DBG_FUNC_END,
        (int)p, 0, 0, 0, 0);

    return;
} /* _aio_exec */


/*
 * _aio_exit - internal function used to clean up async IO requests for
 * a process that is terminating (via exit() or exec()).  We cancel any async IOs
 * we can and wait for those already active.  We also disable signaling
 * for cancelled or active aio requests that complete.  This routine MAY block!
 */
__private_extern__ void
_aio_exit(proc_t p)
{
    int error;
    aio_workq_entry *entryp;


    /* quick check to see if there are any async IO requests queued up */
    if (aio_get_all_queues_count() < 1) {
        return;
    }

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_START,
        (int)p, 0, 0, 0, 0);

    aio_proc_lock(p);

    /*
     * cancel async IO requests on the todo work queue and wait for those
     * already active to complete.
     */
    error = do_aio_cancel_locked(p, 0, 0, AIO_EXIT_WAIT, TRUE);
    ASSERT_AIO_PROC_LOCK_OWNED(p);
    if (error == AIO_NOTCANCELED) {
        /*
         * AIO_NOTCANCELED is returned when we find an aio request for this process
         * on the active async IO queue.  Active requests cannot be cancelled so we
         * must wait for them to complete.  We will get a special wake up call on
         * our channel used to sleep for ALL active requests to complete.  This sleep
         * channel (proc.AIO_CLEANUP_SLEEP_CHAN) is only used when we must wait for all
         * active aio requests.
         */

        KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_exit_sleep)) | DBG_FUNC_NONE,
            (int)p, 0, 0, 0, 0);

        while (p->p_aio_active_count != 0) {
            msleep(&p->AIO_CLEANUP_SLEEP_CHAN, aio_proc_mutex(p), PRIBIO, "aio_exit", 0);
        }
    }

    if (p->p_aio_active_count != 0) {
        panic("Exiting process has %d active AIOs after cancellation has completed.\n", p->p_aio_active_count);
    }

    /* release all aio resources used by this process */
    entryp = TAILQ_FIRST(&p->p_aio_doneq);
    while (entryp != NULL) {
        ASSERT_AIO_FROM_PROC(entryp, p);
        aio_workq_entry *next_entryp;

        next_entryp = TAILQ_NEXT(entryp, aio_proc_link);
        aio_proc_remove_done_locked(p, entryp);

        /* we cannot free requests that are still completing */
        aio_entry_lock_spin(entryp);
        if (entryp->aio_refcount == 0) {
            aio_proc_unlock(p);
            aio_entry_unlock(entryp);
            aio_free_request(entryp);

            /* need to start over since aio_doneq may have been */
            /* changed while we were away. */
            aio_proc_lock(p);
            entryp = TAILQ_FIRST(&p->p_aio_doneq);
            continue;
        } else {
            /* whoever has the reference will have to do the free */
            entryp->flags |= AIO_DO_FREE;
        }

        aio_entry_unlock(entryp);
        entryp = next_entryp;
    }

    aio_proc_unlock(p);

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_exit)) | DBG_FUNC_END,
        (int)p, 0, 0, 0, 0);
    return;
} /* _aio_exit */


static boolean_t
should_cancel(aio_workq_entry *entryp, user_addr_t aiocbp, int fd)
{
    if ((aiocbp == USER_ADDR_NULL && fd == 0) ||
        (aiocbp != USER_ADDR_NULL && entryp->uaiocbp == aiocbp) ||
        (aiocbp == USER_ADDR_NULL && fd == entryp->aiocb.aio_fildes)) {
        return TRUE;
    }

    return FALSE;
}

/*
 * do_aio_cancel_locked - cancel async IO requests (if possible).  We get called by
 * aio_cancel, close, and at exit.
 * There are three modes of operation: 1) cancel all async IOs for a process -
 * fd is 0 and aiocbp is NULL 2) cancel all async IOs for file descriptor - fd
 * is > 0 and aiocbp is NULL 3) cancel one async IO associated with the given
 * aiocbp.
 * Returns -1 if no matches were found, AIO_CANCELED when we cancelled all
 * target async IO requests, AIO_NOTCANCELED if we could not cancel all
 * target async IO requests, and AIO_ALLDONE if all target async IO requests
 * were already complete.
 * WARNING - do not dereference aiocbp in this routine, it may point to user
 * land data that has not been copied in (when called from aio_cancel())
 *
 * Called with proc locked, and returns the same way.
 */
static int
do_aio_cancel_locked(proc_t p, int fd, user_addr_t aiocbp,
    int wait_for_completion, boolean_t disable_notification)
{
    ASSERT_AIO_PROC_LOCK_OWNED(p);

    aio_workq_entry *entryp;
    int result;

    result = -1;

    /* look for a match on our queue of async todo work. */
    entryp = TAILQ_FIRST(&p->p_aio_activeq);
    while (entryp != NULL) {
        ASSERT_AIO_FROM_PROC(entryp, p);
        aio_workq_entry *next_entryp;

        next_entryp = TAILQ_NEXT(entryp, aio_proc_link);
        if (!should_cancel(entryp, aiocbp, fd)) {
            entryp = next_entryp;
            continue;
        }

        /* Can only be cancelled if it's still on a work queue */
        if (aio_entry_try_workq_remove(entryp) != 0) {
            /* Have removed from workq. Update entry state and take a ref */
            aio_entry_update_for_cancel(entryp, TRUE, 0, disable_notification);

            /* Put on the proc done queue and update counts, then unlock the proc */
            aio_proc_move_done_locked(p, entryp);
            aio_proc_unlock(p);

            /* Now it's officially cancelled.  Do the completion */
            result = AIO_CANCELED;
            KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_async_workq)) | DBG_FUNC_NONE,
                (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0);
            do_aio_completion(entryp);

            /* This will free if the aio_return() has already happened ... */
            aio_entry_unref(entryp);
            aio_proc_lock(p);

            if (aiocbp != USER_ADDR_NULL) {
                return result;
            }

            /*
             * Restart from the head of the proc active queue since it
             * may have been changed while we were away doing completion
             * processing.
             *
             * Note that if we found an uncancellable AIO before, we will
             * either find it again or discover that it's been completed,
             * so resetting the result will not cause us to return success
             * despite outstanding AIOs.
             */
            entryp = TAILQ_FIRST(&p->p_aio_activeq);
            result = -1; /* As if beginning anew */
        } else {
            /*
             * It's been taken off the active queue already, i.e. is in flight.
             * All we can do is ask for notification.
             */
            result = AIO_NOTCANCELED;

            KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_activeq)) | DBG_FUNC_NONE,
                (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0);

            /* Mark for waiting and such; will not take a ref if "cancelled" arg is FALSE */
            aio_entry_update_for_cancel(entryp, FALSE, wait_for_completion, disable_notification);

            if (aiocbp != USER_ADDR_NULL) {
                return result;
            }
            entryp = next_entryp;
        }
    } /* while... */

    /*
     * if we didn't find any matches on the todo or active queues then look for a
     * match on our queue of async IO requests that have completed and if found
     * return AIO_ALLDONE result.
     *
     * Proc AIO lock is still held.
     */
    if (result == -1) {
        TAILQ_FOREACH(entryp, &p->p_aio_doneq, aio_proc_link) {
            ASSERT_AIO_FROM_PROC(entryp, p);
            if (should_cancel(entryp, aiocbp, fd)) {
                result = AIO_ALLDONE;
                KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_cancel_doneq)) | DBG_FUNC_NONE,
                    (int)entryp->procp, (int)entryp->uaiocbp, fd, 0, 0);

                if (aiocbp != USER_ADDR_NULL) {
                    return result;
                }
            }
        }
    }

    return result;
}
/* do_aio_cancel_locked */
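
/*
 * Illustrative sketch (not part of the original source): the three calling
 * modes described in the block comment above, using the argument combinations
 * that aio_cancel(), _aio_close(), and _aio_exit() actually pass in this file.
 *
 *	do_aio_cancel_locked(p, 0,  0,      AIO_EXIT_WAIT,  TRUE);   // (1) all AIOs for p (exit/exec)
 *	do_aio_cancel_locked(p, fd, 0,      AIO_CLOSE_WAIT, FALSE);  // (2) all AIOs against fd (close)
 *	do_aio_cancel_locked(p, fd, aiocbp, 0,              FALSE);  // (3) the single request aiocbp
 *
 * In every mode the proc AIO lock must be held on entry and is held on return.
 */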


/*
 * aio_suspend - suspend the calling thread until at least one of the async
 * IO operations referenced by uap->aiocblist has completed, until a signal
 * interrupts the function, or uap->timeoutp time interval (optional) has
 * passed.
 * Returns 0 if one or more async IOs have completed else -1 and errno is
 * set appropriately - EAGAIN if timeout elapses or EINTR if an interrupt
 * woke us up.
 */
int
aio_suspend(proc_t p, struct aio_suspend_args *uap, int *retval)
{
    __pthread_testcancel(1);
    return aio_suspend_nocancel(p, (struct aio_suspend_nocancel_args *)uap, retval);
}


int
aio_suspend_nocancel(proc_t p, struct aio_suspend_nocancel_args *uap, int *retval)
{
    int error;
    int i, count;
    uint64_t abstime;
    struct user_timespec ts;
    aio_workq_entry *entryp;
    user_addr_t *aiocbpp;

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_START,
        (int)p, uap->nent, 0, 0, 0);

    *retval = -1;
    abstime = 0;
    aiocbpp = NULL;

    count = aio_get_all_queues_count();
    if (count < 1) {
        error = EINVAL;
        goto ExitThisRoutine;
    }

    if (uap->nent < 1 || uap->nent > aio_max_requests_per_process) {
        error = EINVAL;
        goto ExitThisRoutine;
    }

    if (uap->timeoutp != USER_ADDR_NULL) {
        if (proc_is64bit(p)) {
            struct user64_timespec temp;
            error = copyin(uap->timeoutp, &temp, sizeof(temp));
            if (error == 0) {
                ts.tv_sec = temp.tv_sec;
                ts.tv_nsec = temp.tv_nsec;
            }
        } else {
            struct user32_timespec temp;
            error = copyin(uap->timeoutp, &temp, sizeof(temp));
            if (error == 0) {
                ts.tv_sec = temp.tv_sec;
                ts.tv_nsec = temp.tv_nsec;
            }
        }
        if (error != 0) {
            error = EAGAIN;
            goto ExitThisRoutine;
        }

        if (ts.tv_sec < 0 || ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) {
            error = EINVAL;
            goto ExitThisRoutine;
        }

        nanoseconds_to_absolutetime((uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec,
            &abstime);
        clock_absolutetime_interval_to_deadline(abstime, &abstime);
    }

    aiocbpp = aio_copy_in_list(p, uap->aiocblist, uap->nent);
    if (aiocbpp == NULL) {
        error = EAGAIN;
        goto ExitThisRoutine;
    }

    /* check list of aio requests to see if any have completed */
check_for_our_aiocbp:
    aio_proc_lock_spin(p);
    for (i = 0; i < uap->nent; i++) {
        user_addr_t aiocbp;

        /* NULL elements are legal so check for 'em */
        aiocbp = *(aiocbpp + i);
        if (aiocbp == USER_ADDR_NULL) {
            continue;
        }

        /* return immediately if any aio request in the list is done */
        TAILQ_FOREACH(entryp, &p->p_aio_doneq, aio_proc_link) {
            ASSERT_AIO_FROM_PROC(entryp, p);
            if (entryp->uaiocbp == aiocbp) {
                aio_proc_unlock(p);
                *retval = 0;
                error = 0;
                goto ExitThisRoutine;
            }
        }
    } /* for ( ; i < uap->nent; ) */

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend_sleep)) | DBG_FUNC_NONE,
        (int)p, uap->nent, 0, 0, 0);

    /*
     * wait for an async IO to complete or a signal fires or timeout expires.
     * we return EAGAIN (35) for timeout expiration and EINTR (4) when a signal
     * interrupts us.  If an async IO completes before a signal fires or our
     * timeout expires, we get a wakeup call from aio_work_thread().
     */

    error = msleep1(&p->AIO_SUSPEND_SLEEP_CHAN, aio_proc_mutex(p), PCATCH | PWAIT | PDROP, "aio_suspend", abstime); /* XXX better priority? */
    if (error == 0) {
        /*
         * got our wakeup call from aio_work_thread().
         * Since we can get a wakeup on this channel from another thread in the
         * same process we head back up to make sure this is for the correct aiocbp.
         * If it is the correct aiocbp we will return from where we do the check
         * (see entryp->uaiocbp == aiocbp after check_for_our_aiocbp label)
         * else we will fall out and just sleep again.
         */
        goto check_for_our_aiocbp;
    } else if (error == EWOULDBLOCK) {
        /* our timeout expired */
        error = EAGAIN;
    } else {
        /* we were interrupted */
        error = EINTR;
    }

ExitThisRoutine:
    if (aiocbpp != NULL) {
        FREE(aiocbpp, M_TEMP);
    }

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_suspend)) | DBG_FUNC_END,
        (int)p, uap->nent, error, 0, 0);

    return error;
} /* aio_suspend */
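
/*
 * Illustrative sketch (not part of the kernel build): the user-space calling
 * convention that feeds aio_suspend_nocancel() above.  A NULL timeout sleeps
 * until a listed request completes or a signal arrives; "cb" is hypothetical.
 *
 *	#include <aio.h>
 *	#include <time.h>
 *
 *	const struct aiocb *list[1] = { &cb };
 *	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
 *
 *	if (aio_suspend(list, 1, &ts) == -1) {
 *		// errno is EAGAIN if the timeout expired, EINTR if a signal woke us
 *	}
 */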


/* aio_write - asynchronously write uap->aiocbp->aio_nbytes bytes to the
 * file descriptor (uap->aiocbp->aio_fildes) from the buffer
 * (uap->aiocbp->aio_buf).
 */

int
aio_write(proc_t p, struct aio_write_args *uap, int *retval)
{
    int error;

    *retval = 0;

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_START,
        (int)p, (int)uap->aiocbp, 0, 0, 0);

    error = aio_queue_async_request(p, uap->aiocbp, AIO_WRITE);
    if (error != 0) {
        *retval = -1;
    }

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_write)) | DBG_FUNC_END,
        (int)p, (int)uap->aiocbp, error, 0, 0);

    return error;
} /* aio_write */


static user_addr_t *
aio_copy_in_list(proc_t procp, user_addr_t aiocblist, int nent)
{
    user_addr_t *aiocbpp;
    int i, result;

    /* we reserve enough space for largest possible pointer size */
    MALLOC(aiocbpp, user_addr_t *, (nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK);
    if (aiocbpp == NULL) {
        goto err;
    }

    /* copyin our aiocb pointers from list */
    result = copyin(aiocblist, aiocbpp,
        proc_is64bit(procp) ? (nent * sizeof(user64_addr_t))
        : (nent * sizeof(user32_addr_t)));
    if (result) {
        FREE(aiocbpp, M_TEMP);
        aiocbpp = NULL;
        goto err;
    }

    /*
     * We depend on a list of user_addr_t's so we need to
     * munge and expand when these pointers came from a
     * 32-bit process
     */
    if (!proc_is64bit(procp)) {
        /* copy from last to first to deal with overlap */
        user32_addr_t *my_ptrp = ((user32_addr_t *)aiocbpp) + (nent - 1);
        user_addr_t *my_addrp = aiocbpp + (nent - 1);

        for (i = 0; i < nent; i++, my_ptrp--, my_addrp--) {
            *my_addrp = (user_addr_t) (*my_ptrp);
        }
    }

err:
    return aiocbpp;
}
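
/*
 * Illustrative note (not part of the original source): why the 32-bit munge
 * above walks the array from last to first.  copyin() leaves nent packed
 * 4-byte pointers at the start of a buffer sized for nent 8-byte user_addr_t
 * slots.  With nent == 2, widened slot 0 occupies bytes 0-7 and would overlap
 * the packed p1 at bytes 4-7, so expanding index 0 first clobbers p1; widening
 * index 1 (bytes 8-15) from the still-intact p1 first, then index 0, is safe.
 */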


static int
aio_copy_in_sigev(proc_t procp, user_addr_t sigp, struct user_sigevent *sigev)
{
    int result = 0;

    if (sigp == USER_ADDR_NULL) {
        goto out;
    }

    /*
     * We need to munge aio_sigevent since it contains pointers.
     * Since we do not know if sigev_value is an int or a ptr we do
     * NOT cast the ptr to a user_addr_t.  This means if we send
     * this info back to user space we need to remember sigev_value
     * was not expanded for the 32-bit case.
     *
     * Notes: This does NOT affect us since we don't support
     *        sigev_value yet in the aio context.
     */
    if (proc_is64bit(procp)) {
        struct user64_sigevent sigevent64;

        result = copyin(sigp, &sigevent64, sizeof(sigevent64));
        if (result == 0) {
            sigev->sigev_notify = sigevent64.sigev_notify;
            sigev->sigev_signo = sigevent64.sigev_signo;
            sigev->sigev_value.size_equivalent.sival_int = sigevent64.sigev_value.size_equivalent.sival_int;
            sigev->sigev_notify_function = sigevent64.sigev_notify_function;
            sigev->sigev_notify_attributes = sigevent64.sigev_notify_attributes;
        }
    } else {
        struct user32_sigevent sigevent32;

        result = copyin(sigp, &sigevent32, sizeof(sigevent32));
        if (result == 0) {
            sigev->sigev_notify = sigevent32.sigev_notify;
            sigev->sigev_signo = sigevent32.sigev_signo;
            sigev->sigev_value.size_equivalent.sival_int = sigevent32.sigev_value.sival_int;
            sigev->sigev_notify_function = CAST_USER_ADDR_T(sigevent32.sigev_notify_function);
            sigev->sigev_notify_attributes = CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes);
        }
    }

    if (result != 0) {
        result = EAGAIN;
    }

out:
    return result;
}

/*
 * aio_enqueue_work
 *
 *	Queue up the entry on the aio asynchronous work queue in priority order
 *	based on the relative priority of the request.  We calculate the relative
 *	priority using the nice value of the caller and the value of the
 *	request's aio_reqprio field.
 *
 * Parameters:	procp		Process queueing the I/O
 *		entryp		The work queue entry being queued
 *
 * Returns:	(void)	No failure modes
 *
 * Notes:	This function is used for both lio_listio and aio
 *
 *	XXX:	At some point, we may have to consider thread priority
 *		rather than process priority, but we don't maintain the
 *		adjusted priority for threads the POSIX way.
 *
 *
 * Called with proc locked.
 */
static void
aio_enqueue_work(proc_t procp, aio_workq_entry *entryp, int proc_locked)
{
#if 0
    aio_workq_entry *my_entryp;     /* used for insertion sort */
#endif /* 0 */
    aio_workq_t queue = aio_entry_workq(entryp);

    if (proc_locked == 0) {
        aio_proc_lock(procp);
    }

    ASSERT_AIO_PROC_LOCK_OWNED(procp);

    /* Onto proc queue */
    TAILQ_INSERT_TAIL(&procp->p_aio_activeq, entryp, aio_proc_link);
    procp->p_aio_active_count++;
    procp->p_aio_total_count++;

    /* And work queue */
    aio_workq_lock_spin(queue);
    aio_workq_add_entry_locked(queue, entryp);
    waitq_wakeup64_one(&queue->aioq_waitq, CAST_EVENT64_T(queue),
        THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
    aio_workq_unlock(queue);

    if (proc_locked == 0) {
        aio_proc_unlock(procp);
    }

#if 0
    /*
     * Procedure:
     *
     * (1) The nice value is in the range PRIO_MIN..PRIO_MAX [-20..20]
     * (2) The normalized nice value is in the range 0..((2 * NZERO) - 1)
     *     which is [0..39], with 0 not being used.  In nice values, the
     *     lower the nice value, the higher the priority.
     * (3) The normalized scheduling priority is the highest nice value
     *     minus the current nice value.  In I/O scheduling priority, the
     *     higher the value the lower the priority, so it is the inverse
     *     of the nice value (the higher the number, the higher the I/O
     *     priority).
     * (4) From the normalized scheduling priority, we subtract the
     *     request priority to get the request priority value number;
     *     this means that requests are only capable of depressing their
     *     priority relative to other requests.
     */
    entryp->priority = (((2 * NZERO) - 1) - procp->p_nice);

    /* only permit depressing the priority */
    if (entryp->aiocb.aio_reqprio < 0) {
        entryp->aiocb.aio_reqprio = 0;
    }
    if (entryp->aiocb.aio_reqprio > 0) {
        entryp->priority -= entryp->aiocb.aio_reqprio;
        if (entryp->priority < 0) {
            entryp->priority = 0;
        }
    }

    /* Insertion sort the entry; lowest ->priority to highest */
    TAILQ_FOREACH(my_entryp, &aio_anchor.aio_async_workq, aio_workq_link) {
        if (entryp->priority <= my_entryp->priority) {
            TAILQ_INSERT_BEFORE(my_entryp, entryp, aio_workq_link);
            break;
        }
    }
    if (my_entryp == NULL) {
        TAILQ_INSERT_TAIL(&aio_anchor.aio_async_workq, entryp, aio_workq_link);
    }
#endif /* 0 */
}
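
/*
 * Illustrative note (not part of the original source): worked example of the
 * disabled priority computation in the #if 0 block above, assuming NZERO == 20.
 * The normalized priority is ((2 * NZERO) - 1) - p_nice == 39 - p_nice, and a
 * positive aio_reqprio can only depress it further (clamped at zero):
 *
 *	p_nice = 0,  aio_reqprio = 0   ->  priority = 39
 *	p_nice = 0,  aio_reqprio = 10  ->  priority = 29
 *	p_nice = 10, aio_reqprio = 50  ->  priority = 0   (clamped)
 *
 * The (disabled) insertion sort keeps lower ->priority values at the head of
 * the queue, which per note (3) above means a higher I/O priority.
 */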


/*
 * lio_listio - initiate a list of IO requests.  We process the list of
 * aiocbs either synchronously (mode == LIO_WAIT) or asynchronously
 * (mode == LIO_NOWAIT).
 *
 * The caller gets error and return status for each aiocb in the list
 * via aio_error and aio_return.  We must keep completed requests until
 * released by the aio_return call.
 */
int
lio_listio(proc_t p, struct lio_listio_args *uap, int *retval)
{
    int i;
    int call_result;
    int result;
    int old_count;
    aio_workq_entry **entryp_listp;
    user_addr_t *aiocbpp;
    struct user_sigevent aiosigev;
    aio_lio_context *lio_context;
    boolean_t free_context = FALSE;
    uint32_t *paio_offset;
    uint32_t *paio_nbytes;

    KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START,
        (int)p, uap->nent, uap->mode, 0, 0);

    entryp_listp = NULL;
    lio_context = NULL;
    aiocbpp = NULL;
    call_result = -1;
    *retval = -1;
    if (!(uap->mode == LIO_NOWAIT || uap->mode == LIO_WAIT)) {
        call_result = EINVAL;
        goto ExitRoutine;
    }

    if (uap->nent < 1 || uap->nent > AIO_LISTIO_MAX) {
        call_result = EINVAL;
        goto ExitRoutine;
    }

    /*
     * allocate a list of aio_workq_entry pointers that we will use
     * to queue up all our requests at once while holding our lock.
     */
    MALLOC(entryp_listp, void *, (uap->nent * sizeof(aio_workq_entry *)), M_TEMP, M_WAITOK);
    if (entryp_listp == NULL) {
        call_result = EAGAIN;
        goto ExitRoutine;
    }

    MALLOC(lio_context, aio_lio_context*, sizeof(aio_lio_context), M_TEMP, M_WAITOK);
    if (lio_context == NULL) {
        call_result = EAGAIN;
        goto ExitRoutine;
    }

#if DEBUG
    OSIncrementAtomic(&lio_contexts_alloced);
#endif /* DEBUG */

    free_context = TRUE;
    bzero(lio_context, sizeof(aio_lio_context));

    aiocbpp = aio_copy_in_list(p, uap->aiocblist, uap->nent);
    if (aiocbpp == NULL) {
        call_result = EAGAIN;
        goto ExitRoutine;
    }

    /*
     * Use sigevent passed in to lio_listio for each of our calls, but
     * only do completion notification after the last request completes.
     */
    bzero(&aiosigev, sizeof(aiosigev));
    /* Only copy in an sigev if the user supplied one */
    if (uap->sigp != USER_ADDR_NULL) {
        call_result = aio_copy_in_sigev(p, uap->sigp, &aiosigev);
        if (call_result) {
            goto ExitRoutine;
        }
    }

    /* process list of aio requests */
    free_context = FALSE;
    lio_context->io_issued = uap->nent;
    lio_context->io_waiter = uap->mode == LIO_WAIT ? 1 : 0; /* Should it be freed by last AIO */
    for (i = 0; i < uap->nent; i++) {
        user_addr_t my_aiocbp;
        aio_workq_entry *entryp;

        *(entryp_listp + i) = NULL;
        my_aiocbp = *(aiocbpp + i);

        /* NULL elements are legal so check for 'em */
        if (my_aiocbp == USER_ADDR_NULL) {
            aio_proc_lock_spin(p);
            lio_context->io_issued--;
            aio_proc_unlock(p);
            continue;
        }

        /*
         * We use lio_context to mark IO requests for delayed completion
         * processing which means we wait until all IO requests in the
         * group have completed before we either return to the caller
         * when mode is LIO_WAIT or signal user when mode is LIO_NOWAIT.
         *
         * We use the address of the lio_context for this, since it is
         * unique in the address space.
         */
        result = lio_create_entry(p, my_aiocbp, lio_context, (entryp_listp + i));
        if (result != 0 && call_result == -1) {
            call_result = result;
        }

        /* NULL elements are legal so check for 'em */
        entryp = *(entryp_listp + i);
        if (entryp == NULL) {
            aio_proc_lock_spin(p);
            lio_context->io_issued--;
            aio_proc_unlock(p);
            continue;
        }

        if (uap->mode == LIO_NOWAIT) {
            /* Set signal handler, if any */
            entryp->aiocb.aio_sigevent = aiosigev;
        } else {
            /* flag that this thread blocks pending completion */
            entryp->flags |= AIO_LIO_NOTIFY;
        }
55e303ae
A
1571
1572 /* check our aio limits to throttle bad or rude user land behavior */
b0d623f7
A
1573 old_count = aio_increment_total_count();
1574
1575 aio_proc_lock_spin(p);
0a7de745
A
1576 if (old_count >= aio_max_requests ||
1577 aio_get_process_count( entryp->procp ) >= aio_max_requests_per_process ||
1578 is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE) {
b0d623f7
A
1579 lio_context->io_issued--;
1580 aio_proc_unlock(p);
0a7de745 1581
b0d623f7
A
1582 aio_decrement_total_count();
1583
0a7de745 1584 if (call_result == -1) {
b0d623f7 1585 call_result = EAGAIN;
0a7de745 1586 }
b0d623f7
A
1587 aio_free_request(entryp);
1588 entryp_listp[i] = NULL;
55e303ae
A
1589 continue;
1590 }
0a7de745 1591
b0d623f7
A
1592 lck_mtx_convert_spin(aio_proc_mutex(p));
1593 aio_enqueue_work(p, entryp, 1);
1594 aio_proc_unlock(p);
0a7de745
A
1595
1596 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_START,
1597 (int)p, (int)entryp->uaiocbp, entryp->flags, entryp->aiocb.aio_fildes, 0 );
1598 paio_offset = (uint32_t*) &entryp->aiocb.aio_offset;
1599 paio_nbytes = (uint32_t*) &entryp->aiocb.aio_nbytes;
1600 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_END,
1601 paio_offset[0], (sizeof(entryp->aiocb.aio_offset) == sizeof(uint64_t) ? paio_offset[1] : 0),
1602 paio_nbytes[0], (sizeof(entryp->aiocb.aio_nbytes) == sizeof(uint64_t) ? paio_nbytes[1] : 0),
1603 0 );
1604 }
1605
1606 switch (uap->mode) {
b0d623f7
A
1607 case LIO_WAIT:
1608 aio_proc_lock_spin(p);
1609 while (lio_context->io_completed < lio_context->io_issued) {
1610 result = msleep(lio_context, aio_proc_mutex(p), PCATCH | PRIBIO | PSPIN, "lio_listio", 0);
0a7de745 1611
b0d623f7
A
1612 /* If we were interrupted, fail out (even if all finished) */
1613 if (result != 0) {
1614 call_result = EINTR;
1615 lio_context->io_waiter = 0;
1616 break;
0a7de745 1617 }
b0d623f7
A
1618 }
1619
1620		/* If all IOs have finished, we must free the context */
1621 if (lio_context->io_completed == lio_context->io_issued) {
1622 free_context = TRUE;
0a7de745 1623 }
55e303ae 1624
b0d623f7
A
1625 aio_proc_unlock(p);
1626 break;
0a7de745 1627
b0d623f7
A
1628 case LIO_NOWAIT:
1629 break;
1630 }
0a7de745 1631
55e303ae 1632 /* call_result == -1 means we had no trouble queueing up requests */
0a7de745 1633 if (call_result == -1) {
55e303ae
A
1634 call_result = 0;
1635 *retval = 0;
1636 }
1637
0a7de745
A
1638ExitRoutine:
1639 if (entryp_listp != NULL) {
55e303ae 1640 FREE( entryp_listp, M_TEMP );
0a7de745
A
1641 }
1642 if (aiocbpp != NULL) {
91447636 1643 FREE( aiocbpp, M_TEMP );
0a7de745 1644 }
d9a64523 1645 if (free_context) {
b0d623f7
A
1646 free_lio_context(lio_context);
1647 }
0a7de745
A
1648
1649 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_END,
1650 (int)p, call_result, 0, 0, 0 );
1651
1652 return call_result;
55e303ae
A
1653} /* lio_listio */
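/*
 * Hedged userland sketch (not part of kern_aio.c): one way the lio_listio()
 * path above can be driven.  With LIO_WAIT the caller blocks in the msleep()
 * loop until io_completed reaches io_issued; with LIO_NOWAIT the optional
 * sigevent is delivered only after the last request in the group finishes.
 * The file descriptor, buffers and sizes here are illustrative only.
 */
#include <aio.h>
#include <string.h>

static int
submit_two_reads_and_wait(int fd, char *buf0, char *buf1, size_t len)
{
	struct aiocb cb0, cb1;
	struct aiocb *list[2] = { &cb0, &cb1 };

	memset(&cb0, 0, sizeof(cb0));
	cb0.aio_fildes = fd;
	cb0.aio_buf = buf0;
	cb0.aio_nbytes = len;
	cb0.aio_offset = 0;
	cb0.aio_lio_opcode = LIO_READ;

	cb1 = cb0;                      /* same file, next chunk */
	cb1.aio_buf = buf1;
	cb1.aio_offset = (off_t)len;

	/* Blocks until both requests complete; -1 with errno (e.g. EINTR) on failure. */
	return lio_listio(LIO_WAIT, list, 2, NULL);
}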
1654
1655
1656/*
1657 * aio worker thread. this is where all the real work gets done.
0a7de745 1658 * we get a wake up call on the work queue's wait queue
55e303ae
A
1659 * after new work is queued up.
1660 */
39037602 1661__attribute__((noreturn))
55e303ae 1662static void
39037602 1663aio_work_thread(void)
55e303ae 1664{
0a7de745
A
1665 aio_workq_entry *entryp;
1666 int error;
1667 vm_map_t currentmap;
1668 vm_map_t oldmap = VM_MAP_NULL;
1669 task_t oldaiotask = TASK_NULL;
1670 struct uthread *uthreadp = NULL;
1671
1672 for (;;) {
1673 /*
b0d623f7 1674 * returns with the entry ref'ed.
0a7de745 1675 * sleeps until work is available.
b0d623f7 1676 */
0a7de745 1677 entryp = aio_get_some_work();
b0d623f7 1678
0a7de745
A
1679 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_START,
1680 (int)entryp->procp, (int)entryp->uaiocbp, entryp->flags, 0, 0 );
b0d623f7
A
1681
1682 /*
1683 * Assume the target's address space identity for the duration
1684 * of the IO. Note: don't need to have the entryp locked,
1685 * because the proc and map don't change until it's freed.
1686 */
0a7de745
A
1687 currentmap = get_task_map((current_proc())->task );
1688 if (currentmap != entryp->aio_map) {
b0d623f7
A
1689 uthreadp = (struct uthread *) get_bsdthread_info(current_thread());
1690 oldaiotask = uthreadp->uu_aio_task;
1691 uthreadp->uu_aio_task = entryp->procp->task;
1692 oldmap = vm_map_switch( entryp->aio_map );
1693 }
1694
0a7de745 1695 if ((entryp->flags & AIO_READ) != 0) {
b0d623f7 1696 error = do_aio_read( entryp );
0a7de745 1697 } else if ((entryp->flags & AIO_WRITE) != 0) {
b0d623f7 1698 error = do_aio_write( entryp );
0a7de745 1699 } else if ((entryp->flags & (AIO_FSYNC | AIO_DSYNC)) != 0) {
b0d623f7 1700 error = do_aio_fsync( entryp );
0a7de745
A
1701 } else {
1702 printf( "%s - unknown aio request - flags 0x%02X \n",
1703 __FUNCTION__, entryp->flags );
b0d623f7
A
1704 error = EINVAL;
1705 }
91447636 1706
b0d623f7 1707 /* Restore old map */
0a7de745 1708 if (currentmap != entryp->aio_map) {
b0d623f7
A
1709 (void) vm_map_switch( oldmap );
1710 uthreadp->uu_aio_task = oldaiotask;
1711 }
55e303ae 1712
0a7de745
A
1713 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_worker_thread)) | DBG_FUNC_END,
1714 (int)entryp->procp, (int)entryp->uaiocbp, entryp->errorval,
1715 entryp->returnval, 0 );
1716
b0d623f7 1717
b0d623f7
A
1718 /* XXX COUNTS */
1719 aio_entry_lock_spin(entryp);
0a7de745 1720 entryp->errorval = error;
b0d623f7
A
1721 aio_entry_unlock(entryp);
1722
1723 /* we're done with the IO request so pop it off the active queue and */
1724 /* push it on the done queue */
1725 aio_proc_lock(entryp->procp);
1726 aio_proc_move_done_locked(entryp->procp, entryp);
1727 aio_proc_unlock(entryp->procp);
1728
1729 OSDecrementAtomic(&aio_anchor.aio_inflight_count);
1730
1731 /* remove our reference to the user land map. */
0a7de745
A
1732 if (VM_MAP_NULL != entryp->aio_map) {
1733 vm_map_t my_map;
b0d623f7
A
1734
1735 my_map = entryp->aio_map;
1736 entryp->aio_map = VM_MAP_NULL;
1737 vm_map_deallocate( my_map );
55e303ae 1738 }
b0d623f7
A
1739
1740 /* Provide notifications */
1741 do_aio_completion( entryp );
1742
1743 /* Will free if needed */
1744 aio_entry_unref(entryp);
55e303ae
A
1745 } /* for ( ;; ) */
1746
1747 /* NOT REACHED */
55e303ae
A
1748} /* aio_work_thread */
1749
1750
1751/*
1752 * aio_get_some_work - get the next async IO request that is ready to be executed.
1753 * aio_fsync complicates matters a bit since we cannot do the fsync until all async
1754 * IO requests at the time the aio_fsync call came in have completed.
91447636 1755 * NOTE - called with no locks held; this routine takes the work queue lock itself
55e303ae 1756 */
55e303ae
A
1757static aio_workq_entry *
1758aio_get_some_work( void )
1759{
0a7de745
A
1760 aio_workq_entry *entryp = NULL;
1761 aio_workq_t queue = NULL;
b0d623f7
A
1762
1763 /* Just one queue for the moment. In the future there will be many. */
0a7de745 1764 queue = &aio_anchor.aio_async_workqs[0];
b0d623f7
A
1765 aio_workq_lock_spin(queue);
1766 if (queue->aioq_count == 0) {
1767 goto nowork;
1768 }
1769
0a7de745 1770 /*
b0d623f7
A
1771 * Hold the queue lock.
1772 *
1773 * pop some work off the work queue and add to our active queue
0a7de745 1774 * Always start with the queue lock held.
b0d623f7 1775 */
0a7de745
A
1776 for (;;) {
1777 /*
b0d623f7
A
1778		 * Pull off the work queue. Once it's off, it can't be cancelled,
1779 * so we can take our ref once we drop the queue lock.
1780 */
1781 entryp = TAILQ_FIRST(&queue->aioq_entries);
55e303ae 1782
0a7de745
A
1783 /*
1784 * If there's no work or only fsyncs that need delay, go to sleep
1785 * and then start anew from aio_work_thread
b0d623f7
A
1786 */
1787 if (entryp == NULL) {
1788 goto nowork;
1789 }
1790
1791 aio_workq_remove_entry_locked(queue, entryp);
0a7de745 1792
b0d623f7
A
1793 aio_workq_unlock(queue);
1794
0a7de745 1795 /*
b0d623f7
A
1796 * Check if it's an fsync that must be delayed. No need to lock the entry;
1797 * that flag would have been set at initialization.
1798 */
0a7de745
A
1799 if ((entryp->flags & AIO_FSYNC) != 0) {
1800 /*
b0d623f7
A
1801 * Check for unfinished operations on the same file
1802 * in this proc's queue.
1803 */
1804 aio_proc_lock_spin(entryp->procp);
0a7de745 1805 if (aio_delay_fsync_request( entryp )) {
b0d623f7 1806 /* It needs to be delayed. Put it back on the end of the work queue */
0a7de745
A
1807 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_fsync_delay)) | DBG_FUNC_NONE,
1808 (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
b0d623f7
A
1809
1810 aio_proc_unlock(entryp->procp);
1811
1812 aio_workq_lock_spin(queue);
1813 aio_workq_add_entry_locked(queue, entryp);
55e303ae 1814 continue;
0a7de745 1815 }
b0d623f7 1816 aio_proc_unlock(entryp->procp);
55e303ae 1817 }
0a7de745 1818
55e303ae
A
1819 break;
1820 }
b0d623f7
A
1821
1822 aio_entry_ref(entryp);
1823
1824 OSIncrementAtomic(&aio_anchor.aio_inflight_count);
0a7de745 1825 return entryp;
55e303ae 1826
b0d623f7
A
1827nowork:
1828 /* We will wake up when someone enqueues something */
3e170ce0 1829 waitq_assert_wait64(&queue->aioq_waitq, CAST_EVENT64_T(queue), THREAD_UNINT, 0);
b0d623f7 1830 aio_workq_unlock(queue);
0a7de745 1831 thread_block((thread_continue_t)aio_work_thread );
b0d623f7
A
1832
1833 // notreached
1834 return NULL;
1835}
55e303ae
A
1836
1837/*
b0d623f7
A
1838 * aio_delay_fsync_request - look to see if this aio_fsync request should be delayed.
1839 * A big, simple hammer: only send it off once it is the head of the proc's
1840 * active queue, i.e. every IO filed before it has completed.
55e303ae
A
1841 */
1842static boolean_t
1843aio_delay_fsync_request( aio_workq_entry *entryp )
1844{
b0d623f7
A
1845 if (entryp == TAILQ_FIRST(&entryp->procp->p_aio_activeq)) {
1846 return FALSE;
55e303ae 1847 }
0a7de745 1848
b0d623f7 1849 return TRUE;
55e303ae
A
1850} /* aio_delay_fsync_request */
1851
b0d623f7
A
1852static aio_workq_entry *
1853aio_create_queue_entry(proc_t procp, user_addr_t aiocbp, void *group_tag, int kindOfIO)
55e303ae 1854{
0a7de745
A
1855 aio_workq_entry *entryp;
1856 int result = 0;
55e303ae
A
1857
1858 entryp = (aio_workq_entry *) zalloc( aio_workq_zonep );
0a7de745
A
1859 if (entryp == NULL) {
1860 result = EAGAIN;
55e303ae
A
1861 goto error_exit;
1862 }
91447636 1863
0a7de745 1864 bzero( entryp, sizeof(*entryp));
55e303ae
A
1865
1866 /* fill in the rest of the aio_workq_entry */
1867 entryp->procp = procp;
1868 entryp->uaiocbp = aiocbp;
b0d623f7 1869 entryp->flags |= kindOfIO;
55e303ae
A
1870 entryp->group_tag = group_tag;
1871 entryp->aio_map = VM_MAP_NULL;
b0d623f7 1872 entryp->aio_refcount = 0;
91447636 1873
0a7de745 1874 if (proc_is64bit(procp)) {
b0d623f7 1875 struct user64_aiocb aiocb64;
0a7de745
A
1876
1877 result = copyin( aiocbp, &aiocb64, sizeof(aiocb64));
1878 if (result == 0) {
b0d623f7 1879 do_munge_aiocb_user64_to_user(&aiocb64, &entryp->aiocb);
0a7de745 1880 }
b0d623f7
A
1881 } else {
1882 struct user32_aiocb aiocb32;
0a7de745
A
1883
1884 result = copyin( aiocbp, &aiocb32, sizeof(aiocb32));
1885 if (result == 0) {
b0d623f7 1886 do_munge_aiocb_user32_to_user( &aiocb32, &entryp->aiocb );
0a7de745 1887 }
55e303ae
A
1888 }
1889
0a7de745 1890 if (result != 0) {
b0d623f7 1891 result = EAGAIN;
55e303ae 1892 goto error_exit;
b0d623f7 1893 }
55e303ae
A
1894
1895 /* get a reference to the user land map in order to keep it around */
1896 entryp->aio_map = get_task_map( procp->task );
1897 vm_map_reference( entryp->aio_map );
b0d623f7
A
1898
1899 /* do some more validation on the aiocb and embedded file descriptor */
1900 result = aio_validate( entryp );
0a7de745 1901 if (result != 0) {
39236c6e 1902 goto error_exit_with_ref;
0a7de745 1903 }
39236c6e
A
1904
1905 /* get a reference on the current_thread, which is passed in vfs_context. */
1906 entryp->thread = current_thread();
1907 thread_reference( entryp->thread );
0a7de745 1908 return entryp;
b0d623f7 1909
39236c6e 1910error_exit_with_ref:
0a7de745 1911 if (VM_MAP_NULL != entryp->aio_map) {
39236c6e
A
1912 vm_map_deallocate( entryp->aio_map );
1913 }
55e303ae 1914error_exit:
0a7de745 1915 if (result && entryp != NULL) {
91447636 1916 zfree( aio_workq_zonep, entryp );
b0d623f7
A
1917 entryp = NULL;
1918 }
1919
0a7de745 1920 return entryp;
b0d623f7 1921}
55e303ae
A
1922
1923
1924/*
b0d623f7
A
1925 * aio_queue_async_request - queue up an async IO request on our work queue then
1926 * wake up one of our worker threads to do the actual work. We get a reference
1927 * to our caller's user land map in order to keep it around while we are
0a7de745 1928 * processing the request.
55e303ae 1929 */
b0d623f7
A
1930static int
1931aio_queue_async_request(proc_t procp, user_addr_t aiocbp, int kindOfIO )
55e303ae 1932{
0a7de745
A
1933 aio_workq_entry *entryp;
1934 int result;
1935 int old_count;
1936 uint32_t *paio_offset;
1937 uint32_t *paio_nbytes;
1938
b0d623f7
A
1939 old_count = aio_increment_total_count();
1940 if (old_count >= aio_max_requests) {
1941 result = EAGAIN;
1942 goto error_noalloc;
55e303ae 1943 }
b0d623f7
A
1944
1945 entryp = aio_create_queue_entry( procp, aiocbp, 0, kindOfIO);
0a7de745 1946 if (entryp == NULL) {
b0d623f7
A
1947 result = EAGAIN;
1948 goto error_noalloc;
55e303ae 1949 }
55e303ae
A
1950
1951
b0d623f7 1952 aio_proc_lock_spin(procp);
55e303ae 1953
0a7de745
A
1954 if (is_already_queued( entryp->procp, entryp->uaiocbp ) == TRUE) {
1955 result = EAGAIN;
b0d623f7
A
1956 goto error_exit;
1957 }
55e303ae 1958
b0d623f7
A
1959 /* check our aio limits to throttle bad or rude user land behavior */
1960 if (aio_get_process_count( procp ) >= aio_max_requests_per_process) {
1961 printf("aio_queue_async_request(): too many in flight for proc: %d.\n", procp->p_aio_total_count);
0a7de745 1962 result = EAGAIN;
55e303ae
A
1963 goto error_exit;
1964 }
0a7de745 1965
b0d623f7
A
1966 /* Add the IO to proc and work queues, wake up threads as appropriate */
1967 lck_mtx_convert_spin(aio_proc_mutex(procp));
1968 aio_enqueue_work(procp, entryp, 1);
0a7de745 1969
b0d623f7 1970 aio_proc_unlock(procp);
0a7de745
A
1971
1972 paio_offset = (uint32_t*) &entryp->aiocb.aio_offset;
1973 paio_nbytes = (uint32_t*) &entryp->aiocb.aio_nbytes;
1974 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_START,
1975 (int)procp, (int)aiocbp, entryp->flags, entryp->aiocb.aio_fildes, 0 );
1976 KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_AIO, AIO_work_queued)) | DBG_FUNC_END,
1977 paio_offset[0], (sizeof(entryp->aiocb.aio_offset) == sizeof(uint64_t) ? paio_offset[1] : 0),
1978 paio_nbytes[0], (sizeof(entryp->aiocb.aio_nbytes) == sizeof(uint64_t) ? paio_nbytes[1] : 0),
1979 0 );
1980
1981 return 0;
1982
b0d623f7
A
1983error_exit:
1984 /*
1985 * This entry has not been queued up so no worries about
1986 * unlocked state and aio_map
1987 */
1988 aio_proc_unlock(procp);
1989 aio_free_request(entryp);
91447636 1990
b0d623f7
A
1991error_noalloc:
1992 aio_decrement_total_count();
91447636 1993
0a7de745 1994 return result;
b0d623f7 1995} /* aio_queue_async_request */
91447636 1996
b0d623f7
A
1997
1998/*
1999 * lio_create_entry
2000 *
2001 * Allocate an aio_workq_entry and fill it in. If all goes well return 0
2002 * and pass the aio_workq_entry pointer back to our caller.
2003 *
2004 * Parameters:	procp			The process making the request
2005 * aiocbp The aio context buffer pointer
2006 * group_tag The group tag used to indicate a
2007 * group of operations has completed
2008 * entrypp Pointer to the pointer to receive the
2009 * address of the created aio_workq_entry
2010 *
2011 * Returns: 0 Successfully created
2012 * EAGAIN Try again (usually resource shortage)
2013 *
2014 *
2015 * Notes: We get a reference to our caller's user land map in order
0a7de745 2016 * to keep it around while we are processing the request.
b0d623f7
A
2017 *
2018 *		lio_listio calls behave differently at completion: they do
2019 * completion notification when all async IO requests have
2020 * completed. We use group_tag to tag IO requests that behave
0a7de745 2021 * in the delay notification manner.
b0d623f7
A
2022 *
2023 * All synchronous operations are considered to not have a
2024 * signal routine associated with them (sigp == USER_ADDR_NULL).
2025 */
2026static int
2027lio_create_entry(proc_t procp, user_addr_t aiocbp, void *group_tag,
0a7de745 2028 aio_workq_entry **entrypp )
b0d623f7 2029{
0a7de745
A
2030 aio_workq_entry *entryp;
2031 int result;
b0d623f7
A
2032
2033 entryp = aio_create_queue_entry( procp, aiocbp, group_tag, AIO_LIO);
0a7de745
A
2034 if (entryp == NULL) {
2035 result = EAGAIN;
55e303ae
A
2036 goto error_exit;
2037 }
2038
b0d623f7
A
2039 /*
2040 * Look for lio_listio LIO_NOP requests and ignore them; this is
2041 * not really an error, but we need to free our aio_workq_entry.
2042 */
0a7de745 2043 if (entryp->aiocb.aio_lio_opcode == LIO_NOP) {
55e303ae
A
2044 result = 0;
2045 goto error_exit;
2046 }
2047
55e303ae 2048 *entrypp = entryp;
0a7de745
A
2049 return 0;
2050
55e303ae 2051error_exit:
b0d623f7 2052
0a7de745 2053 if (entryp != NULL) {
b0d623f7
A
2054 /*
2055 * This entry has not been queued up so no worries about
2056 * unlocked state and aio_map
2057 */
2058 aio_free_request(entryp);
2059 }
0a7de745
A
2060
2061 return result;
b0d623f7 2062} /* lio_create_entry */
55e303ae
A
2063
2064
2065/*
2066 * aio_free_request - remove our reference on the user land map and
b0d623f7
A
2067 * free the work queue entry resources. The entry is off all lists
2068 * and has zero refcount, so no one can have a pointer to it.
55e303ae
A
2069 */
2070
2071static int
b0d623f7 2072aio_free_request(aio_workq_entry *entryp)
55e303ae
A
2073{
2074 /* remove our reference to the user land map. */
0a7de745 2075 if (VM_MAP_NULL != entryp->aio_map) {
b0d623f7 2076 vm_map_deallocate(entryp->aio_map);
55e303ae 2077 }
b0d623f7 2078
39236c6e 2079 /* remove our reference to thread which enqueued the request */
0a7de745 2080 if (NULL != entryp->thread) {
39236c6e
A
2081 thread_deallocate( entryp->thread );
2082 }
2083
b0d623f7 2084 entryp->aio_refcount = -1; /* A bit of poisoning in case of bad refcounting. */
0a7de745 2085
91447636 2086 zfree( aio_workq_zonep, entryp );
55e303ae 2087
0a7de745 2088 return 0;
55e303ae
A
2089} /* aio_free_request */
2090
2091
b0d623f7
A
2092/*
2093 * aio_validate
2094 *
2095 * validate the aiocb passed in by one of the aio syscalls.
55e303ae 2096 */
55e303ae 2097static int
0a7de745 2098aio_validate( aio_workq_entry *entryp )
55e303ae 2099{
0a7de745
A
2100 struct fileproc *fp;
2101 int flag;
2102 int result;
2103
55e303ae
A
2104 result = 0;
2105
0a7de745
A
2106 if ((entryp->flags & AIO_LIO) != 0) {
2107 if (entryp->aiocb.aio_lio_opcode == LIO_READ) {
55e303ae 2108 entryp->flags |= AIO_READ;
0a7de745 2109 } else if (entryp->aiocb.aio_lio_opcode == LIO_WRITE) {
55e303ae 2110 entryp->flags |= AIO_WRITE;
0a7de745
A
2111 } else if (entryp->aiocb.aio_lio_opcode == LIO_NOP) {
2112 return 0;
2113 } else {
2114 return EINVAL;
2115 }
55e303ae
A
2116 }
2117
2118 flag = FREAD;
0a7de745 2119 if ((entryp->flags & (AIO_WRITE | AIO_FSYNC | AIO_DSYNC)) != 0) {
55e303ae
A
2120 flag = FWRITE;
2121 }
2122
0a7de745
A
2123 if ((entryp->flags & (AIO_READ | AIO_WRITE)) != 0) {
2124 if (entryp->aiocb.aio_nbytes > INT_MAX ||
2125 entryp->aiocb.aio_buf == USER_ADDR_NULL ||
2126 entryp->aiocb.aio_offset < 0) {
2127 return EINVAL;
2128 }
55e303ae
A
2129 }
2130
b0d623f7
A
2131 /*
2132 * validate aiocb.aio_sigevent. at this point we only support
2133 * sigev_notify equal to SIGEV_SIGNAL or SIGEV_NONE. this means
2134 * sigev_value, sigev_notify_function, and sigev_notify_attributes
2135 * are ignored, since SIGEV_THREAD is unsupported. This is consistent
2136	 * with no [RTS] (Realtime Signal) option group support.
55e303ae 2137 */
0a7de745 2138 switch (entryp->aiocb.aio_sigevent.sigev_notify) {
b0d623f7 2139 case SIGEV_SIGNAL:
0a7de745
A
2140 {
2141 int signum;
b0d623f7 2142
55e303ae
A
2143 /* make sure we have a valid signal number */
2144 signum = entryp->aiocb.aio_sigevent.sigev_signo;
0a7de745
A
2145 if (signum <= 0 || signum >= NSIG ||
2146 signum == SIGKILL || signum == SIGSTOP) {
2147 return EINVAL;
2148 }
2149 }
2150 break;
b0d623f7
A
2151
2152 case SIGEV_NONE:
2153 break;
2154
2155 case SIGEV_THREAD:
0a7de745 2156 /* Unsupported [RTS] */
b0d623f7
A
2157
2158 default:
0a7de745 2159 return EINVAL;
b0d623f7 2160 }
0a7de745 2161
55e303ae 2162 /* validate the file descriptor and that the file was opened
91447636 2163 * for the appropriate read / write access.
55e303ae 2164 */
91447636 2165 proc_fdlock(entryp->procp);
55e303ae 2166
0a7de745
A
2167 result = fp_lookup( entryp->procp, entryp->aiocb.aio_fildes, &fp, 1);
2168 if (result == 0) {
2169 if ((fp->f_fglob->fg_flag & flag) == 0) {
55e303ae
A
2170 /* we don't have read or write access */
2171 result = EBADF;
0a7de745 2172 } else if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) {
55e303ae
A
2173 /* this is not a file */
2174 result = ESPIPE;
0a7de745
A
2175 } else {
2176 fp->f_flags |= FP_AIOISSUED;
2177 }
91447636 2178
0a7de745
A
2179 fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 1);
2180 } else {
55e303ae
A
2181 result = EBADF;
2182 }
55e303ae 2183
0a7de745 2184 proc_fdunlock(entryp->procp);
55e303ae 2185
0a7de745 2186 return result;
55e303ae
A
2187} /* aio_validate */
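/*
 * Hedged userland sketch (not part of kern_aio.c): the shape of an aiocb
 * that passes aio_validate() above -- a descriptor opened for the matching
 * access mode, aio_nbytes <= INT_MAX, a non-NULL buffer, a non-negative
 * offset, and a sigevent of SIGEV_SIGNAL (with a catchable signal) or
 * SIGEV_NONE.  SIGEV_THREAD is rejected with EINVAL.  Names are illustrative.
 */
#include <aio.h>
#include <signal.h>
#include <string.h>

static void
init_valid_read_aiocb(struct aiocb *cb, int fd, void *buf, size_t len)
{
	memset(cb, 0, sizeof(*cb));
	cb->aio_fildes = fd;            /* must be open for reading (FREAD) */
	cb->aio_buf = buf;              /* NULL buffers are rejected */
	cb->aio_nbytes = len;           /* values above INT_MAX are rejected */
	cb->aio_offset = 0;             /* negative offsets are rejected */
	cb->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
	cb->aio_sigevent.sigev_signo = SIGUSR1;   /* SIGKILL/SIGSTOP are rejected */
}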
2188
0a7de745 2189static int
b0d623f7
A
2190aio_increment_total_count()
2191{
2192 return OSIncrementAtomic(&aio_anchor.aio_total_count);
2193}
2194
0a7de745 2195static int
b0d623f7
A
2196aio_decrement_total_count()
2197{
2198 int old = OSDecrementAtomic(&aio_anchor.aio_total_count);
2199 if (old <= 0) {
2200 panic("Negative total AIO count!\n");
2201 }
55e303ae 2202
b0d623f7
A
2203 return old;
2204}
55e303ae
A
2205
2206static int
0a7de745 2207aio_get_process_count(proc_t procp )
55e303ae 2208{
b0d623f7 2209 return procp->p_aio_total_count;
55e303ae
A
2210} /* aio_get_process_count */
2211
55e303ae 2212static int
0a7de745 2213aio_get_all_queues_count( void )
55e303ae 2214{
b0d623f7 2215 return aio_anchor.aio_total_count;
55e303ae
A
2216} /* aio_get_all_queues_count */
2217
2218
2219/*
0a7de745 2220 * do_aio_completion. Handle async IO completion.
55e303ae 2221 */
55e303ae 2222static void
0a7de745 2223do_aio_completion( aio_workq_entry *entryp )
55e303ae 2224{
0a7de745
A
2225 boolean_t lastLioCompleted = FALSE;
2226 aio_lio_context *lio_context = NULL;
b0d623f7 2227 int waiter = 0;
0a7de745 2228
b0d623f7 2229 lio_context = (aio_lio_context *)entryp->group_tag;
0a7de745 2230
b0d623f7 2231 if (lio_context != NULL) {
b0d623f7
A
2232 aio_proc_lock_spin(entryp->procp);
2233
2234 /* Account for this I/O completing. */
0a7de745
A
2235 lio_context->io_completed++;
2236
b0d623f7 2237 /* Are we done with this lio context? */
0a7de745
A
2238 if (lio_context->io_issued == lio_context->io_completed) {
2239 lastLioCompleted = TRUE;
2240 }
2241
b0d623f7 2242 waiter = lio_context->io_waiter;
0a7de745 2243
b0d623f7
A
2244 /* explicit wakeup of lio_listio() waiting in LIO_WAIT */
2245 if ((entryp->flags & AIO_LIO_NOTIFY) && (lastLioCompleted) && (waiter != 0)) {
2246 /* wake up the waiter */
2247 wakeup(lio_context);
2248 }
0a7de745 2249
b0d623f7
A
2250 aio_proc_unlock(entryp->procp);
2251 }
0a7de745
A
2252
2253 if (entryp->aiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL &&
2254 (entryp->flags & AIO_DISABLE) == 0) {
2255 boolean_t performSignal = FALSE;
2256 if (lio_context == NULL) {
2257 performSignal = TRUE;
2258 } else {
2259 /*
b0d623f7
A
2260 * If this was the last request in the group and a signal
2261 * is desired, send one.
2262 */
2263 performSignal = lastLioCompleted;
0a7de745
A
2264 }
2265
2266 if (performSignal) {
2267 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_sig)) | DBG_FUNC_NONE,
2268 (int)entryp->procp, (int)entryp->uaiocbp,
2269 entryp->aiocb.aio_sigevent.sigev_signo, 0, 0 );
2270
55e303ae 2271 psignal( entryp->procp, entryp->aiocb.aio_sigevent.sigev_signo );
55e303ae
A
2272 }
2273 }
2274
b0d623f7
A
2275 if ((entryp->flags & AIO_EXIT_WAIT) && (entryp->flags & AIO_CLOSE_WAIT)) {
2276 panic("Close and exit flags set at the same time\n");
2277 }
0a7de745 2278
55e303ae 2279 /*
b0d623f7
A
2280 * need to handle case where a process is trying to exit, exec, or
2281 * close and is currently waiting for active aio requests to complete.
0a7de745
A
2282 * If AIO_CLEANUP_WAIT is set then we need to look to see if there are any
2283 * other requests in the active queue for this process. If there are
b0d623f7
A
2284 * none then wakeup using the AIO_CLEANUP_SLEEP_CHAN tsleep channel.
2285 * If there are some still active then do nothing - we only want to
0a7de745 2286 * wakeup when all active aio requests for the process are complete.
b0d623f7
A
2287 *
2288 * Don't need to lock the entry or proc to check the cleanup flag. It can only be
0a7de745 2289 * set for cancellation, while the entryp is still on a proc list; now it's
b0d623f7 2290 * off, so that flag is already set if it's going to be.
55e303ae 2291 */
0a7de745
A
2292 if ((entryp->flags & AIO_EXIT_WAIT) != 0) {
2293 int active_requests;
2294
2295 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wait)) | DBG_FUNC_NONE,
2296 (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
55e303ae 2297
b0d623f7 2298 aio_proc_lock_spin(entryp->procp);
55e303ae 2299 active_requests = aio_active_requests_for_process( entryp->procp );
0a7de745
A
2300 if (active_requests < 1) {
2301 /*
b0d623f7
A
2302 * no active aio requests for this process, continue exiting. In this
2303			 * case, there should be no one else waiting on the proc in AIO...
2304 */
2305 wakeup_one((caddr_t)&entryp->procp->AIO_CLEANUP_SLEEP_CHAN);
2306 aio_proc_unlock(entryp->procp);
55e303ae 2307
0a7de745
A
2308 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE,
2309 (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
b0d623f7
A
2310 } else {
2311 aio_proc_unlock(entryp->procp);
55e303ae 2312 }
55e303ae
A
2313 }
2314
0a7de745
A
2315 if ((entryp->flags & AIO_CLOSE_WAIT) != 0) {
2316 int active_requests;
2317
2318 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wait)) | DBG_FUNC_NONE,
2319 (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
2320
b0d623f7
A
2321 aio_proc_lock_spin(entryp->procp);
2322 active_requests = aio_proc_active_requests_for_file( entryp->procp, entryp->aiocb.aio_fildes);
0a7de745 2323 if (active_requests < 1) {
b0d623f7
A
2324 /* Can't wakeup_one(); multiple closes might be in progress. */
2325 wakeup(&entryp->procp->AIO_CLEANUP_SLEEP_CHAN);
2326 aio_proc_unlock(entryp->procp);
2327
0a7de745
A
2328 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_cleanup_wake)) | DBG_FUNC_NONE,
2329 (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
b0d623f7
A
2330 } else {
2331 aio_proc_unlock(entryp->procp);
2332 }
2333 }
0a7de745 2334 /*
b0d623f7
A
2335	 * A thread in aio_suspend() wants to know about completed IOs. If it checked
2336 * the done list before we moved our AIO there, then it already asserted its wait,
2337 * and we can wake it up without holding the lock. If it checked the list after
2338	 * we did our move, then it already has seen the AIO that we moved. Ergo, we
2339 * can do our wakeup without holding the lock.
55e303ae 2340 */
0a7de745
A
2341 wakeup((caddr_t) &entryp->procp->AIO_SUSPEND_SLEEP_CHAN );
2342 KERNEL_DEBUG((BSDDBG_CODE(DBG_BSD_AIO, AIO_completion_suspend_wake)) | DBG_FUNC_NONE,
2343 (int)entryp->procp, (int)entryp->uaiocbp, 0, 0, 0 );
55e303ae 2344
0a7de745 2345 /*
b0d623f7
A
2346 * free the LIO context if the last lio completed and no thread is
2347 * waiting
2348 */
0a7de745
A
2349 if (lastLioCompleted && (waiter == 0)) {
2350 free_lio_context(lio_context);
2351 }
b0d623f7 2352} /* do_aio_completion */
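/*
 * Hedged userland sketch (not part of kern_aio.c): the SIGEV_SIGNAL path
 * that do_aio_completion() above drives with psignal().  It assumes the
 * caller installed a handler for the signal named in aio_sigevent before
 * submitting the request; the flag and signal number are illustrative only.
 */
#include <aio.h>
#include <errno.h>
#include <signal.h>
#include <unistd.h>

static volatile sig_atomic_t aio_signaled;

static void
on_aio_signal(int signo)
{
	(void)signo;
	aio_signaled = 1;               /* async-signal-safe: just set a flag */
}

static ssize_t
reap_after_signal(struct aiocb *cb)
{
	while (!aio_signaled) {
		pause();                        /* woken by the completion signal */
	}
	if (aio_error(cb) == EINPROGRESS) {
		return -1;                      /* spurious wakeup; still running */
	}
	return aio_return(cb);              /* byte count, or -1 with errno set */
}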
55e303ae
A
2353
2354
2355/*
2356 * do_aio_read
2357 */
2358static int
2359do_aio_read( aio_workq_entry *entryp )
2360{
0a7de745
A
2361 struct fileproc *fp;
2362 int error;
2363 struct vfs_context context;
55e303ae 2364
0a7de745
A
2365 if ((error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp, 0))) {
2366 return error;
2367 }
2368 if ((fp->f_fglob->fg_flag & FREAD) == 0) {
91447636 2369 fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
0a7de745 2370 return EBADF;
91447636 2371 }
2d21ac55 2372
0a7de745 2373 context.vc_thread = entryp->thread; /* XXX */
2d21ac55
A
2374 context.vc_ucred = fp->f_fglob->fg_cred;
2375
0a7de745
A
2376 error = dofileread(&context, fp,
2377 entryp->aiocb.aio_buf,
2378 entryp->aiocb.aio_nbytes,
2379 entryp->aiocb.aio_offset, FOF_OFFSET,
2380 &entryp->returnval);
2d21ac55 2381 fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
0a7de745
A
2382
2383 return error;
55e303ae
A
2384} /* do_aio_read */
2385
2386
2387/*
2388 * do_aio_write
2389 */
2390static int
2391do_aio_write( aio_workq_entry *entryp )
2392{
0a7de745
A
2393 struct fileproc *fp;
2394 int error, flags;
2395 struct vfs_context context;
55e303ae 2396
0a7de745
A
2397 if ((error = fp_lookup(entryp->procp, entryp->aiocb.aio_fildes, &fp, 0))) {
2398 return error;
2399 }
2400 if ((fp->f_fglob->fg_flag & FWRITE) == 0) {
91447636 2401 fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
0a7de745 2402 return EBADF;
91447636 2403 }
2d21ac55 2404
b0d623f7 2405 flags = FOF_PCRED;
0a7de745 2406 if ((fp->f_fglob->fg_flag & O_APPEND) == 0) {
b0d623f7
A
2407 flags |= FOF_OFFSET;
2408 }
2409
0a7de745 2410 context.vc_thread = entryp->thread; /* XXX */
2d21ac55
A
2411 context.vc_ucred = fp->f_fglob->fg_cred;
2412
2413 /* NB: tell dofilewrite the offset, and to use the proc cred */
2414 error = dofilewrite(&context,
0a7de745
A
2415 fp,
2416 entryp->aiocb.aio_buf,
2417 entryp->aiocb.aio_nbytes,
2418 entryp->aiocb.aio_offset,
2419 flags,
2420 &entryp->returnval);
2421
2422 if (entryp->returnval) {
fe8ab488 2423 fp_drop_written(entryp->procp, entryp->aiocb.aio_fildes, fp);
0a7de745 2424 } else {
fe8ab488 2425 fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
0a7de745 2426 }
55e303ae 2427
0a7de745 2428 return error;
55e303ae
A
2429} /* do_aio_write */
2430
2431
2432/*
2433 * aio_active_requests_for_process - return number of active async IO
2434 * requests for the given process.
55e303ae 2435 */
55e303ae 2436static int
2d21ac55 2437aio_active_requests_for_process(proc_t procp )
55e303ae 2438{
0a7de745 2439 return procp->p_aio_active_count;
b0d623f7
A
2440} /* aio_active_requests_for_process */
2441
2442/*
2443 * Called with the proc locked.
2444 */
2445static int
2446aio_proc_active_requests_for_file(proc_t procp, int fd)
2447{
2448 int count = 0;
2449 aio_workq_entry *entryp;
2450 TAILQ_FOREACH(entryp, &procp->p_aio_activeq, aio_proc_link) {
2451 if (entryp->aiocb.aio_fildes == fd) {
2452 count++;
2453 }
2454 }
55e303ae 2455
b0d623f7 2456 return count;
55e303ae
A
2457} /* aio_active_requests_for_process */
2458
2459
b0d623f7 2460
55e303ae
A
2461/*
2462 * do_aio_fsync
2463 */
2464static int
2465do_aio_fsync( aio_workq_entry *entryp )
2466{
0a7de745
A
2467 struct vfs_context context;
2468 struct vnode *vp;
2469 struct fileproc *fp;
2470 int sync_flag;
2471 int error;
91447636 2472
b0d623f7
A
2473 /*
2474	 * We are never called unless either AIO_FSYNC or AIO_DSYNC is set.
2475	 *
2476	 * If AIO_DSYNC is set, we can tell the lower layers that it is OK
2477	 * to mark for update the metadata not strictly necessary for data
2478	 * retrieval, rather than forcing it to disk.
2479	 *
2480	 * If AIO_FSYNC is set, we also have to wait until metadata not strictly
2481	 * necessary for data retrieval is committed to stable storage (e.g.
2482	 * atime, mtime, ctime, etc.).
2483	 *
2484	 * Metadata necessary for data retrieval must be committed to stable
2485 * storage in either case (file length, etc.).
2486 */
0a7de745 2487 if (entryp->flags & AIO_FSYNC) {
b0d623f7 2488 sync_flag = MNT_WAIT;
0a7de745 2489 } else {
b0d623f7 2490 sync_flag = MNT_DWAIT;
0a7de745
A
2491 }
2492
91447636 2493 error = fp_getfvp( entryp->procp, entryp->aiocb.aio_fildes, &fp, &vp);
0a7de745
A
2494 if (error == 0) {
2495 if ((error = vnode_getwithref(vp))) {
2496 fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
91447636 2497 entryp->returnval = -1;
0a7de745 2498 return error;
91447636 2499 }
2d21ac55 2500 context.vc_thread = current_thread();
91447636
A
2501 context.vc_ucred = fp->f_fglob->fg_cred;
2502
b0d623f7 2503 error = VNOP_FSYNC( vp, sync_flag, &context);
91447636
A
2504
2505 (void)vnode_put(vp);
2506
2507 fp_drop(entryp->procp, entryp->aiocb.aio_fildes, fp, 0);
55e303ae 2508 }
0a7de745 2509 if (error != 0) {
55e303ae 2510 entryp->returnval = -1;
0a7de745 2511 }
55e303ae 2512
0a7de745 2513 return error;
55e303ae
A
2514} /* do_aio_fsync */
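/*
 * Hedged userland sketch (not part of kern_aio.c): the two operations that
 * reach do_aio_fsync() above.  O_SYNC maps to AIO_FSYNC / MNT_WAIT (data and
 * all metadata), O_DSYNC maps to AIO_DSYNC / MNT_DWAIT (data plus only the
 * metadata needed to retrieve it).  Assumes O_DSYNC is defined by the SDK in
 * use; fall back to O_SYNC where it is not.
 */
#include <aio.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>

static int
queue_aio_fsync(int fd, int want_full_sync)
{
	static struct aiocb cb;         /* must stay valid until the fsync completes */

	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_sigevent.sigev_notify = SIGEV_NONE;

	return aio_fsync(want_full_sync ? O_SYNC : O_DSYNC, &cb);
}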
2515
2516
2517/*
0a7de745 2518 * is_already_queued - runs through our queues to see if the given
55e303ae
A
2519 * aiocbp / process is there. Returns TRUE if there is a match
2520 * on any of our aio queues.
b0d623f7
A
2521 *
2522 * Called with proc aio lock held (can be held spin)
55e303ae 2523 */
55e303ae 2524static boolean_t
0a7de745
A
2525is_already_queued(proc_t procp,
2526 user_addr_t aiocbp )
55e303ae 2527{
0a7de745
A
2528 aio_workq_entry *entryp;
2529 boolean_t result;
2530
55e303ae 2531 result = FALSE;
0a7de745 2532
55e303ae 2533 /* look for matches on our queue of async IO requests that have completed */
b0d623f7 2534 TAILQ_FOREACH( entryp, &procp->p_aio_doneq, aio_proc_link ) {
0a7de745 2535 if (aiocbp == entryp->uaiocbp) {
55e303ae
A
2536 result = TRUE;
2537 goto ExitThisRoutine;
2538 }
2539 }
0a7de745 2540
55e303ae 2541 /* look for matches on our queue of active async IO requests */
b0d623f7 2542 TAILQ_FOREACH( entryp, &procp->p_aio_activeq, aio_proc_link ) {
0a7de745 2543 if (aiocbp == entryp->uaiocbp) {
55e303ae
A
2544 result = TRUE;
2545 goto ExitThisRoutine;
2546 }
2547 }
0a7de745 2548
55e303ae 2549ExitThisRoutine:
0a7de745 2550 return result;
55e303ae
A
2551} /* is_already_queued */
2552
2553
b0d623f7
A
2554static void
2555free_lio_context(aio_lio_context* context)
2556{
0a7de745 2557#if DEBUG
b0d623f7
A
2558 OSDecrementAtomic(&lio_contexts_alloced);
2559#endif /* DEBUG */
2560
2561 FREE( context, M_TEMP );
b0d623f7
A
2562} /* free_lio_context */
2563
2564
55e303ae
A
2565/*
2566 * aio initialization
2567 */
2568__private_extern__ void
2569aio_init( void )
2570{
0a7de745
A
2571 int i;
2572
91447636 2573 aio_lock_grp_attr = lck_grp_attr_alloc_init();
b0d623f7
A
2574	aio_proc_lock_grp = lck_grp_alloc_init("aio_proc", aio_lock_grp_attr);
2575	aio_entry_lock_grp = lck_grp_alloc_init("aio_entry", aio_lock_grp_attr);
2576	aio_queue_lock_grp = lck_grp_alloc_init("aio_queue", aio_lock_grp_attr);
91447636 2577 aio_lock_attr = lck_attr_alloc_init();
91447636 2578
b0d623f7
A
2579 lck_mtx_init(&aio_entry_mtx, aio_entry_lock_grp, aio_lock_attr);
2580 lck_mtx_init(&aio_proc_mtx, aio_proc_lock_grp, aio_lock_attr);
55e303ae 2581
b0d623f7 2582 aio_anchor.aio_inflight_count = 0;
55e303ae 2583 aio_anchor.aio_done_count = 0;
b0d623f7
A
2584 aio_anchor.aio_total_count = 0;
2585 aio_anchor.aio_num_workqs = AIO_NUM_WORK_QUEUES;
2586
2587 for (i = 0; i < AIO_NUM_WORK_QUEUES; i++) {
2588 aio_workq_init(&aio_anchor.aio_async_workqs[i]);
2589 }
2590
55e303ae 2591
0a7de745 2592 i = sizeof(aio_workq_entry);
55e303ae 2593 aio_workq_zonep = zinit( i, i * aio_max_requests, i * aio_max_requests, "aiowq" );
0a7de745 2594
55e303ae 2595 _aio_create_worker_threads( aio_worker_threads );
55e303ae
A
2596} /* aio_init */
2597
2598
2599/*
2600 * aio worker threads created here.
2601 */
2602__private_extern__ void
2603_aio_create_worker_threads( int num )
2604{
0a7de745
A
2605 int i;
2606
55e303ae 2607 /* create some worker threads to handle the async IO requests */
0a7de745
A
2608 for (i = 0; i < num; i++) {
2609 thread_t myThread;
2610
2611 if (KERN_SUCCESS != kernel_thread_start((thread_continue_t)aio_work_thread, NULL, &myThread)) {
2612 printf( "%s - failed to create a work thread \n", __FUNCTION__ );
2613 } else {
b0d623f7 2614 thread_deallocate(myThread);
0a7de745 2615 }
55e303ae 2616 }
0a7de745 2617
55e303ae 2618 return;
55e303ae
A
2619} /* _aio_create_worker_threads */
2620
2621/*
2622 * Return the current activation utask
2623 */
2624task_t
2625get_aiotask(void)
2626{
0a7de745 2627 return ((struct uthread *)get_bsdthread_info(current_thread()))->uu_aio_task;
91447636
A
2628}
2629
2630
2631/*
2632 * In the case of an aiocb from a
2633 * 32-bit process we need to expand some longs and pointers to the correct
2634 * sizes in order to let downstream code always work on the same type of
2635 * aiocb (in our case that is a user_aiocb)
2636 */
0a7de745
A
2637static void
2638do_munge_aiocb_user32_to_user( struct user32_aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp )
91447636
A
2639{
2640 the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes;
2641 the_user_aiocbp->aio_offset = my_aiocbp->aio_offset;
2642 the_user_aiocbp->aio_buf = CAST_USER_ADDR_T(my_aiocbp->aio_buf);
2643 the_user_aiocbp->aio_nbytes = my_aiocbp->aio_nbytes;
2644 the_user_aiocbp->aio_reqprio = my_aiocbp->aio_reqprio;
2645 the_user_aiocbp->aio_lio_opcode = my_aiocbp->aio_lio_opcode;
2646
2647 /* special case here. since we do not know if sigev_value is an */
2648 /* int or a ptr we do NOT cast the ptr to a user_addr_t. This */
2649 /* means if we send this info back to user space we need to remember */
2650 /* sigev_value was not expanded for the 32-bit case. */
2651 /* NOTE - this does NOT affect us since we don't support sigev_value */
2652 /* yet in the aio context. */
2653 //LP64
2654 the_user_aiocbp->aio_sigevent.sigev_notify = my_aiocbp->aio_sigevent.sigev_notify;
2655 the_user_aiocbp->aio_sigevent.sigev_signo = my_aiocbp->aio_sigevent.sigev_signo;
0a7de745
A
2656 the_user_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int =
2657 my_aiocbp->aio_sigevent.sigev_value.sival_int;
2658 the_user_aiocbp->aio_sigevent.sigev_notify_function =
2659 CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_function);
2660 the_user_aiocbp->aio_sigevent.sigev_notify_attributes =
2661 CAST_USER_ADDR_T(my_aiocbp->aio_sigevent.sigev_notify_attributes);
55e303ae 2662}
b0d623f7
A
2663
2664/* Similar for 64-bit user process, so that we don't need to satisfy
2665 * the alignment constraints of the original user64_aiocb
2666 */
0a7de745
A
2667static void
2668do_munge_aiocb_user64_to_user( struct user64_aiocb *my_aiocbp, struct user_aiocb *the_user_aiocbp )
b0d623f7
A
2669{
2670 the_user_aiocbp->aio_fildes = my_aiocbp->aio_fildes;
2671 the_user_aiocbp->aio_offset = my_aiocbp->aio_offset;
2672 the_user_aiocbp->aio_buf = my_aiocbp->aio_buf;
2673 the_user_aiocbp->aio_nbytes = my_aiocbp->aio_nbytes;
2674 the_user_aiocbp->aio_reqprio = my_aiocbp->aio_reqprio;
2675 the_user_aiocbp->aio_lio_opcode = my_aiocbp->aio_lio_opcode;
0a7de745 2676
b0d623f7
A
2677 the_user_aiocbp->aio_sigevent.sigev_notify = my_aiocbp->aio_sigevent.sigev_notify;
2678 the_user_aiocbp->aio_sigevent.sigev_signo = my_aiocbp->aio_sigevent.sigev_signo;
0a7de745
A
2679 the_user_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int =
2680 my_aiocbp->aio_sigevent.sigev_value.size_equivalent.sival_int;
2681 the_user_aiocbp->aio_sigevent.sigev_notify_function =
2682 my_aiocbp->aio_sigevent.sigev_notify_function;
2683 the_user_aiocbp->aio_sigevent.sigev_notify_attributes =
2684 my_aiocbp->aio_sigevent.sigev_notify_attributes;
b0d623f7 2685}