+ /* we reserve enough space for largest possible pointer size */
+ MALLOC( aiocbpp, user_addr_t *, (nent * sizeof(user_addr_t)), M_TEMP, M_WAITOK );
+ if ( aiocbpp == NULL )
+ goto err;
+
+ /* copyin our aiocb pointers from list */
+ result = copyin( aiocblist, aiocbpp,
+ proc_is64bit(procp) ? (nent * sizeof(user64_addr_t))
+ : (nent * sizeof(user32_addr_t)) );
+ if ( result) {
+ FREE( aiocbpp, M_TEMP );
+ aiocbpp = NULL;
+ goto err;
+ }
+
+ /*
+ * We depend on a list of user_addr_t's so we need to
+ * munge and expand when these pointers came from a
+ * 32-bit process
+ */
+ if ( !proc_is64bit(procp) ) {
+ /* copy from last to first to deal with overlap */
+ user32_addr_t *my_ptrp = ((user32_addr_t *)aiocbpp) + (nent - 1);
+ user_addr_t *my_addrp = aiocbpp + (nent - 1);
+
+ for (i = 0; i < nent; i++, my_ptrp--, my_addrp--) {
+ *my_addrp = (user_addr_t) (*my_ptrp);
+ }
+ }
+
+err:
+ return (aiocbpp);
+}
+
+
+/*
+ * aio_copy_in_sigev
+ *
+ * Copy a sigevent structure from user space at sigp into the kernel-native
+ * struct user_sigevent at sigev, selecting the 64-bit or 32-bit user layout
+ * based on the calling process and widening pointer fields as needed.
+ *
+ * Parameters:	procp		Process whose address space sigp refers to
+ *		sigp		User address of the sigevent; may be
+ *				USER_ADDR_NULL, in which case nothing is copied
+ *		sigev		Kernel destination; fields are written only when
+ *				the copyin succeeds
+ *
+ * Returns:	0		Success (including the USER_ADDR_NULL case)
+ *		EAGAIN		copyin() of the user sigevent failed
+ */
+static int
+aio_copy_in_sigev(proc_t procp, user_addr_t sigp, struct user_sigevent *sigev)
+{
+ int result = 0;
+
+ /* No sigevent supplied: nothing to copy, report success */
+ if (sigp == USER_ADDR_NULL)
+ goto out;
+
+ /*
+ * We need to munge aio_sigevent since it contains pointers.
+ * Since we do not know if sigev_value is an int or a ptr we do
+ * NOT cast the ptr to a user_addr_t. This means if we send
+ * this info back to user space we need to remember sigev_value
+ * was not expanded for the 32-bit case.
+ *
+ * Notes: This does NOT affect us since we don't support
+ * sigev_value yet in the aio context.
+ */
+ if ( proc_is64bit(procp) ) {
+ /* 64-bit caller: pointer fields are already user_addr_t sized */
+ struct user64_sigevent sigevent64;
+
+ result = copyin( sigp, &sigevent64, sizeof(sigevent64) );
+ if ( result == 0 ) {
+ sigev->sigev_notify = sigevent64.sigev_notify;
+ sigev->sigev_signo = sigevent64.sigev_signo;
+ sigev->sigev_value.size_equivalent.sival_int = sigevent64.sigev_value.size_equivalent.sival_int;
+ sigev->sigev_notify_function = sigevent64.sigev_notify_function;
+ sigev->sigev_notify_attributes = sigevent64.sigev_notify_attributes;
+ }
+
+ } else {
+ /* 32-bit caller: widen the 32-bit user pointers to user_addr_t */
+ struct user32_sigevent sigevent32;
+
+ result = copyin( sigp, &sigevent32, sizeof(sigevent32) );
+ if ( result == 0 ) {
+ sigev->sigev_notify = sigevent32.sigev_notify;
+ sigev->sigev_signo = sigevent32.sigev_signo;
+ sigev->sigev_value.size_equivalent.sival_int = sigevent32.sigev_value.sival_int;
+ sigev->sigev_notify_function = CAST_USER_ADDR_T(sigevent32.sigev_notify_function);
+ sigev->sigev_notify_attributes = CAST_USER_ADDR_T(sigevent32.sigev_notify_attributes);
+ }
+ }
+
+ /* Any copyin failure is reported to the caller as EAGAIN */
+ if ( result != 0 ) {
+ result = EAGAIN;
+ }
+
+out:
+ return (result);
+}
+
+/*
+ * aio_enqueue_work
+ *
+ * Queue up the entry on the aio asynchronous work queue in priority order
+ * based on the relative priority of the request. We calculate the relative
+ * priority using the nice value of the caller and the request's
+ * aio_reqprio value.
+ *
+ * Parameters: procp Process queueing the I/O
+ * entryp The work queue entry being queued
+ *
+ * Returns: (void) No failure modes
+ *
+ * Notes: This function is used for both lio_listio and aio
+ *
+ * XXX: At some point, we may have to consider thread priority
+ * rather than process priority, but we don't maintain the
+ * adjusted priority for threads the POSIX way.
+ *
+ *
+ * Called with proc locked.
+ */
+static void
+aio_enqueue_work( proc_t procp, aio_workq_entry *entryp, int proc_locked)
+{
+#if 0
+ aio_workq_entry *my_entryp; /* used for insertion sort */
+#endif /* 0 */
+ aio_workq_t queue = aio_entry_workq(entryp);
+
+ /* Take the proc lock ourselves unless the caller already holds it */
+ if (proc_locked == 0) {
+ aio_proc_lock(procp);
+ }
+
+ ASSERT_AIO_PROC_LOCK_OWNED(procp);
+
+ /* Onto proc queue */
+ TAILQ_INSERT_TAIL(&procp->p_aio_activeq, entryp, aio_proc_link);
+ procp->p_aio_active_count++;
+ procp->p_aio_total_count++;
+
+ /* And work queue; wake one worker thread to service the new entry */
+ aio_workq_lock_spin(queue);
+ aio_workq_add_entry_locked(queue, entryp);
+ wait_queue_wakeup_one(queue->aioq_waitq, queue, THREAD_AWAKENED);
+ aio_workq_unlock(queue);
+
+ /* Drop the proc lock only if we took it above */
+ if (proc_locked == 0) {
+ aio_proc_unlock(procp);
+ }
+
+#if 0
+ /*
+ * Procedure:
+ *
+ * (1) The nice value is in the range PRIO_MIN..PRIO_MAX [-20..20]
+ * (2) The normalized nice value is in the range 0..((2 * NZERO) - 1)
+ * which is [0..39], with 0 not being used. In nice values, the
+ * lower the nice value, the higher the priority.
+ * (3) The normalized scheduling priority is the highest nice value
+ * minus the current nice value. In I/O scheduling priority, the
+ * higher the value the lower the priority, so it is the inverse
+ * of the nice value (the higher the number, the higher the I/O
+ * priority).
+ * (4) From the normalized scheduling priority, we subtract the
+ * request priority to get the request priority value number;
+ * this means that requests are only capable of depressing their
+ * priority relative to other requests.
+ */
+ entryp->priority = (((2 * NZERO) - 1) - procp->p_nice);
+
+ /* only permit depressing the priority */
+ if (entryp->aiocb.aio_reqprio < 0)
+ entryp->aiocb.aio_reqprio = 0;
+ if (entryp->aiocb.aio_reqprio > 0) {
+ entryp->priority -= entryp->aiocb.aio_reqprio;
+ if (entryp->priority < 0)
+ entryp->priority = 0;
+ }
+
+ /* Insertion sort the entry; lowest ->priority to highest */
+ TAILQ_FOREACH(my_entryp, &aio_anchor.aio_async_workq, aio_workq_link) {
+ if ( entryp->priority <= my_entryp->priority) {
+ TAILQ_INSERT_BEFORE(my_entryp, entryp, aio_workq_link);
+ break;
+ }
+ }
+ if (my_entryp == NULL)
+ TAILQ_INSERT_TAIL( &aio_anchor.aio_async_workq, entryp, aio_workq_link );
+#endif /* 0 */
+}
+
+
+/*
+ * lio_listio - initiate a list of IO requests. We process the list of
+ * aiocbs either synchronously (mode == LIO_WAIT) or asynchronously
+ * (mode == LIO_NOWAIT).
+ *
+ * The caller gets error and return status for each aiocb in the list
+ * via aio_error and aio_return. We must keep completed requests until
+ * released by the aio_return call.
+ */
+int
+lio_listio(proc_t p, struct lio_listio_args *uap, int *retval )
+{
+ int i;
+ int call_result;
+ int result;
+ int old_count;
+ aio_workq_entry **entryp_listp;
+ user_addr_t *aiocbpp;
+ struct user_sigevent aiosigev;
+ aio_lio_context *lio_context;
+ boolean_t free_context = FALSE;
+
+ KERNEL_DEBUG( (BSDDBG_CODE(DBG_BSD_AIO, AIO_listio)) | DBG_FUNC_START,