]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/miscfs/specfs/spec_vnops.c
xnu-1228.15.4.tar.gz
[apple/xnu.git] / bsd / miscfs / specfs / spec_vnops.c
index 752afe8df04fe8f4e47094aeaca7ec796f46ee2c..6c26b1799734bcec0b423c8747d8e095861e1962 100644 (file)
@@ -590,7 +590,6 @@ spec_fsync(struct vnop_fsync_args *ap)
  */
 extern int hard_throttle_on_root;
 void IOSleep(int);
-extern void throttle_lowpri_io(int *lowpri_window,mount_t v_mount);
 
 // the low priority process may wait for at most LOWPRI_MAX_DELAY millisecond
 #define LOWPRI_INITIAL_WINDOW_MSECS 100
@@ -599,6 +598,13 @@ extern void throttle_lowpri_io(int *lowpri_window,mount_t v_mount);
 #define LOWPRI_MAX_WAITING_MSECS 200
 #define LOWPRI_SLEEP_INTERVAL 5
 
+struct _throttle_io_info_t {
+       struct timeval  last_normal_IO_timestamp;
+       struct timeval  last_IO_timestamp;
+       SInt32 numthreads_throttling;
+};
+
+struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV];
 int    lowpri_IO_initial_window_msecs  = LOWPRI_INITIAL_WINDOW_MSECS;
 int    lowpri_IO_window_msecs_inc  = LOWPRI_WINDOW_MSECS_INC;
 int    lowpri_max_window_msecs  = LOWPRI_MAX_WINDOW_MSECS;
@@ -609,40 +615,100 @@ SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_window_inc, CTLFLAG_RW, &lowpri_IO_window
 SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
 SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
 
-void throttle_lowpri_io(int *lowpri_window,mount_t v_mount)
+void
+throttle_info_get_last_io_time(mount_t mp, struct timeval *tv)
+{
+       size_t devbsdunit;
+               
+       devbsdunit = mp->mnt_devbsdunit;
+
+       if (devbsdunit < LOWPRI_MAX_NUM_DEV) {
+               *tv = _throttle_io_info[devbsdunit].last_IO_timestamp;
+       } else {
+               memset(tv, 0, sizeof(*tv));
+       }
+}
+
+void
+update_last_io_time(mount_t mp)
+{
+       size_t devbsdunit;
+               
+       devbsdunit = mp->mnt_devbsdunit;
+
+       if (devbsdunit < LOWPRI_MAX_NUM_DEV) {
+               microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
+       }
+}
+
+int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit)
 {
-       int i;
-       struct timeval last_lowpri_IO_timestamp,last_normal_IO_timestamp;
        struct timeval elapsed;
-       int lowpri_IO_window_msecs;
-       struct timeval lowpri_IO_window;
-       int max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL;
+       int elapsed_msecs;
 
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
-                    *lowpri_window, 0, 0, 0, 0);
+       microuptime(&elapsed);
+       timevalsub(&elapsed, &_throttle_io_info[devbsdunit].last_normal_IO_timestamp);
+       elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
 
-        last_normal_IO_timestamp = v_mount->last_normal_IO_timestamp;
-                        
-       for (i=0; i<max_try_num; i++) {
-               microuptime(&last_lowpri_IO_timestamp);
+       if (lowpri_window_msecs == -1) // use the max waiting time
+               lowpri_window_msecs = lowpri_max_waiting_msecs;
 
-               elapsed = last_lowpri_IO_timestamp;
-               timevalsub(&elapsed, &last_normal_IO_timestamp);
+       return elapsed_msecs < lowpri_window_msecs;
+}
 
-               lowpri_IO_window_msecs = *lowpri_window;
-               lowpri_IO_window.tv_sec  = lowpri_IO_window_msecs / 1000;
-               lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000;
+void throttle_lowpri_io(boolean_t ok_to_sleep)
+{
+       int i;
+       int max_try_num;
+       struct uthread *ut;
 
-               if (timevalcmp(&elapsed, &lowpri_IO_window, <)) {
-                       IOSleep(LOWPRI_SLEEP_INTERVAL);
-               } else {
-                       break;
+       ut = get_bsdthread_info(current_thread());
+
+       if (ut->uu_lowpri_window == 0)
+               return;
+
+       max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, _throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
+                    ut->uu_lowpri_window, 0, 0, 0, 0);
+
+       if (ok_to_sleep == TRUE) {
+               for (i=0; i<max_try_num; i++) {
+                       if (throttle_io_will_be_throttled(ut->uu_lowpri_window, ut->uu_devbsdunit)) {
+                               IOSleep(LOWPRI_SLEEP_INTERVAL);
+                       } else {
+                               break;
+                       }
                }
        }
-
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
-                    *lowpri_window, i*5, 0, 0, 0);
-       *lowpri_window = 0;
+                    ut->uu_lowpri_window, i*5, 0, 0, 0);
+       SInt32 oldValue;
+       oldValue = OSDecrementAtomic(&_throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+       ut->uu_lowpri_window = 0;
+
+       if (oldValue <= 0) {
+               panic("%s: numthreads negative", __func__);
+       }
+}
+
+int throttle_get_io_policy(struct uthread **ut)
+{
+       int policy = IOPOL_DEFAULT;
+       proc_t p = current_proc();
+
+       *ut = get_bsdthread_info(current_thread());
+               
+       if (p != NULL)
+               policy = p->p_iopol_disk;
+
+       if (*ut != NULL) {
+               // the I/O policy of the thread overrides that of the process
+               // unless the I/O policy of the thread is default
+               if ((*ut)->uu_iopol_disk != IOPOL_DEFAULT)
+                       policy = (*ut)->uu_iopol_disk;
+       }
+       return policy;
 }
 
 int
@@ -677,23 +743,14 @@ spec_strategy(struct vnop_strategy_args *ap)
                hard_throttle_on_root = 1;
 
        if (lowpri_IO_initial_window_msecs) {
-               proc_t  p;
                struct uthread  *ut;
-               int policy = IOPOL_DEFAULT;
+               int policy;
                int is_throttleable_io = 0;
                int is_passive_io = 0;
-               p = current_proc();
-               ut = get_bsdthread_info(current_thread());
-               
-               if (p != NULL)
-                       policy = p->p_iopol_disk;
-
-               if (ut != NULL) {
-                       // the I/O policy of the thread overrides that of the process
-                       // unless the I/O policy of the thread is default
-                       if (ut->uu_iopol_disk != IOPOL_DEFAULT)
-                               policy = ut->uu_iopol_disk;
-               }
+               size_t devbsdunit;
+               SInt32 oldValue;
+
+               policy = throttle_get_io_policy(&ut);
 
                switch (policy) {
                case IOPOL_DEFAULT:
@@ -713,9 +770,13 @@ spec_strategy(struct vnop_strategy_args *ap)
                if (!is_throttleable_io && ISSET(bflags, B_PASSIVE))
                    is_passive_io |= 1;
 
+               if (buf_vnode(bp)->v_mount != NULL)
+                       devbsdunit = buf_vnode(bp)->v_mount->mnt_devbsdunit;
+               else
+                       devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
                if (!is_throttleable_io) {
-                       if (!is_passive_io && buf_vnode(bp)->v_mount != NULL){
-                               microuptime(&(buf_vnode(bp)->v_mount->last_normal_IO_timestamp));
+                       if (!is_passive_io){
+                               microuptime(&_throttle_io_info[devbsdunit].last_normal_IO_timestamp);
                        }
                } else {
                        /*
@@ -728,17 +789,40 @@ spec_strategy(struct vnop_strategy_args *ap)
                         * do the delay just before we return from the system
                         * call that triggered this I/O or from vnode_pagein
                         */
-                       if(buf_vnode(bp)->v_mount != NULL)
-                                ut->v_mount = buf_vnode(bp)->v_mount;
                        if (ut->uu_lowpri_window == 0) {
+                               ut->uu_devbsdunit = devbsdunit;
+                               oldValue = OSIncrementAtomic(&_throttle_io_info[devbsdunit].numthreads_throttling);
+                               if (oldValue < 0) {
+                                       panic("%s: numthreads negative", __func__);
+                               }
                                ut->uu_lowpri_window = lowpri_IO_initial_window_msecs;
+                               ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue;
                        } else {
-                               ut->uu_lowpri_window += lowpri_IO_window_msecs_inc;
-                               if (ut->uu_lowpri_window > lowpri_max_window_msecs)
-                                       ut->uu_lowpri_window = lowpri_max_window_msecs;
+                               if (ut->uu_devbsdunit != devbsdunit) { // the thread sends I/Os to different devices within the same system call
+                                       // keep track of the numthreads in the right device
+                                       OSDecrementAtomic(&_throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+                                       OSIncrementAtomic(&_throttle_io_info[devbsdunit].numthreads_throttling);
+                                       ut->uu_devbsdunit = devbsdunit;
+                               }
+                               int numthreads = MAX(1, _throttle_io_info[devbsdunit].numthreads_throttling);
+                               ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * numthreads;
+                               if (ut->uu_lowpri_window > lowpri_max_window_msecs * numthreads)
+                                       ut->uu_lowpri_window = lowpri_max_window_msecs * numthreads;
                        }
                }
        }
+
+       if ((bflags & B_READ) == 0) {
+               size_t devbsdunit;
+
+               if (buf_vnode(bp)->v_mount != NULL)
+                       devbsdunit = buf_vnode(bp)->v_mount->mnt_devbsdunit;
+               else
+                       devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
+               
+               microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
+       }
+
         (*bdevsw[major(bdev)].d_strategy)(bp);
 
         return (0);
@@ -827,7 +911,7 @@ spec_close(struct vnop_close_args *ap)
                 * sum of the reference counts on all the aliased
                 * vnodes descends to one, we are on last close.
                 */
-               if (vcount(vp) > 1)
+               if (vcount(vp) > 0)
                        return (0);
 #else /* DEVFS_IMPLEMENTS_LOCKING */
                /*
@@ -837,7 +921,7 @@ spec_close(struct vnop_close_args *ap)
                 * sum of the reference counts on all the aliased
                 * vnodes descends to one, we are on last close.
                 */
-               if (vcount(vp) > 1)
+               if (vcount(vp) > 0)
                        return (0);
 
                /*