xnu-1228.15.4 (tags: mac-os-x-1058, v1228.15.4; snapshot: xnu-1228.15.4.tar.gz)
author:    Apple <opensource@apple.com>  Fri, 7 Aug 2009 20:02:49 +0000 (20:02 +0000)
committer: Apple <opensource@apple.com>  Fri, 7 Aug 2009 20:02:49 +0000 (20:02 +0000)
28 files changed:
bsd/hfs/hfs.h
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vfsutils.c
bsd/hfs/hfs_vnops.c
bsd/kern/kern_descrip.c
bsd/kern/uipc_usrreq.c
bsd/miscfs/specfs/spec_vnops.c
bsd/netat/atp_read.c
bsd/netat/sys_glue.c
bsd/netinet6/icmp6.c
bsd/netinet6/in6.h
bsd/netinet6/in6_proto.c
bsd/netinet6/ip6_var.h
bsd/netinet6/nd6.c
bsd/netinet6/nd6.h
bsd/netinet6/nd6_rtr.c
bsd/netkey/key.c
bsd/sys/systm.h
bsd/vfs/vfs_bio.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
config/MasterVersion
iokit/bsddev/IOKitBSDInit.cpp
osfmk/i386/acpi.c
osfmk/i386/cpu.c
osfmk/i386/pmCPU.c
osfmk/i386/pmCPU.h

index f3c12bb41e29e15473a84a243c2a0ef96aeb9d8b..5bb4ec9200f11a73b8a79bde7ef71da3e72ae494 100644 (file)
@@ -46,6 +46,7 @@
 #include <sys/quota.h>
 #include <sys/dirent.h>
 #include <sys/event.h>
+#include <kern/thread_call.h>
 
 #include <kern/locks.h>
 
@@ -272,8 +273,34 @@ typedef struct hfsmount {
        /* Resize variables: */
        u_int32_t               hfs_resize_filesmoved;
        u_int32_t               hfs_resize_totalfiles;
+
+       /*
+        * About the sync counters:
+        * hfs_sync_scheduled  keeps track of whether a timer was scheduled
+        *                     but we haven't started processing the callback
+        *                     (i.e. we haven't begun the flush).  This will
+        *                     be non-zero even after the callback has been
+        *                     invoked, up until we start the flush.
+        * hfs_sync_incomplete keeps track of the number of callbacks that have
+        *                     not completed yet (including callbacks not yet
+        *                     invoked).  We cannot safely unmount until this
+        *                     drops to zero.
+        *
+        * In both cases, we use counters, not flags, so that we can avoid
+        * taking locks.
+        */
+       int32_t         hfs_sync_scheduled;
+       int32_t         hfs_sync_incomplete;
+       u_int64_t       hfs_last_sync_request_time;
+       u_int64_t       hfs_last_sync_time;
+       uint32_t        hfs_active_threads;
+       thread_call_t   hfs_syncer;           // removable devices get synced by this callback
+
 } hfsmount_t;
 
+#define HFS_META_DELAY     (100)
+#define HFS_MILLISEC_SCALE (1000*1000)
+
 typedef hfsmount_t  ExtendedVCB;
 
 /* Aliases for legacy (Mac OS 9) field names */
@@ -689,6 +716,7 @@ extern int  hfs_virtualmetafile(struct cnode *);
 
 extern int hfs_start_transaction(struct hfsmount *hfsmp);
 extern int hfs_end_transaction(struct hfsmount *hfsmp);
+extern void hfs_sync_ejectable(struct hfsmount *hfsmp);
 
 
 /*****************************************************************************
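
The counter discipline added above is worth restating outside the diff. Below is a minimal user-space sketch of the same protocol, assuming C11 atomics; timer_enter_delayed() is a hypothetical stand-in for thread_call_enter_delayed(), returning true when the call was already pending.

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int sync_scheduled;   /* timer armed, flush not yet begun */
static atomic_int sync_incomplete;  /* callbacks entered but not finished */

/* Stub standing in for thread_call_enter_delayed(): returns true if a
 * call was already pending, i.e. another thread armed the timer first. */
static bool timer_enter_delayed(void) { return false; }

static void schedule_sync(void)
{
    if (atomic_load(&sync_scheduled) != 0)
        return;                                /* a callback is on its way */
    /* Optimistically assume we are the first to arm the timer. */
    atomic_fetch_add(&sync_scheduled, 1);
    if (timer_enter_delayed())
        atomic_fetch_sub(&sync_scheduled, 1);  /* lost the race; undo */
    else
        atomic_fetch_add(&sync_incomplete, 1); /* we now own a callback */
}

static void syncer_callback(void)
{
    /* ... perform the flush ... */
    /* Decrement only after the flush so no new timer is armed while a
     * (possibly slow) journal flush is still running. */
    atomic_fetch_sub(&sync_scheduled, 1);
    atomic_fetch_sub(&sync_incomplete, 1);     /* unmount waits for zero */
}

The key property: sync_scheduled may briefly over-count during the race, but sync_incomplete is only incremented by the thread that actually armed the timer, so the unmount path can trust it.
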
index 1e836052f75bf15a6e9d6abd8dbc3c34d27a6d40..e5ab6c8b95491ac7f02b1649b200e236aa25d9b6 100644 (file)
@@ -1836,12 +1836,20 @@ fail_change_next_allocation:
        }
 
        case HFS_GET_MOUNT_TIME:
-           return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
-           break;
+           if (is64bit) {
+               *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_mount_time;
+           } else {
+               *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_mount_time;
+           }
+           return 0;
 
        case HFS_GET_LAST_MTIME:
-           return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
-           break;
+           if (is64bit) {
+               *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_last_mounted_mtime;
+           } else {
+               *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_last_mounted_mtime;
+           }
+           return 0;
 
        case HFS_SET_BOOT_INFO:
                if (!vnode_isvroot(vp))
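
These cases now store the result directly into ap->a_data because the reworked fcntl() path in kern_descrip.c (below) performs the copyin()/copyout() itself; the handler only has to write the width the caller's ABI expects. A small illustrative sketch of that width selection; user_time_t and time32_t here are assumptions standing in for the kernel's types.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

typedef int64_t user_time_t;    /* assumed: a 64-bit process's time_t */
typedef int32_t time32_t;       /* illustrative: a 32-bit process's time_t */

/* Store a mount time at the width the caller's ABI expects; the fcntl
 * layer then copies exactly this many bytes back to user space. */
static size_t store_time(void *out, int64_t mount_time, int is64bit)
{
        if (is64bit) {
                user_time_t t = (user_time_t)mount_time;
                memcpy(out, &t, sizeof t);
                return sizeof t;
        } else {
                time32_t t = (time32_t)mount_time;
                memcpy(out, &t, sizeof t);
                return sizeof t;
        }
}
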
index b2e71a03485f9ac3355314cadc74ebc60818b42e..6f5c3eb5309bdc5abcb6120bb11834d067df19ca 100644 (file)
@@ -827,6 +827,99 @@ hfs_reload(struct mount *mountp)
        return (0);
 }
 
+int hfs_last_io_wait_time = 125000;
+SYSCTL_INT(_kern, OID_AUTO, hfs_last_io_wait_time, CTLFLAG_RW, &hfs_last_io_wait_time, 0, "number of usecs to wait after an i/o before syncing ejectable media");
+
+static void
+hfs_syncer(void *arg0, void *unused)
+{
+#pragma unused(unused)
+
+    struct hfsmount *hfsmp = arg0;
+    uint32_t secs, usecs, delay = HFS_META_DELAY;
+    uint64_t now;
+    struct timeval nowtv, last_io;
+
+    clock_get_calendar_microtime(&secs, &usecs);
+    now = ((uint64_t)secs * 1000000LL) + usecs;
+    //
+    // If we have put off the last sync for more than
+    // 5 seconds, force it so that we don't let too
+    // much i/o queue up (since flushing the journal
+    // causes the i/o queue to drain)
+    //
+    if ((now - hfsmp->hfs_last_sync_time) >= 5000000LL) {
+           goto doit;
+    }
+
+    //
+    // Find out when the last i/o was done to this device (read or write).  
+    //
+    throttle_info_get_last_io_time(hfsmp->hfs_mp, &last_io);
+    microuptime(&nowtv);
+    timevalsub(&nowtv, &last_io);
+
+    //
+    // If the last i/o was too recent, defer this sync until later.
+    // The limit chosen (125 milliseconds) was picked based on
+    // some experiments copying data to an SD card and seems to
+    // prevent us from issuing too many syncs.
+    //
+    if (nowtv.tv_sec >= 0 && nowtv.tv_usec > 0 && nowtv.tv_usec < hfs_last_io_wait_time) {
+           delay /= 2;
+           goto resched;
+    }
+    
+    //
+    // If there's pending i/o, also skip the sync.
+    //
+    if (hfsmp->hfs_devvp && hfsmp->hfs_devvp->v_numoutput > 0) {
+           goto resched;
+    }
+
+               
+    //
+    // Only flush the journal if we have not synced recently,
+    // the last sync request was more than 100 milliseconds ago,
+    // and no one is in the middle of a transaction right now.
+    // Otherwise we defer the sync and reschedule it for later.
+    //
+    if (  ((now - hfsmp->hfs_last_sync_time) >= 100000LL)
+       && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
+       && (hfsmp->hfs_active_threads == 0)
+       && (hfsmp->hfs_global_lock_nesting == 0)) {
+
+    doit:
+           OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
+           if (hfsmp->jnl) {
+                   journal_flush(hfsmp->jnl);
+           }
+           OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
+                  
+           clock_get_calendar_microtime(&secs, &usecs);
+           hfsmp->hfs_last_sync_time = ((int64_t)secs * 1000000) + usecs;
+           
+    } else if (hfsmp->hfs_active_threads == 0) {
+           uint64_t deadline;
+
+    resched:
+           clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
+           thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
+           return;
+    }
+           
+    //
+    // NOTE: we decrement these *after* we're done with the journal_flush()
+    // since it can take a significant amount of time and we don't want
+    // more callbacks scheduled until we're done with this one.
+    //
+    OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+    OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
+    wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
+}
+
+extern int IOBSDIsMediaEjectable( const char *cdev_name );
 
 /*
  * Common code for mount and mountroot
@@ -855,12 +948,18 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        u_int32_t iswritable;
        daddr64_t mdb_offset;
        int isvirtual = 0;
+       int isroot = 0;
 
        ronly = vfs_isrdonly(mp);
        dev = vnode_specrdev(devvp);
        cred = p ? vfs_context_ucred(context) : NOCRED;
        mntwrapper = 0;
 
+       if (args == NULL) {
+               /* only hfs_mountroot passes us NULL as the 'args' argument */
+               isroot = 1;     
+       }
+
        bp = NULL;
        hfsmp = NULL;
        mdbp = NULL;
@@ -1379,6 +1478,18 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                }
        }
 
+       /* ejectability checks will time out when the device is root_device, so skip them */
+       if (isroot == 0) {
+               if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 && 
+                               IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
+                       hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
+                       if (hfsmp->hfs_syncer == NULL) {
+                               printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
+                                               mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
+                       }
+               }
+       }
+
        /*
         * Start looking for free space to drop below this level and generate a
         * warning immediately if needed:
@@ -1451,6 +1562,38 @@ hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
        if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
                (void) hfs_recording_suspend(hfsmp);
 
+       /*
+        * Cancel any pending timers for this volume.  Then wait for any timers
+        * which have fired, but whose callbacks have not yet completed.
+        */
+       if (hfsmp->hfs_syncer)
+       {
+               struct timespec ts = {0, 100000000};    /* 0.1 seconds */
+               
+               /*
+                * Cancel any timers that have been scheduled, but have not
+                * fired yet.  NOTE: The kernel considers a timer complete as
+                * soon as it starts your callback, so the kernel does not
+                * keep track of the number of callbacks in progress.
+                */
+               if (thread_call_cancel(hfsmp->hfs_syncer))
+                       OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
+               thread_call_free(hfsmp->hfs_syncer);
+               hfsmp->hfs_syncer = NULL;
+               
+               /*
+                * This waits for all of the callbacks that were entered before
+                * we did thread_call_cancel above, but have not completed yet.
+                */
+               while(hfsmp->hfs_sync_incomplete > 0)
+               {
+                       msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
+               }
+               
+               if (hfsmp->hfs_sync_incomplete < 0)
+                       printf("hfs_unmount: hfs_sync_incomplete underflow (%d)!\n", hfsmp->hfs_sync_incomplete);
+       }
+       
        /*
         * Flush out the b-trees, volume bitmap and Volume Header
         */
@@ -1931,6 +2074,15 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
            journal_flush(hfsmp->jnl);
        }
 
+       {
+               uint32_t secs, usecs;
+               uint64_t now;
+
+               clock_get_calendar_microtime(&secs, &usecs);
+               now = ((uint64_t)secs * 1000000LL) + usecs;
+               hfsmp->hfs_last_sync_time = now;
+       }
+
        lck_rw_unlock_shared(&hfsmp->hfs_insync);       
        return (allerror);
 }
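
The unmount drain above can be mirrored in user space with a mutex and condition variable in place of msleep()/wakeup(). A sketch under that assumption; the kernel loop instead wakes every 0.1 seconds because hfs_sync_incomplete is decremented without holding a lock, so a wakeup could otherwise be missed.

#include <pthread.h>

static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  sync_cv   = PTHREAD_COND_INITIALIZER;
static int sync_incomplete;         /* callbacks entered but not finished */

/* Callback side: announce completion (the kernel's wakeup()). */
static void syncer_done(void)
{
        pthread_mutex_lock(&sync_lock);
        sync_incomplete--;
        pthread_cond_broadcast(&sync_cv);
        pthread_mutex_unlock(&sync_lock);
}

/* Unmount side: block until every in-flight callback has finished
 * (the kernel's msleep() loop on hfs_sync_incomplete). */
static void drain_syncer(void)
{
        pthread_mutex_lock(&sync_lock);
        while (sync_incomplete > 0)
                pthread_cond_wait(&sync_cv, &sync_lock);
        pthread_mutex_unlock(&sync_lock);
}
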
index ce577ec74165a91d876ad56cfc7e3aef5e61b3b1..d6dc1e356f0e7736a345ded924c12a7360485c45 100644 (file)
@@ -2347,6 +2347,46 @@ hfs_virtualmetafile(struct cnode *cp)
 }
 
 
+
+//
+// Fire off a timed callback to sync the disk if the
+// volume is on ejectable media.
+//
+__private_extern__
+void
+hfs_sync_ejectable(struct hfsmount *hfsmp)
+{
+       if (hfsmp->hfs_syncer)  {
+               uint32_t secs, usecs;
+               uint64_t now;
+
+               clock_get_calendar_microtime(&secs, &usecs);
+               now = ((uint64_t)secs * 1000000) + usecs;
+
+               if (hfsmp->hfs_sync_scheduled == 0) {
+                       uint64_t deadline;
+
+                       hfsmp->hfs_last_sync_request_time = now;
+
+                       clock_interval_to_deadline(HFS_META_DELAY, HFS_MILLISEC_SCALE, &deadline);
+
+                       /*
+                        * Increment hfs_sync_scheduled on the assumption that we're the
+                        * first thread to schedule the timer.  If some other thread beat
+                        * us, then we'll decrement it.  If we *were* the first to
+                        * schedule the timer, then we need to keep track that the
+                        * callback is waiting to complete.
+                        */
+                       OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+                       if (thread_call_enter_delayed(hfsmp->hfs_syncer, deadline))
+                               OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
+                       else
+                               OSIncrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
+               }               
+       }
+}
+
+
 __private_extern__
 int
 hfs_start_transaction(struct hfsmount *hfsmp)
@@ -2374,6 +2414,7 @@ hfs_start_transaction(struct hfsmount *hfsmp)
 
     if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
        lck_rw_lock_shared(&hfsmp->hfs_global_lock);
+       OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
        unlock_on_err = 1;
     }
 
@@ -2399,6 +2440,7 @@ hfs_start_transaction(struct hfsmount *hfsmp)
 out:
     if (ret != 0 && unlock_on_err) {
        lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+       OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
     }
 
     return ret;
@@ -2424,7 +2466,9 @@ hfs_end_transaction(struct hfsmount *hfsmp)
     }
 
     if (need_unlock) {
+       OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
        lck_rw_unlock_shared(&hfsmp->hfs_global_lock);
+       hfs_sync_ejectable(hfsmp);
     }
 
     return ret;
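
HFS_META_DELAY is 100 and HFS_MILLISEC_SCALE is 1000*1000 nanoseconds, so the deadline requested above lands roughly 100 ms in the future. A user-space sketch of the same arithmetic, assuming a POSIX monotonic clock:

#include <stdint.h>
#include <time.h>

#define HFS_META_DELAY     (100)        /* timer delay, in milliseconds */
#define HFS_MILLISEC_SCALE (1000*1000)  /* nanoseconds per millisecond */

/* Monotonic "now" in nanoseconds; a user-space stand-in for the
 * kernel's absolute-time clock. */
static uint64_t now_ns(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* Roughly what clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &d)
 * computes: an absolute deadline delay*scale nanoseconds in the future. */
static uint64_t deadline_ns(uint32_t delay)
{
        return now_ns() + (uint64_t)delay * HFS_MILLISEC_SCALE;
}
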
index 61875f626671835c2aa78314b1c8d2ac58ae195a..6d8d6ad33d8f9c5f9039c1a02e64eabd19c91136 100644 (file)
@@ -367,6 +367,11 @@ hfs_vnop_close(ap)
        }
        
        hfs_unlock(cp);
+
+       if (ap->a_fflag & FWASWRITTEN) {
+               hfs_sync_ejectable(hfsmp);
+       }
+
        return (0);
 }
 
@@ -2619,6 +2624,16 @@ hfs_vnop_rename(ap)
 skip_rm:
        /*
         * All done with tvp and fvp
+        *
+        * We also jump to this point if there was no destination observed during lookup and namei.
+        * However, because only iocounts are held at the VFS layer, there is nothing preventing a 
+        * competing thread from racing us and creating a file or dir at the destination of this rename 
+        * operation.  If this occurs, it may cause us to get a spurious EEXIST out of the cat_rename 
+        * call below.  To preserve rename's atomicity, we need to signal VFS to re-drive the 
+        * namei/lookup and restart the rename operation.  EEXIST is an allowable errno to be bubbled 
+        * out of the rename syscall, but not for this reason, since it is a synonym errno for ENOTEMPTY.
+        * To signal VFS, we return ERECYCLE (which is also used for lookup restarts). This errno
+        * will be swallowed and it will restart the operation.
         */
 
        lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
@@ -2626,6 +2641,9 @@ skip_rm:
        hfs_systemfile_unlock(hfsmp, lockflags);
 
        if (error) {
+               if (error == EEXIST) {
+                       error = ERECYCLE;
+               }
                goto out;
        }
 
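From the caller's perspective, ERECYCLE means "restart the lookup and retry", not "fail". A hedged sketch of the re-drive loop; do_rename() is a stand-in for the namei()+VNOP_RENAME() sequence, and the ERECYCLE value shown is xnu's kernel-internal one.

#define ERECYCLE (-5)   /* xnu's kernel-internal "re-drive lookup" errno */

/* Stand-in for the namei() + VNOP_RENAME() sequence. */
static int do_rename(const char *from, const char *to)
{
        (void)from; (void)to;
        return 0;       /* stub */
}

static int rename_with_redrive(const char *from, const char *to)
{
        int error;

        do {
                /* A competing create can surface as a spurious EEXIST from
                 * cat_rename(); HFS maps it to ERECYCLE so the lookup is
                 * re-driven instead of failing the syscall. */
                error = do_rename(from, to);
        } while (error == ERECYCLE);

        return error;
}
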
index d7711096fb515efe1e39a8200458e54ce2adc8c9..f7bd0e5d9c524bd4add64ec3e226eaf92527a57e 100644 (file)
 
 #include <sys/ubc_internal.h>
 
+#include <hfs/hfs.h>   /* <rdar://7042269> manifest constants */
+
 struct psemnode;
 struct pshmnode;
 
@@ -593,6 +595,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
        int devBlockSize = 0;
        unsigned int fflag;
        user_addr_t argp;
+       boolean_t is64bit;
 
        AUDIT_ARG(fd, uap->fd);
        AUDIT_ARG(cmd, uap->cmd);
@@ -604,7 +607,9 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
        }
        context.vc_thread = current_thread();
        context.vc_ucred = fp->f_cred;
-       if (proc_is64bit(p)) {
+
+       is64bit = proc_is64bit(p);
+       if (is64bit) {
                argp = uap->arg;
        }
        else {
@@ -1482,13 +1487,17 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
        }
 
        default:
-               if (uap->cmd < FCNTL_FS_SPECIFIC_BASE) {
-                       error = EINVAL;
+               /*
+                * This is an fcntl() that we do not recognize at this level;
+                * if this is a vnode, we send it down into the VNOP_IOCTL
+                * for this vnode; this can include special devices, and will
+                * effectively overload fcntl() to send ioctl()'s.
+                */
+               if ((uap->cmd & IOC_VOID) && (uap->cmd & IOC_INOUT)) {
+                       error = EINVAL;
                        goto out;
                }
-
-               // if it's a fs-specific fcntl() then just pass it through
-
+               
                if (fp->f_type != DTYPE_VNODE) {
                        error = EBADF;
                        goto out;
@@ -1497,12 +1506,103 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
                proc_fdunlock(p);
 
                if ( (error = vnode_getwithref(vp)) == 0 ) {
-                       error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, argp), 0, &context);
+#define STK_PARAMS 128
+                       char stkbuf[STK_PARAMS];
+                       unsigned int size;
+                       caddr_t data, memp;
+                       int fix_cmd = uap->cmd;
+
+                       /*
+                        * For this to work properly, we have to copy in the
+                        * ioctl() cmd argument if there is one; we must also
+                        * check that a command parameter, if present, does
+                        * not exceed the maximum command length dictated by
+                        * the number of bits we have available in the command
+                        * to represent a structure length.  Finally, we have
+                        * to copy the results back out, if it is that type of
+                        * ioctl().
+                        */
+                       size = IOCPARM_LEN(uap->cmd);
+                       if (size > IOCPARM_MAX) {
+                               (void)vnode_put(vp);
+                               error = EINVAL;
+                               break;
+                       }
+
+                       /*
+                        * <rdar://7042269> fix up the command we should have
+                        * received via fcntl with one with a valid size and
+                        * copy out argument.
+                        */
+                       if (fix_cmd == HFS_GET_MOUNT_TIME ||
+                           fix_cmd == HFS_GET_LAST_MTIME) {
+                               if (is64bit)
+                                       size = sizeof(user_time_t);
+                               else
+                                       size = sizeof(time_t);
+                               fix_cmd |= IOC_OUT;
+                       }
+
+                       memp = NULL;
+                       if (size > sizeof (stkbuf)) {
+                               if ((memp = (caddr_t)kalloc(size)) == 0) {
+                                       (void)vnode_put(vp);
+                                       error = ENOMEM;
+                                       goto outdrop;
+                               }
+                               data = memp;
+                       } else {
+                               data = &stkbuf[0];
+                       }
+                       
+                       if (fix_cmd & IOC_IN) {
+                               if (size) {
+                                       /* structure */
+                                       error = copyin(argp, data, size);
+                                       if (error) {
+                                               (void)vnode_put(vp);
+                                               if (memp)
+                                                       kfree(memp, size);
+                                               goto outdrop;
+                                       }
+                               } else {
+                                       /* int */
+                                       if (is64bit) {
+                                               *(user_addr_t *)data = argp;
+                                       } else {
+                                               *(uint32_t *)data = (uint32_t)argp;
+                                       }
+                               };
+                       } else if ((fix_cmd & IOC_OUT) && size) {
+                               /*
+                                * Zero the buffer so the user always
+                                * gets back something deterministic.
+                                */
+                               bzero(data, size);
+                       } else if (fix_cmd & IOC_VOID) {
+                               if (is64bit) {
+                                   *(user_addr_t *)data = argp;
+                               } else {
+                                   *(uint32_t *)data = (uint32_t)argp;
+                               }
+                       }
+
+                       /*
+                        * <rdar://7042269> We pass the unmodified uap->cmd
+                        * to the underlying VNOP so that we don't confuse it;
+                        * but we are going to handle its copyout() when it
+                        * gets back.
+                        */
+                       error = VNOP_IOCTL(vp, uap->cmd, CAST_DOWN(caddr_t, data), 0, &context);
 
                        (void)vnode_put(vp);
+
+                       /* Copy any output data to user */
+                       if (error == 0 && (fix_cmd & IOC_OUT) && size) 
+                               error = copyout(data, argp, size);
+                       if (memp)
+                               kfree(memp, size);
                }
                break;
-       
        }
 
 outdrop:
@@ -3871,9 +3971,12 @@ closef_locked(struct fileproc *fp, struct fileglob *fg, proc_t p)
        fg->fg_lflags |= FG_TERM;
        lck_mtx_unlock(&fg->fg_lock);
 
-       proc_fdunlock(p);
+       if (p)
+               proc_fdunlock(p);
        error = closef_finish(fp, fg, p, &context);
-       proc_fdlock(p);
+
+       if (p)
+               proc_fdlock(p);
 
        return(error);
 }
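
The fall-through above leans on the BSD ioctl command encoding from <sys/ioccom.h>: the high bits carry the transfer direction (IOC_IN, IOC_OUT, IOC_VOID) and bits 16-28 carry the parameter size, which IOCPARM_LEN() extracts. A self-contained decoder showing why a command with both IOC_VOID and IOC_INOUT set is rejected as malformed:

#include <stdio.h>

/* From BSD <sys/ioccom.h> */
#define IOCPARM_MASK    0x1fff
#define IOCPARM_LEN(x)  (((x) >> 16) & IOCPARM_MASK)
#define IOC_VOID        0x20000000U     /* no parameters */
#define IOC_OUT         0x40000000U     /* copy parameters out */
#define IOC_IN          0x80000000U     /* copy parameters in */
#define IOC_INOUT       (IOC_IN|IOC_OUT)

static void decode_cmd(unsigned int cmd)
{
        printf("size=%lu in=%d out=%d void=%d\n",
            (unsigned long)IOCPARM_LEN(cmd),
            (cmd & IOC_IN) != 0, (cmd & IOC_OUT) != 0,
            (cmd & IOC_VOID) != 0);
}

int main(void)
{
        /* IOC_VOID together with IOC_INOUT is self-contradictory, which
         * is exactly what the new fcntl() check rejects with EINVAL. */
        decode_cmd(IOC_IN | (sizeof(long) << 16) | ('f' << 8) | 1);
        return 0;
}
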
index 9b568d24c65b22e5b8dadf849b64e5902ba2789e..4c1b6c6ebc579531d9ea3bd23da528d0b1df1846 100644 (file)
@@ -132,7 +132,7 @@ static int  unp_connect(struct socket *, struct sockaddr *, proc_t);
 static void    unp_disconnect(struct unpcb *);
 static void    unp_shutdown(struct unpcb *);
 static void    unp_drop(struct unpcb *, int);
-static void    unp_gc(void);
+__private_extern__ void        unp_gc(void);
 static void    unp_scan(struct mbuf *, void (*)(struct fileglob *));
 static void    unp_mark(struct fileglob *);
 static void    unp_discard(struct fileglob *);
@@ -749,7 +749,11 @@ unp_detach(struct unpcb *unp)
                 * gets them (resulting in a "panic: closef: count < 0").
                 */
                sorflush(unp->unp_socket);
+
+               /* Per domain mutex deadlock avoidance */
+               socket_unlock(unp->unp_socket, 0);
                unp_gc();
+               socket_lock(unp->unp_socket, 0);
        }
        if (unp->unp_addr)
                FREE(unp->unp_addr, M_SONAME);
@@ -1362,11 +1366,15 @@ unp_internalize(struct mbuf *control, proc_t p)
 }
 
 static int     unp_defer, unp_gcing, unp_gcwait;
+static thread_t unp_gcthread = NULL;
 
 /* always called under uipc_lock */
 void
 unp_gc_wait(void)
 {
+       if (unp_gcthread == current_thread())
+               return;
+
        while (unp_gcing != 0) {
                unp_gcwait = 1;
                msleep(&unp_gcing, uipc_lock, 0 , "unp_gc_wait", NULL);
@@ -1374,12 +1382,13 @@ unp_gc_wait(void)
 }
 
 
-static void
+__private_extern__ void
 unp_gc(void)
 {
        struct fileglob *fg, *nextfg;
        struct socket *so;
-       struct fileglob **extra_ref, **fpp;
+       static struct fileglob **extra_ref;
+        struct fileglob **fpp;
        int nunref, i;
        int need_gcwakeup = 0;
 
@@ -1390,6 +1399,7 @@ unp_gc(void)
        }
        unp_gcing = 1;
        unp_defer = 0;
+       unp_gcthread = current_thread();
        lck_mtx_unlock(uipc_lock);
        /*
         * before going through all this, set all FDs to
@@ -1484,9 +1494,13 @@ unp_gc(void)
                         * to see if we hold any file descriptors in its
                         * message buffers. Follow those links and mark them
                         * as accessible too.
+                        *
+                        * In case a file is passed onto itself we need to
+                        * release the file lock.
                         */
-                       unp_scan(so->so_rcv.sb_mb, unp_mark);
                        lck_mtx_unlock(&fg->fg_lock);
+
+                       unp_scan(so->so_rcv.sb_mb, unp_mark);
                }
        } while (unp_defer);
        /*
@@ -1564,20 +1578,13 @@ unp_gc(void)
                tfg = *fpp;
 
                if (tfg->fg_type == DTYPE_SOCKET && tfg->fg_data != NULL) {
-                       int locked = 0;
-
                        so = (struct socket *)(tfg->fg_data);
 
-                       /* XXXX */
-                       /* Assume local sockets use a global lock */
-                       if (so->so_proto->pr_domain->dom_family != PF_LOCAL) {
-                               socket_lock(so, 0);
-                               locked = 1;
-                       }
+                       socket_lock(so, 0);
+
                        sorflush(so);
 
-                       if (locked)
-                               socket_unlock(so, 0);
+                       socket_unlock(so, 0);
                }
        }
        for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
@@ -1585,6 +1592,7 @@ unp_gc(void)
 
         lck_mtx_lock(uipc_lock);
        unp_gcing = 0;
+       unp_gcthread = NULL;
 
        if (unp_gcwait != 0) {
                unp_gcwait = 0;
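
The unp_gcthread bookkeeping is a re-entrancy guard: record which thread is running the collector so that same thread skips unp_gc_wait() instead of deadlocking on itself. A pthread sketch of the pattern; all names are illustrative.

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t gc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  gc_cv   = PTHREAD_COND_INITIALIZER;
static pthread_t gc_thread;     /* valid only while gc_running is true */
static bool gc_running;

/* Analogue of unp_gc_wait(): callers block until the GC finishes,
 * except the GC thread itself, which would otherwise deadlock. */
static void gc_wait(void)
{
        pthread_mutex_lock(&gc_lock);
        if (!(gc_running && pthread_equal(gc_thread, pthread_self()))) {
                while (gc_running)
                        pthread_cond_wait(&gc_cv, &gc_lock);
        }
        pthread_mutex_unlock(&gc_lock);
}

/* Analogue of unp_gc()'s entry/exit bookkeeping. */
static void gc_run(void (*collect)(void))
{
        pthread_mutex_lock(&gc_lock);
        gc_running = true;
        gc_thread = pthread_self();
        pthread_mutex_unlock(&gc_lock);

        collect();              /* may re-enter gc_wait() safely */

        pthread_mutex_lock(&gc_lock);
        gc_running = false;
        pthread_cond_broadcast(&gc_cv);
        pthread_mutex_unlock(&gc_lock);
}
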
index aad1b0250e1d9f5cad5ca0c166c6c54ef09c82db..6c26b1799734bcec0b423c8747d8e095861e1962 100644 (file)
@@ -600,6 +600,7 @@ void IOSleep(int);
 
 struct _throttle_io_info_t {
        struct timeval  last_normal_IO_timestamp;
+       struct timeval  last_IO_timestamp;
        SInt32 numthreads_throttling;
 };
 
@@ -614,6 +615,32 @@ SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_window_inc, CTLFLAG_RW, &lowpri_IO_window
 SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
 SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
 
+void
+throttle_info_get_last_io_time(mount_t mp, struct timeval *tv)
+{
+       size_t devbsdunit;
+               
+       devbsdunit = mp->mnt_devbsdunit;
+
+       if (devbsdunit < LOWPRI_MAX_NUM_DEV) {
+               *tv = _throttle_io_info[devbsdunit].last_IO_timestamp;
+       } else {
+               memset(tv, 0, sizeof(*tv));
+       }
+}
+
+void
+update_last_io_time(mount_t mp)
+{
+       size_t devbsdunit;
+               
+       devbsdunit = mp->mnt_devbsdunit;
+
+       if (devbsdunit < LOWPRI_MAX_NUM_DEV) {
+               microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
+       }
+}
+
 int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit)
 {
        struct timeval elapsed;
@@ -784,6 +811,18 @@ spec_strategy(struct vnop_strategy_args *ap)
                        }
                }
        }
+
+       if ((bflags & B_READ) == 0) {
+               size_t devbsdunit;
+
+               if (buf_vnode(bp)->v_mount != NULL)
+                       devbsdunit = buf_vnode(bp)->v_mount->mnt_devbsdunit;
+               else
+                       devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
+               
+               microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
+       }
+
         (*bdevsw[major(bdev)].d_strategy)(bp);
 
         return (0);
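
hfs_syncer() consumes these timestamps by subtracting the last-I/O time from a fresh microuptime() and deferring the sync while the gap is under hfs_last_io_wait_time (125 ms by default). A user-space sketch of that test; note the sketch requires tv_sec == 0, i.e. a gap strictly under one second.

#include <stdbool.h>
#include <sys/time.h>

static int last_io_wait_us = 125000;    /* hfs_last_io_wait_time default */

/* Mirror of the kernel's timevalsub(): *a -= *b, normalizing tv_usec. */
static void timeval_sub(struct timeval *a, const struct timeval *b)
{
        a->tv_sec  -= b->tv_sec;
        a->tv_usec -= b->tv_usec;
        if (a->tv_usec < 0) {
                a->tv_sec--;
                a->tv_usec += 1000000;
        }
}

/* True if the last I/O happened within the wait window, i.e. the sync
 * should be deferred because the device is still busy. */
static bool io_too_recent(struct timeval now, struct timeval last_io)
{
        timeval_sub(&now, &last_io);
        return now.tv_sec == 0 && now.tv_usec > 0 &&
               now.tv_usec < last_io_wait_us;
}
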
index a3853eae9798f070f4de8862823d3a34b3c5031b..940a58ae3784c49c9ff7ffc6692b92e429af7d4e 100644 (file)
@@ -58,6 +58,8 @@
 #include <netat/asp.h>
 #include <netat/debug.h>
 
+__private_extern__ int atp_resp_seqno2big = 0;
+
 static void atp_trans_complete(struct atp_trans *);
 void atp_x_done_locked(void *);
 void atp_treq_event(void *);
@@ -139,8 +141,8 @@ gbuf_t   *m;
            case ATP_CMD_TRESP:
            {   
                register struct atp_trans *trp;
-               register int    seqno;
-           register at_ddp_t       *ddp;
+               register unsigned int    seqno;
+               register at_ddp_t       *ddp;
 
                /*
                 * we just got a response, find the trans record
@@ -155,10 +157,20 @@ gbuf_t   *m;
                 *      If we can't find one then ignore the message
                 */
                seqno = athp->bitmap;
+               if (seqno > 7) {
+                       atp_resp_seqno2big++;
+                       ddp = AT_DDP_HDR(m);
+                       dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR),
+                               ("atp_rput: dropping TRESP seqno too big, tid=%d,loc=%d,rem=%d.%d,seqno=%u\n",
+                                UAS_VALUE_NTOH(athp->tid),
+                                ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno));
+                       gbuf_freem(m);
+                       return;
+               }
                if (trp == NULL) {
                ddp = AT_DDP_HDR(m);
                    dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR),
-               ("atp_rput: dropping TRESP, no trp,tid=%d,loc=%d,rem=%d.%d,seqno=%d\n",
+               ("atp_rput: dropping TRESP, no trp,tid=%d,loc=%d,rem=%d.%d,seqno=%u\n",
                            UAS_VALUE_NTOH(athp->tid),
                            ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno));
                    gbuf_freem(m);
@@ -184,7 +196,7 @@ gbuf_t   *m;
                if (!(trp->tr_bitmap&atp_mask[seqno]) || trp->tr_rcv[seqno]) {
                ddp = AT_DDP_HDR(m);
                    dPrintf(D_M_ATP_LOW, (D_L_INPUT|D_L_ERROR),
-               ("atp_rput: dropping TRESP, duplicate,tid=%d,loc=%d,rem=%d.%d,seqno=%d\n",
+               ("atp_rput: dropping TRESP, duplicate,tid=%d,loc=%d,rem=%d.%d,seqno=%u\n",
                            UAS_VALUE_NTOH(athp->tid),
                            ddp->dst_socket, ddp->src_node, ddp->src_socket, seqno));
                    gbuf_freem(m);
index a1d2a402c382231313755a18a7665578758a7f0f..dd22563be127fa0bec1feb885af98ba84483333c 100644 (file)
@@ -99,6 +99,9 @@ SYSCTL_INT(_net_appletalk, OID_AUTO, routermix, CTLFLAG_WR,
 at_ddp_stats_t at_ddp_stats;           /* DDP statistics */
 SYSCTL_STRUCT(_net_appletalk, OID_AUTO, ddpstats, CTLFLAG_RD,
              &at_ddp_stats, at_ddp_stats, "AppleTalk DDP Stats");
+extern int atp_resp_seqno2big;
+SYSCTL_INT(_net_appletalk, OID_AUTO, atp_resp_seqno2big, CTLFLAG_RD,
+                        &atp_resp_seqno2big, 0, "Appletalk ATP seqno too big count");
 
 static void ioccmd_t_32_to_64( ioccmd_t *from_p, user_ioccmd_t *to_p );
 static void ioccmd_t_64_to_32( user_ioccmd_t *from_p, ioccmd_t *to_p );
index e847a3319315948156885609d7214df9598e5644..db0662895ff1a6e277812f6ec6fb28eacdbaab52 100644 (file)
@@ -121,7 +121,8 @@ static struct timeval icmp6errppslim_last;
 extern int icmp6_nodeinfo;
 extern struct inpcbinfo ripcbinfo;
 extern lck_mtx_t *ip6_mutex; 
-extern lck_mtx_t *nd6_mutex; 
+extern lck_mtx_t *nd6_mutex;
+extern lck_mtx_t *inet6_domain_mutex;
 
 static void icmp6_errcount(struct icmp6errstat *, int, int);
 static int icmp6_rip6_input(struct mbuf **, int);
@@ -515,8 +516,15 @@ icmp6_input(mp, offp)
                icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
                if (code != 0)
                        goto badcode;
+
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
                        /* Give up remote */
+                       goto rate_limit_checked;
                        break;
                }
                if ((n->m_flags & M_EXT) != 0
@@ -531,6 +539,7 @@ icmp6_input(mp, offp)
                        if (maxlen >= MCLBYTES) {
                                /* Give up remote */
                                m_freem(n0);
+                               goto rate_limit_checked;
                                break;
                        }
                        MGETHDR(n, M_DONTWAIT, n0->m_type);     /* MAC-OK */
@@ -544,6 +553,7 @@ icmp6_input(mp, offp)
                        if (n == NULL) {
                                /* Give up remote */
                                m_freem(n0);
+                               goto rate_limit_checked;
                                break;
                        }
                        M_COPY_PKTHDR(n, n0);
@@ -578,6 +588,7 @@ icmp6_input(mp, offp)
                        icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
                        icmp6_reflect(n, noff);
                }
+               goto rate_limit_checked;
                break;
 
        case ICMP6_ECHO_REPLY:
@@ -594,6 +605,12 @@ icmp6_input(mp, offp)
                        icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
                else
                        icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
+
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                        /* give up local */
                        mld6_input(m, off);
@@ -602,6 +619,7 @@ icmp6_input(mp, offp)
                }
                mld6_input(n, off);
                /* m stays. */
+               goto rate_limit_checked;
                break;
 
        case MLD6_LISTENER_DONE:
@@ -631,6 +649,11 @@ icmp6_input(mp, offp)
                IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
                                 return IPPROTO_DONE);
 #endif
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                n = m_copy(m, 0, M_COPYALL);
                if (n)
                        n = ni6_input(n, off);
@@ -640,6 +663,7 @@ icmp6_input(mp, offp)
                        icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
                        icmp6_reflect(n, noff);
                }
+               goto rate_limit_checked;
                break;
 
        case ICMP6_WRUREPLY:
@@ -653,6 +677,12 @@ icmp6_input(mp, offp)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_router_solicit))
                        goto badlen;
+
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                        /* give up local */
                        nd6_rs_input(m, off, icmp6len);
@@ -661,6 +691,7 @@ icmp6_input(mp, offp)
                }
                nd6_rs_input(n, off, icmp6len);
                /* m stays. */
+               goto rate_limit_checked;
                break;
 
        case ND_ROUTER_ADVERT:
@@ -669,6 +700,12 @@ icmp6_input(mp, offp)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_router_advert))
                        goto badlen;
+
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                        /* give up local */
                        nd6_ra_input(m, off, icmp6len);
@@ -677,6 +714,7 @@ icmp6_input(mp, offp)
                }
                nd6_ra_input(n, off, icmp6len);
                /* m stays. */
+               goto rate_limit_checked;
                break;
 
        case ND_NEIGHBOR_SOLICIT:
@@ -685,6 +723,12 @@ icmp6_input(mp, offp)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_neighbor_solicit))
                        goto badlen;
+
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                        /* give up local */
                        nd6_ns_input(m, off, icmp6len);
@@ -693,6 +737,7 @@ icmp6_input(mp, offp)
                }
                nd6_ns_input(n, off, icmp6len);
                /* m stays. */
+               goto rate_limit_checked;
                break;
 
        case ND_NEIGHBOR_ADVERT:
@@ -701,6 +746,12 @@ icmp6_input(mp, offp)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_neighbor_advert))
                        goto badlen;
+
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                        /* give up local */
                        nd6_na_input(m, off, icmp6len);
@@ -709,6 +760,7 @@ icmp6_input(mp, offp)
                }
                nd6_na_input(n, off, icmp6len);
                /* m stays. */
+               goto rate_limit_checked;
                break;
 
        case ND_REDIRECT:
@@ -717,6 +769,12 @@ icmp6_input(mp, offp)
                        goto badcode;
                if (icmp6len < sizeof(struct nd_redirect))
                        goto badlen;
+
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
                        /* give up local */
                        icmp6_redirect_input(m, off);
@@ -725,6 +783,7 @@ icmp6_input(mp, offp)
                }
                icmp6_redirect_input(n, off);
                /* m stays. */
+               goto rate_limit_checked;
                break;
 
        case ICMP6_ROUTER_RENUMBERING:
@@ -736,6 +795,11 @@ icmp6_input(mp, offp)
                break;
 
        default:
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                nd6log((LOG_DEBUG,
                    "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
                    icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src),
@@ -747,9 +811,15 @@ icmp6_input(mp, offp)
                        /* deliver */
                } else {
                        /* ICMPv6 informational: MUST not deliver */
+                       goto rate_limit_checked;
                        break;
                }
        deliver:
+               if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+                       icmp6stat.icp6s_toofreq++;
+                       goto freeit;
+               }
+
                if (icmp6_notify_error(m, off, icmp6len, code)) {
                        /* In this case, m should've been freed. */
                        return(IPPROTO_DONE);
@@ -765,6 +835,11 @@ icmp6_input(mp, offp)
                break;
        }
 
+       if (icmp6_ratelimit(&ip6->ip6_dst, icmp6->icmp6_type, code)) {
+               icmp6stat.icp6s_toofreq++;
+               goto freeit;
+       }
+rate_limit_checked:
        /* deliver the packet to appropriate sockets */
        icmp6_rip6_input(&m, *offp);
 
@@ -2331,7 +2406,16 @@ icmp6_redirect_input(m, off)
        sdst.sin6_family = AF_INET6;
        sdst.sin6_len = sizeof(struct sockaddr_in6);
        bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
+       
+       /*
+        * Radar 6843900
+        * Release the IPv6 domain lock because we are going to take domain_proto_mtx
+        * and could otherwise cause a deadlock with other threads taking these locks 
+        * in the reverse order -- e.g. frag6_slowtimo() from pfslowtimo()
+        */
+       lck_mtx_unlock(inet6_domain_mutex);
        pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
+       lck_mtx_lock(inet6_domain_mutex);
 #if IPSEC
        key_sa_routechange((struct sockaddr *)&sdst);
 #endif
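
icmp6_ratelimit() amounts to a packets-per-second check in the style of BSD's ppsratecheck(): count events within the current one-second window and drop once the cap is hit, bumping icp6s_toofreq. A simplified, non-authoritative sketch:

#include <stdbool.h>
#include <time.h>

/* Allow at most maxpps events per second; returns true when the event
 * exceeds the limit and should be dropped. maxpps < 0 disables it. */
static bool pps_over_limit(time_t *last_sec, int *count, int maxpps)
{
        time_t now = time(NULL);

        if (now != *last_sec) {         /* a new one-second window */
                *last_sec = now;
                *count = 0;
        }
        if (maxpps < 0)
                return false;           /* limiting disabled */
        return ++*count > maxpps;
}
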
index 29188674f33eaedf0249584e22785a5d57409aa8..a84715b19260ad635b1d866b286b67be2b47a5c9 100644 (file)
@@ -545,9 +545,14 @@ struct in6_pktinfo {
 
 #define IPV6CTL_MAXFRAGS        41      /* max fragments */
 
+#define IPV6CTL_NEIGHBORGCTHRESH 46
+#define IPV6CTL_MAXIFPREFIXES  47
+#define IPV6CTL_MAXIFDEFROUTERS 48
+#define IPV6CTL_MAXDYNROUTES   49
+
 /* New entries should be added here from current IPV6CTL_MAXID value. */
 /* to define items, should talk with KAME guys first, for *BSD compatibility */
-#define IPV6CTL_MAXID          42
+#define IPV6CTL_MAXID          50
 
 #ifdef KERNEL_PRIVATE
 #define CTL_IPV6PROTO_NAMES { \
index c27a77892298221aaed221430d10f721af805b3b..a937bbf35c28785c08b350854da535cfce25fdaa 100644 (file)
@@ -378,6 +378,11 @@ int        ip6_rr_prune = 5;       /* router renumbering prefix
                                 * walk list every 5 sec.    */
 int    ip6_v6only = 0;         /* Mapped addresses on by default -  Radar 3347718 */
 
+int    ip6_neighborgcthresh = 2048;    /* Threshold # of NDP entries for GC */
+int    ip6_maxifprefixes = 16;         /* Max acceptable prefixes via RA per IF */
+int    ip6_maxifdefrouters = 16;       /* Max acceptable def routers via RA */
+int    ip6_maxdynroutes = 4096;        /* Max # of routes created via redirect */
+
 u_int32_t ip6_id = 0UL;
 int    ip6_keepfaith = 0;
 time_t ip6_log_time = (time_t)0L;
@@ -515,6 +520,14 @@ SYSCTL_STRUCT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats, CTLFLAG_RD,
        &rip6stat, rip6stat, "");
 SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RD,
         &mrt6stat, mrt6stat, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NEIGHBORGCTHRESH,
+       neighborgcthresh, CTLFLAG_RW,   &ip6_neighborgcthresh,  0, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFPREFIXES,
+       maxifprefixes, CTLFLAG_RW,      &ip6_maxifprefixes,     0, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXIFDEFROUTERS,
+       maxifdefrouters, CTLFLAG_RW,    &ip6_maxifdefrouters,   0, "");
+SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXDYNROUTES,
+       maxdynroutes, CTLFLAG_RW,       &ip6_maxdynroutes,      0, "");
 
 
 /* net.inet6.icmp6 */
index c4a2fb28695a69e71fa9be34c2d40ce6ac21b5c1..f42089272372e47b150900b05b179a46cdc421f4 100644 (file)
@@ -266,6 +266,11 @@ extern int ip6_rr_prune;           /* router renumbering prefix
 #define ip6_mapped_addr_on     (!ip6_v6only)
 extern int     ip6_v6only;
 
+extern int     ip6_neighborgcthresh;   /* Threshold # of NDP entries for GC */
+extern int     ip6_maxifprefixes;      /* Max acceptable prefixes via RA per IF */
+extern int     ip6_maxifdefrouters;    /* Max acceptable def routers via RA */
+extern int     ip6_maxdynroutes;       /* Max # of routes created via redirect */
+
 extern struct socket *ip6_mrouter;     /* multicast routing daemon */
 extern int     ip6_sendredirects;      /* send IP redirects when forwarding? */
 extern int     ip6_maxfragpackets;     /* Maximum packets in reassembly queue */
index f0c838d588a09a52a7009331ab8531f83b23ad96..66f4cd2b9a5256b751db872965fe390d0d7d66a4 100644 (file)
@@ -119,6 +119,7 @@ extern lck_mtx_t *nd6_mutex;
 
 static void nd6_slowtimo(void *ignored_arg);
 
+
 void
 nd6_init()
 {
@@ -415,11 +416,11 @@ nd6_timer(
        struct in6_ifaddr *ia6, *nia6;
        struct in6_addrlifetime *lt6;
        struct timeval timenow;
+       int count = 0;
 
        getmicrotime(&timenow);
        
 
-
        ln = llinfo_nd6.ln_next;
        while (ln && ln != &llinfo_nd6) {
                struct rtentry *rt;
@@ -439,9 +440,34 @@ nd6_timer(
                ndi = &nd_ifinfo[ifp->if_index];
                dst = (struct sockaddr_in6 *)rt_key(rt);
 
+               count++;
+
                if (ln->ln_expire > timenow.tv_sec) {
-                       ln = next;
-                       continue;
+
+                       /* Radar 6871508: Check if we have too many cache
+                        * entries.  In that case purge 20% of the table to
+                        * make space for new entries.  This is a bit crude,
+                        * but it keeps the deletion in the timer thread only.
+                        */
+
+                       if ((ip6_neighborgcthresh >= 0 &&
+                               nd6_inuse >= ip6_neighborgcthresh) &&
+                               ((count % 5) == 0))  {
+
+                               if (ln->ln_state > ND6_LLINFO_INCOMPLETE) 
+                                       ln->ln_state = ND6_LLINFO_STALE;
+                               else
+                                       ln->ln_state = ND6_LLINFO_PURGE;
+                               ln->ln_expire = timenow.tv_sec;
+
+                               /* fallthrough and call nd6_free() */
+                       } else {
+                               ln = next;
+                               continue;
+                       }
                }
 
                /* sanity check */
@@ -499,6 +525,7 @@ nd6_timer(
                        break;
 
                case ND6_LLINFO_STALE:
+               case ND6_LLINFO_PURGE:
                        /* Garbage Collection(RFC 2461 5.3) */
                        if (ln->ln_expire)
                                next = nd6_free(rt);
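
The "(count % 5) == 0" test is how the comment's "purge 20% of the table" is realized: every fifth live entry the timer walks gets demoted to STALE or PURGE, so a single pass reclaims about a fifth of the cache without a separate deletion thread. In sketch form, with mark() standing in for the state demotion:

/* Walk a cache of n entries and mark every fifth one for purging while
 * over the threshold: one timer pass then frees ~20% of the table. */
static int mark_fifth_for_purge(int n, int threshold, int inuse,
    void (*mark)(int idx))
{
        int count = 0, marked = 0;

        for (int i = 0; i < n; i++) {
                count++;
                if (threshold >= 0 && inuse >= threshold &&
                    (count % 5) == 0) {
                        mark(i);        /* STALE or PURGE in nd6_timer() */
                        marked++;
                }
        }
        return marked;
}
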
index b85be0157ceaca7c878066beb883ef81daeee043..4b66e6be61827bd0c6b508a7aa03eb8a24c183ba 100644 (file)
@@ -55,6 +55,7 @@ struct        llinfo_nd6 {
 };
 #endif /* KERNEL_PRIVATE */
 
+#define ND6_LLINFO_PURGE       -3
 #define ND6_LLINFO_NOSTATE     -2
 /*
  * We don't need the WAITDELETE state any more, but we keep the definition
@@ -86,6 +87,9 @@ struct nd_ifinfo {
        u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
        u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
        u_int8_t randomid[8];   /* current random ID */
+       /* keep track of routers and prefixes on this link */
+       int32_t nprefixes;
+       int32_t ndefrouters;
 };
 
 #define ND6_IFF_PERFORMNUD     0x1
index 8ca259b74219867ea2bdc687cf37b8a5d844cec1..5af29d31e972b9b3ca6cdee11485f1ead7103657 100644 (file)
@@ -631,6 +631,7 @@ defrtrlist_del(
        struct nd_defrouter *dr, int nd6locked)
 {
        struct nd_defrouter *deldr = NULL;
+       struct nd_ifinfo *ndi = &nd_ifinfo[dr->ifp->if_index];
        struct nd_prefix *pr;
 
        /*
@@ -667,6 +668,12 @@ defrtrlist_del(
        if (deldr)
                defrouter_select();
 
+       ndi->ndefrouters--;
+       if (ndi->ndefrouters < 0) {
+               log(LOG_WARNING, "defrtrlist_del: negative count on %s\n",
+                   if_name(dr->ifp));
+       }
+
        if (nd6locked == 0)
                lck_mtx_unlock(nd6_mutex);
 
@@ -760,6 +767,7 @@ defrtrlist_update(
        struct nd_defrouter *new)
 {
        struct nd_defrouter *dr, *n;
+       struct nd_ifinfo *ndi = &nd_ifinfo[new->ifp->if_index];
 
        lck_mtx_lock(nd6_mutex);
        if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
@@ -783,6 +791,12 @@ defrtrlist_update(
                return(NULL);
        }
 
+       if (ip6_maxifdefrouters >= 0 &&
+           ndi->ndefrouters >= ip6_maxifdefrouters) {
+               lck_mtx_unlock(nd6_mutex);
+               return (NULL);
+       }
+
        n = (struct nd_defrouter *)_MALLOC(sizeof(*n), M_IP6NDP, M_NOWAIT);
        if (n == NULL) {
                lck_mtx_unlock(nd6_mutex);
@@ -799,6 +813,8 @@ defrtrlist_update(
        TAILQ_INSERT_TAIL(&nd_defrouter, n, dr_entry);
        if (TAILQ_FIRST(&nd_defrouter) == n)
                defrouter_select();
+       
+       ndi->ndefrouters++;
                
        lck_mtx_unlock(nd6_mutex);
        return(n);
@@ -905,6 +921,40 @@ ndpr_rele(struct nd_prefix *pr, boolean_t locked)
                lck_mtx_unlock(nd6_mutex);
 }
 
+static void
+purge_detached(struct ifnet *ifp)
+{
+       struct nd_prefix *pr, *pr_next;
+       struct in6_ifaddr *ia;
+       struct ifaddr *ifa, *ifa_next;
+       
+       lck_mtx_lock(nd6_mutex);
+
+       for (pr = nd_prefix.lh_first; pr; pr = pr_next) {
+               pr_next = pr->ndpr_next;
+               if (pr->ndpr_ifp != ifp ||
+                   IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
+                   ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
+                   !LIST_EMPTY(&pr->ndpr_advrtrs)))
+                       continue;
+
+               for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa_next) {
+                       ifa_next = ifa->ifa_list.tqe_next;
+                       if (ifa->ifa_addr->sa_family != AF_INET6)
+                               continue;
+                       ia = (struct in6_ifaddr *)ifa;
+                       if ((ia->ia6_flags & IN6_IFF_AUTOCONF) ==
+                           IN6_IFF_AUTOCONF && ia->ia6_ndpr == pr) {
+                               in6_purgeaddr(ifa, 1);
+                       }
+               }
+               if (pr->ndpr_refcnt == 0)
+                       prelist_remove(pr, 1);
+       }
+
+       lck_mtx_unlock(nd6_mutex);
+}
+
 int
 nd6_prelist_add(
        struct nd_prefix *pr,
@@ -913,6 +963,14 @@ nd6_prelist_add(
 {
        struct nd_prefix *new = NULL;
        int i;
+       struct nd_ifinfo *ndi = &nd_ifinfo[pr->ndpr_ifp->if_index];
+
+       if (ip6_maxifprefixes >= 0) {
+               if (ndi->nprefixes >= ip6_maxifprefixes / 2)
+                       purge_detached(pr->ndpr_ifp);
+               if (ndi->nprefixes >= ip6_maxifprefixes)
+                       return(ENOMEM);
+       }
 
        new = (struct nd_prefix *)_MALLOC(sizeof(*new), M_IP6NDP, M_NOWAIT);
        if (new == NULL)
@@ -953,6 +1011,9 @@ nd6_prelist_add(
        if (dr) {
                pfxrtr_add(new, dr);
        }
+
+       ndi->nprefixes++;
+
        lck_mtx_unlock(nd6_mutex);
 
        return 0;
@@ -964,6 +1025,7 @@ prelist_remove(
 {
        struct nd_pfxrouter *pfr, *next;
        int e;
+       struct nd_ifinfo *ndi = &nd_ifinfo[pr->ndpr_ifp->if_index];
 
        /* make sure to invalidate the prefix until it is really freed. */
        pr->ndpr_vltime = 0;
@@ -1001,6 +1063,12 @@ prelist_remove(
                FREE(pfr, M_IP6NDP);
        }
 
+       ndi->nprefixes--;
+       if (ndi->nprefixes < 0) {
+               log(LOG_WARNING, "prelist_remove: negative count on %s\n",
+                   if_name(pr->ndpr_ifp));
+       }
+
        FREE(pr, M_IP6NDP);
 
        pfxlist_onlink_check(1);
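
The new default-router and prefix caps share one admission policy: start purging detached entries once half the cap is reached, and refuse outright at the cap. A compact sketch of that policy; the names are illustrative, and ENOMEM matches what nd6_prelist_add() returns when full.

#include <errno.h>

/* Admission check in the spirit of nd6_prelist_add(): GC detached
 * entries at half the cap, refuse new entries at the cap.  The count
 * is passed by pointer because purge() may lower it. */
static int prefix_admission(int *nprefixes, int max, void (*purge)(void))
{
        if (max >= 0) {
                if (*nprefixes >= max / 2)
                        purge();        /* purge_detached() analogue */
                if (*nprefixes >= max)
                        return ENOMEM;  /* still full after the purge */
        }
        return 0;
}
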
index 0e1d6dc69f58d3fa4ac8c54b952a1e3456f8aaa0..75e405b3cad6006806117090f99efba818cbf1e5 100644 (file)
@@ -781,6 +781,8 @@ key_checkrequest(isr, saidx, sav)
  * OUT:        NULL:   not found.
  *     others: found and return the pointer.
  */
+u_int32_t sah_search_calls = 0;
+u_int32_t sah_search_count = 0;
 struct secasvar *
 key_allocsa_policy(saidx)
        struct secasindex *saidx;
@@ -794,7 +796,9 @@ key_allocsa_policy(saidx)
        u_int16_t       dstport;
        
        lck_mtx_lock(sadb_mutex);
+       sah_search_calls++;
        LIST_FOREACH(sah, &sahtree, chain) {
+               sah_search_count++;
                if (sah->state == SADB_SASTATE_DEAD)
                        continue;
                if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE | CMP_REQID))
@@ -4630,7 +4634,9 @@ key_bbcmp(p1, p2, bits)
  * and do to remove or to expire.
  * XXX: year 2038 problem may remain.
  */
-
+int key_timehandler_debug = 0;
+u_int32_t spd_count = 0, sah_count = 0, dead_sah_count = 0, empty_sah_count = 0, larval_sav_count = 0, mature_sav_count = 0, dying_sav_count = 0, dead_sav_count = 0;
+u_int64_t total_sav_count = 0;
 void
 key_timehandler(void)
 {
@@ -4671,6 +4677,7 @@ key_timehandler(void)
                             sp != NULL;
                             sp = nextsp) {
 
+                               spd_count++;
                                nextsp = LIST_NEXT(sp, chain);
 
                                if (sp->state == IPSEC_SPSTATE_DEAD) {
@@ -4706,11 +4713,22 @@ key_timehandler(void)
                         sah != NULL;
                         sah = nextsah) {
        
+                       sah_count++;
                        nextsah = LIST_NEXT(sah, chain);
        
                        /* if sah has been dead, then delete it and process next sah. */
                        if (sah->state == SADB_SASTATE_DEAD) {
                                key_delsah(sah);
+                               dead_sah_count++;
+                               continue;
+                       }
+
+                       if (LIST_FIRST(&sah->savtree[SADB_SASTATE_LARVAL]) == NULL &&
+                           LIST_FIRST(&sah->savtree[SADB_SASTATE_MATURE]) == NULL && 
+                           LIST_FIRST(&sah->savtree[SADB_SASTATE_DYING]) == NULL && 
+                           LIST_FIRST(&sah->savtree[SADB_SASTATE_DEAD]) == NULL) {
+                               key_delsah(sah);
+                               empty_sah_count++;
                                continue;
                        }
        
@@ -4719,6 +4737,8 @@ key_timehandler(void)
                                 sav != NULL;
                                 sav = nextsav) {
        
+                               larval_sav_count++;
+                               total_sav_count++;
                                nextsav = LIST_NEXT(sav, chain);
        
                                if (tv.tv_sec - sav->created > key_larval_lifetime) {
@@ -4755,6 +4775,8 @@ key_timehandler(void)
                                 sav != NULL;
                                 sav = nextsav) {
        
+                               mature_sav_count++;
+                               total_sav_count++;
                                nextsav = LIST_NEXT(sav, chain);
        
                                /* we don't need to check. */
@@ -4816,6 +4838,8 @@ key_timehandler(void)
                                 sav != NULL;
                                 sav = nextsav) {
        
+                               dying_sav_count++;
+                               total_sav_count++;
                                nextsav = LIST_NEXT(sav, chain);
        
                                /* we don't need to check. */
@@ -4869,6 +4893,8 @@ key_timehandler(void)
                                 sav != NULL;
                                 sav = nextsav) {
        
+                               dead_sav_count++;
+                               total_sav_count++;
                                nextsav = LIST_NEXT(sav, chain);
        
                                /* sanity check */
@@ -4890,6 +4916,32 @@ key_timehandler(void)
                }
    }
 
+         if (++key_timehandler_debug >= 300) {
+                 if (key_debug_level) {
+                          printf("%s: total stats for %u calls\n", __FUNCTION__, key_timehandler_debug);
+                          printf("%s: walked %u SPDs\n", __FUNCTION__, spd_count);
+                          printf("%s: walked %llu SAs: LARVAL SAs %u, MATURE SAs %u, DYING SAs %u, DEAD SAs %u\n", __FUNCTION__,
+                                 total_sav_count, larval_sav_count, mature_sav_count, dying_sav_count, dead_sav_count);
+                          printf("%s: walked %u SAHs: DEAD SAHs %u, EMPTY SAHs %u\n", __FUNCTION__,
+                                 sah_count, dead_sah_count, empty_sah_count);
+                          if (sah_search_calls) {
+                                  printf("%s: SAH search cost %d iters per call\n", __FUNCTION__,
+                                         (sah_search_count/sah_search_calls));
+                          }
+                 }
+                 spd_count = 0;
+                 sah_count = 0;
+                 dead_sah_count = 0;
+                 empty_sah_count = 0;
+                 larval_sav_count = 0;
+                 mature_sav_count = 0;
+                 dying_sav_count = 0;
+                 dead_sav_count = 0;
+                 total_sav_count = 0;
+                 sah_search_count = 0;
+                 sah_search_calls = 0;
+                 key_timehandler_debug = 0;
+         }
 #ifndef IPSEC_NONBLOCK_ACQUIRE
        /* ACQ tree */
     {
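The block above is a rate-limited statistics dump: every pass through key_timehandler() bumps the per-state counters, and once 300 calls have accumulated the totals are printed (when key_debug_level is set) and reset. Plain counters suffice without locks because the timer callback is the only writer. A minimal user-space sketch of the same pattern, with hypothetical names (not xnu code):

    #include <stdint.h>
    #include <stdio.h>

    #define STATS_DUMP_INTERVAL 300     /* dump once per 300 invocations */

    static uint32_t tick_count;         /* stands in for key_timehandler_debug */
    static uint32_t items_walked;       /* stands in for the per-state counters */

    /* Hypothetical periodic callback; single caller, so no locking needed. */
    static void
    timer_handler(uint32_t items_this_pass)
    {
        items_walked += items_this_pass;

        if (++tick_count >= STATS_DUMP_INTERVAL) {
            printf("timer_handler: walked %u items over %u calls\n",
                items_walked, tick_count);
            items_walked = 0;           /* reset so the next window starts clean */
            tick_count = 0;
        }
    }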
index 368f4f3e63eae7400aef1e0bed73f04ae083355c..d482714ef2eaba78b31254c1e214a40061b8e160 100644 (file)
@@ -230,6 +230,8 @@ void        get_procrustime(struct time_value *tv);
 void   load_init_program(struct proc *p);
 void __pthread_testcancel(int presyscall);
 void syscall_exit_funnelcheck(void);
+void throttle_info_get_last_io_time(mount_t mp, struct timeval *tv);
+void update_last_io_time(mount_t mp);
 #endif /* BSD_KERNEL_PRIVATE */
 
 
index 3871c2a3b5afeb992c24b54eed4c89811293a937..026109d2133e32b5c9bce7f10de286194fdf1b05 100644 (file)
@@ -3154,6 +3154,10 @@ buf_biodone(buf_t bp)
                fslog_io_error(bp);
        }
 
+       if (bp->b_vp && bp->b_vp->v_mount && (bp->b_flags & B_READ) == 0) {
+               update_last_io_time(bp->b_vp->v_mount);
+       }
+
         if (kdebug_enable) {
                int    code = DKIO_DONE;
 
index bfee0d8b4c4eb21d2dcf877a196ec88a5d9b9817..643d79c07054eca4cc8764ab8a01e366ea79bb3c 100644 (file)
@@ -4986,15 +4986,21 @@ out:
  * - Neither the node nor the directory are immutable.
  * - The user is not the superuser.
  *
- * Deletion is not permitted if the directory is sticky and the caller is not owner of the
- * node or directory.
+ * Deletion is not permitted if the directory is sticky and the caller is
+ * not owner of the node or directory.
  *
- * If either the node grants DELETE, or the directory grants DELETE_CHILD, the node may be
- * deleted.  If neither denies the permission, and the caller has Posix write access to the
- * directory, then the node may be deleted.
+ * If either the node grants DELETE, or the directory grants DELETE_CHILD,
+ * the node may be deleted.  If neither denies the permission, and the
+ * caller has Posix write access to the directory, then the node may be
+ * deleted.
+ *
+ * As an optimization, we cache whether or not delete child is permitted
+ * on directories without the sticky bit set.
  */
-static int
-vnode_authorize_delete(vauth_ctx vcp)
+int
+vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child);
+/*static*/ int
+vnode_authorize_delete(vauth_ctx vcp, boolean_t cached_delete_child)
 {
        struct vnode_attr       *vap = vcp->vap;
        struct vnode_attr       *dvap = vcp->dvap;
@@ -5004,7 +5010,7 @@ vnode_authorize_delete(vauth_ctx vcp)
 
        /* check the ACL on the directory */
        delete_child_denied = 0;
-       if (VATTR_IS_NOT(dvap, va_acl, NULL)) {
+       if (!cached_delete_child && VATTR_IS_NOT(dvap, va_acl, NULL)) {
                eval.ae_requested = KAUTH_VNODE_DELETE_CHILD;
                eval.ae_acl = &dvap->va_acl->acl_ace[0];
                eval.ae_count = dvap->va_acl->acl_entrycount;
@@ -5070,15 +5076,20 @@ vnode_authorize_delete(vauth_ctx vcp)
                return(EACCES);
        }
 
-       /* enforce sticky bit behaviour */
-       if ((dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) {
+       /*
+        * enforce sticky bit behaviour; the cached_delete_child property will
+        * be false and the dvap contents valid for sticky bit directories;
+        * this makes us check the directory each time, but it's unavoidable,
+        * as sticky bit is an exception to caching.
+        */
+       if (!cached_delete_child && (dvap->va_mode & S_ISTXT) && !vauth_file_owner(vcp) && !vauth_dir_owner(vcp)) {
                KAUTH_DEBUG("%p    DENIED - sticky bit rules (user %d  file %d  dir %d)",
                    vcp->vp, cred->cr_uid, vap->va_uid, dvap->va_uid);
                return(EACCES);
        }
 
        /* check the directory */
-       if ((error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) {
+       if (!cached_delete_child && (error = vnode_authorize_posix(vcp, VWRITE, 1 /* on_dir */)) != 0) {
                KAUTH_DEBUG("%p    ALLOWED - granted by posix permisssions", vcp->vp);
                return(error);
        }
@@ -5476,7 +5487,7 @@ vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *i
        int                     result;
        int                     *errorp;
        int                     noimmutable;
-       boolean_t               parent_authorized_for_delete = FALSE;
+       boolean_t               parent_authorized_for_delete_child = FALSE;
        boolean_t               found_deny = FALSE;
        boolean_t               parent_ref= FALSE;
 
@@ -5541,8 +5552,8 @@ vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *i
                 * can skip a whole bunch of work... we will still have to
                 * authorize that this specific child can be removed
                 */
-               if (vnode_cache_is_authorized(dvp, ctx, KAUTH_VNODE_DELETE) == TRUE)
-                       parent_authorized_for_delete = TRUE;
+               if (vnode_cache_is_authorized(dvp, ctx, KAUTH_VNODE_DELETE_CHILD) == TRUE)
+                       parent_authorized_for_delete_child = TRUE;
        } else {
                dvp = NULL;
        }
@@ -5589,7 +5600,7 @@ vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *i
                KAUTH_DEBUG("%p    ERROR - failed to get vnode attributes - %d", vp, result);
                goto out;
        }
-       if (dvp && parent_authorized_for_delete == FALSE) {
+       if (dvp && parent_authorized_for_delete_child == FALSE) {
                VATTR_WANTED(&dva, va_mode);
                VATTR_WANTED(&dva, va_uid);
                VATTR_WANTED(&dva, va_gid);
@@ -5645,7 +5656,7 @@ vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *i
        if ((result = vnode_authorize_checkimmutable(vp, &va, rights, noimmutable)) != 0)
                goto out;
        if ((rights & KAUTH_VNODE_DELETE) &&
-           parent_authorized_for_delete == FALSE &&
+           parent_authorized_for_delete_child == FALSE &&
            ((result = vnode_authorize_checkimmutable(dvp, &dva, KAUTH_VNODE_DELETE_CHILD, 0)) != 0))
                goto out;
 
@@ -5658,13 +5669,14 @@ vnode_authorize_callback_int(__unused kauth_cred_t unused_cred, __unused void *i
                goto out;
 
        /*
-        * If we're not the superuser, authorize based on file properties.
+        * If we're not the superuser, authorize based on file properties;
+        * note that even if parent_authorized_for_delete_child is TRUE, we
+        * need to check on the node itself.
         */
        if (!vfs_context_issuser(ctx)) {
                /* process delete rights */
                if ((rights & KAUTH_VNODE_DELETE) &&
-                   parent_authorized_for_delete == FALSE &&
-                   ((result = vnode_authorize_delete(vcp)) != 0))
+                   ((result = vnode_authorize_delete(vcp, parent_authorized_for_delete_child)) != 0))
                    goto out;
 
                /* process remaining rights */
@@ -5715,12 +5727,20 @@ out:
                        vnode_cache_authorized_action(vp, ctx, KAUTH_VNODE_SEARCHBYANYONE);
                }
        }
-       if ((rights & KAUTH_VNODE_DELETE) && parent_authorized_for_delete == FALSE) {
+       if ((rights & KAUTH_VNODE_DELETE) && parent_authorized_for_delete_child == FALSE) {
                /*
-                * parent was successfully and newly authorized for deletions
-                * add it to the cache
+                * parent was successfully and newly authorized for content deletions;
+                * add it to the cache, but only if it doesn't have the sticky
+                * bit set on it.  The same check guards the fetching of dva
+                * earlier, and if we had jumped to 'out' without having done
+                * this, we would have returned already because of a non-zero
+                * 'result' value.
                 */
-               vnode_cache_authorized_action(dvp, ctx, KAUTH_VNODE_DELETE);
+               if (VATTR_IS_SUPPORTED(&dva, va_mode) &&
+                   !(dva.va_mode & (S_ISVTX))) {
+                       /* OK to cache delete rights */
+                       vnode_cache_authorized_action(dvp, ctx, KAUTH_VNODE_DELETE_CHILD);
+               }
        }
        if (parent_ref)
                vnode_put(vp);
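The net effect of these vfs_subr.c changes: the parent caches KAUTH_VNODE_DELETE_CHILD rather than KAUTH_VNODE_DELETE, the cached grant short-circuits only the directory-side checks inside vnode_authorize_delete() (the child's own ACL is still consulted), and nothing is ever cached for sticky-bit directories because their answer depends on who owns each child. A condensed sketch of the cacheability test, using xnu's vnode_attr macros (the helper itself is hypothetical):

    #include <sys/vnode.h>
    #include <sys/stat.h>

    /*
     * May a successful delete authorization be cached on the parent as
     * KAUTH_VNODE_DELETE_CHILD?  (Hypothetical helper; mirrors the test
     * in the 'out:' path above.)
     */
    static int
    may_cache_delete_child(struct vnode_attr *dvap)
    {
        /* Without a mode we can't prove the sticky bit is clear: don't cache. */
        if (!VATTR_IS_SUPPORTED(dvap, va_mode))
            return (0);
        /* Sticky directories are decided per child; never cache them. */
        return ((dvap->va_mode & S_ISVTX) == 0);
    }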
index 869f3f5b3d1ccb5da7ed2a4766d5918e5df9bd46..ee31d2c82b0d48faebcc2d5b7894cfebc098f199 100644 (file)
@@ -5120,10 +5120,11 @@ auth_exit:
         /*
          * We may encounter a race in the VNOP where the destination didn't 
          * exist when we did the namei, but it does by the time we go and 
-         * try to create the entry. In this case, we should re-drive this rename
-         * call from the top again.
-         */
-        if (error == EEXIST) {
+                * try to create the entry. In this case, we should re-drive this rename
+                * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
+                * but other filesystems susceptible to this race could return it, too.
+                */
+        if (error == ERECYCLE) {
             do_retry = 1;
         }
 
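The do_retry flag feeds a loop at the top of the rename path, so an ERECYCLE from the filesystem re-drives the whole lookup-plus-rename. A self-contained sketch of that shape (the stub stands in for namei() plus VNOP_RENAME(); ERECYCLE's value matches xnu's private errno but is shown only for illustration):

    #ifndef ERECYCLE
    #define ERECYCLE (-5)       /* xnu-private errno: redrive the lookup */
    #endif

    /* Stub standing in for namei() + VNOP_RENAME() on the real path. */
    static int
    try_rename_once(void)
    {
        return (0);
    }

    int
    rename_with_retry(void)
    {
        int error, do_retry;

        do {
            do_retry = 0;
            error = try_rename_once();
            if (error == ERECYCLE) {
                /* target materialized after our lookup; start over */
                do_retry = 1;
            }
        } while (do_retry);

        return (error);
    }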
index 58454343efb5b2fedc8ff9886fc5b0d7b20433dd..6ce3d2f9dfbfaa279a142a5af0390224a41ec1c1 100644 (file)
@@ -1,4 +1,4 @@
-9.7.0
+9.8.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 6346d0c2d08bcf48834f889b6471c8f3b711dd33..3ec11b1f126a8c027b47e0fc8a897afe776f735e 100644 (file)
@@ -857,4 +857,41 @@ kern_return_t IOBSDGetPlatformUUID( uuid_t uuid, mach_timespec_t timeout )
     return KERN_SUCCESS;
 }
 
+
+int IOBSDIsMediaEjectable( const char *cdev_name )
+{
+    int ret = 0;
+    OSDictionary *dictionary;
+    OSString *dev_name;
+
+    if (strncmp(cdev_name, "/dev/", 5) == 0) {
+           cdev_name += 5;
+    }
+
+    dictionary = IOService::serviceMatching( "IOMedia" );
+    if( dictionary ) {
+       dev_name = OSString::withCString( cdev_name );
+       if( dev_name ) {
+           IOService *service;
+           mach_timespec_t tv = { 5, 0 };    // wait up to 5 seconds for the device to appear
+
+           dictionary->setObject( kIOBSDNameKey, dev_name );
+           dictionary->retain();
+           service = IOService::waitForService( dictionary, &tv );
+           if( service ) {
+               OSBoolean *ejectable = (OSBoolean *) service->getProperty( "Ejectable" );
+
+               if( ejectable ) {
+                       ret = (int)ejectable->getValue();
+               }
+
+           }
+           dev_name->release();
+       }
+       dictionary->release();
+    }
+
+    return ret;
+}
+
 } /* extern "C" */
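Callers hand IOBSDIsMediaEjectable() a BSD device name, with or without the "/dev/" prefix; it matches the IOMedia service by kIOBSDNameKey (waiting up to five seconds for it to register) and reports the media's Ejectable property. A hypothetical call site, not part of this diff:

    /* Exported from IOKitBSDInit.cpp (see above). */
    extern int IOBSDIsMediaEjectable(const char *cdev_name);

    /* Hypothetical mount-time check: arm a periodic syncer for ejectable media. */
    static void
    example_mount_check(const char *devname)   /* e.g. "/dev/disk2s1" */
    {
        if (IOBSDIsMediaEjectable(devname)) {
            /* e.g. schedule this volume's thread_call-based syncer */
        }
    }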
index cd5bdbb714074cfd3a8a638dc12d33a2f9ac9cc4..17143604cc1e506b68a467fe00a2cb4782821fd3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -136,10 +136,24 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        acpi_hibernate_callback_data_t data;
        boolean_t did_hibernate;
 #endif
+       unsigned int    cpu;
+       kern_return_t   rc;
+       unsigned int    my_cpu;
 
        kprintf("acpi_sleep_kernel hib=%d\n",
                        current_cpu_datap()->cpu_hibernate);
 
+       /* Get all CPUs to be in the "off" state */
+       my_cpu = cpu_number();
+       for (cpu = 0; cpu < real_ncpus; cpu += 1) {
+               if (cpu == my_cpu)
+                       continue;
+               rc = pmCPUExitHaltToOff(cpu);
+               if (rc != KERN_SUCCESS)
+                   panic("Error %d trying to transition CPU %d to OFF",
+                         rc, cpu);
+       }
+
        /* shutdown local APIC before passing control to BIOS */
        lapic_shutdown();
 
index 1760eabf5962ea3d1787c7485d12491b50662a56..194a6576bc3a0589e5818285b85f6dd575910977 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -141,9 +141,13 @@ cpu_exit_wait(
 {
        cpu_data_t      *cdp = cpu_datap(cpu);
 
+       /*
+        * Wait until the CPU indicates that it has stopped.
+        */
        simple_lock(&x86_topo_lock);
        while ((cdp->lcpu.state != LCPU_HALT)
-              && (cdp->lcpu.state != LCPU_OFF)) {
+              && (cdp->lcpu.state != LCPU_OFF)
+              && !cdp->lcpu.stopped) {
            simple_unlock(&x86_topo_lock);
            cpu_pause();
            simple_lock(&x86_topo_lock);
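The new !cdp->lcpu.stopped disjunct lets cpu_exit_wait() return as soon as the departing CPU marks itself stopped, even before its state settles at LCPU_HALT or LCPU_OFF. Note the unlock/pause/lock dance: the waiter never spins while holding x86_topo_lock, which the dying CPU needs in order to publish its state change. A user-space analogue of that idiom, offered only as a sketch:

    #include <pthread.h>
    #include <sched.h>
    #include <stdbool.h>

    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool stopped;    /* published by the departing thread under state_lock */

    /* Poll for 'stopped' without ever spinning while the lock is held. */
    static void
    wait_until_stopped(void)
    {
        pthread_mutex_lock(&state_lock);
        while (!stopped) {
            pthread_mutex_unlock(&state_lock);
            sched_yield();              /* stands in for cpu_pause() */
            pthread_mutex_lock(&state_lock);
        }
        pthread_mutex_unlock(&state_lock);
    }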
index 88aa0f87b1189d9be69ac33ba1a13b077d4e2ef0..8decbb9437375f69632d9fca8b7a38341c1cfec3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -354,6 +354,19 @@ pmCPUExitHalt(int cpu)
     return(rc);
 }
 
+kern_return_t
+pmCPUExitHaltToOff(int cpu)
+{
+    kern_return_t      rc      = KERN_INVALID_ARGUMENT;
+
+    if (pmInitDone
+       && pmDispatch != NULL
+       && pmDispatch->exitHaltToOff != NULL)
+       rc = pmDispatch->exitHaltToOff(cpu_to_lcpu(cpu));
+
+    return(rc);
+}
+
 /*
  * Called to initialize the power management structures for the CPUs.
  */
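Like the other pm* entry points, pmCPUExitHaltToOff() is a guarded trampoline into the power-management plugin: if PM never finished initializing, or the plugin didn't install exitHaltToOff, it returns KERN_INVALID_ARGUMENT instead of dereferencing a NULL hook, which is exactly what acpi_sleep_kernel() turns into a panic. A standalone sketch of the guarded-dispatch convention (every name here is hypothetical):

    #include <stddef.h>

    typedef int kr_t;                   /* stands in for kern_return_t */
    #define KR_SUCCESS      0
    #define KR_INVALID_ARG  4           /* stands in for KERN_INVALID_ARGUMENT */

    typedef struct lcpu lcpu_t;         /* opaque per-CPU handle */
    typedef kr_t (*exit_hook_t)(lcpu_t *);

    static int         init_done;       /* stands in for pmInitDone */
    static exit_hook_t exit_hook;       /* stands in for pmDispatch->exitHaltToOff */

    /* Guarded dispatch: optional hooks fail cleanly when absent. */
    kr_t
    call_exit_hook(lcpu_t *lcpu)
    {
        if (!init_done || exit_hook == NULL)
            return (KR_INVALID_ARG);
        return (exit_hook(lcpu));
    }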
index 6026f5ed6ff5878c9c2b837b6431a54cef74ddce..cbfaebe658d377c9a8de23f690472bb300c385cb 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2009 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -37,7 +37,7 @@
  * This value should be changed each time that pmDispatch_t or pmCallBacks_t
  * changes.
  */
-#define PM_DISPATCH_VERSION    15
+#define PM_DISPATCH_VERSION    16
 
 /*
  * Dispatch table for functions that get installed when the power
@@ -68,6 +68,7 @@ typedef struct
     void               (*pmTimerStateSave)(void);
     void               (*pmTimerStateRestore)(void);
     kern_return_t      (*exitHalt)(x86_lcpu_t *lcpu);
+    kern_return_t      (*exitHaltToOff)(x86_lcpu_t *lcpu);
     void               (*markAllCPUsOff)(void);
     void               (*pmSetRunCount)(uint32_t count);
     boolean_t          (*pmIsCPUUnAvailable)(x86_lcpu_t *lcpu);
@@ -112,6 +113,7 @@ void pmCPUHalt(uint32_t reason);
 void pmTimerSave(void);
 void pmTimerRestore(void);
 kern_return_t pmCPUExitHalt(int cpu);
+kern_return_t pmCPUExitHaltToOff(int cpu);
 
 #define PM_HALT_NORMAL         0               /* normal halt path */
 #define PM_HALT_DEBUG          1               /* debug code wants to halt */
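Adding exitHaltToOff to pmDispatch_t changed the structure's layout, hence the PM_DISPATCH_VERSION bump from 15 to 16: a plugin built against the old table must be refused at registration time. A sketch of the handshake this version number enables (the registration function is hypothetical):

    #include <stdint.h>

    #define PM_DISPATCH_VERSION 16      /* must track pmCPU.h */

    typedef struct pm_dispatch pm_dispatch_t;  /* layout elided in this sketch */

    static const pm_dispatch_t *installed;

    /* Refuse a plugin whose dispatch-table layout doesn't match ours. */
    int
    register_pm_dispatch(uint32_t version, const pm_dispatch_t *table)
    {
        if (version != PM_DISPATCH_VERSION) {
            /* struct layouts differ; indirect calls would misbehave */
            return (-1);
        }
        installed = table;
        return (0);
    }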