git.saurik.com Git - apple/xnu.git - bsd/kern/kern_lockf.c (release xnu-1699.22.73)

diff --git a/bsd/kern/kern_lockf.c b/bsd/kern/kern_lockf.c
index 4e61180b6a492e0980c21a1d6c9fd72d98c58c82..b7775864c66048bd0ccc056bb2ff7412f4a27c51 100644
--- a/bsd/kern/kern_lockf.c
+++ b/bsd/kern/kern_lockf.c
@@ -89,7 +89,7 @@ static int maxlockdepth = MAXDEPTH;
 void lf_print(const char *tag, struct lockf *lock);
 void lf_printlist(const char *tag, struct lockf *lock);
 static int     lockf_debug = 2;
-SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
+SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");
 
 /*
  * If there is no mask bit selector, or there is one, and the selector is
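
The only change in this first hunk is tagging the lockf_debug sysctl with CTLFLAG_LOCKED, which marks the entry as doing its own locking so the sysctl layer does not have to serialize it behind its legacy global lock. As a point of reference (not part of this patch, and with illustrative names), a debug knob in xnu generally follows this pattern:

static int example_debug = 0;	/* hypothetical knob, for illustration only */
SYSCTL_INT(_debug, OID_AUTO, example_debug,
    CTLFLAG_RW | CTLFLAG_LOCKED, &example_debug, 0,
    "example debug flag (illustrative)");

Once registered, the knob is visible from user space via sysctl(8), e.g. "sysctl debug.lockf_debug=3".
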
@@ -129,10 +129,12 @@ static overlap_t lf_findoverlap(struct lockf *,
            struct lockf *, int, struct lockf ***, struct lockf **);
 static struct lockf *lf_getblock(struct lockf *);
 static int      lf_getlock(struct lockf *, struct flock *);
+#if CONFIG_EMBEDDED
+static int      lf_getlockpid(struct vnode *, struct flock *);
+#endif
 static int      lf_setlock(struct lockf *);
 static int      lf_split(struct lockf *, struct lockf *);
-static void     lf_wakelock(struct lockf *);
-
+static void     lf_wakelock(struct lockf *, boolean_t);
 
 /*
  * lf_advlock
@@ -172,6 +174,11 @@ lf_advlock(struct vnop_advlock_args *ap)
 
        /* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */
 
+#if CONFIG_EMBEDDED
+       if (ap->a_op == F_GETLKPID)
+               return lf_getlockpid(vp, fl);
+#endif
+
        /*
         * Avoid the common case of unlocking when inode has no locks.
         */
@@ -262,6 +269,9 @@ lf_advlock(struct vnop_advlock_args *ap)
        TAILQ_INIT(&lock->lf_blkhd);
        lock->lf_flags = ap->a_flags;
 
+       if (ap->a_flags & F_FLOCK)
+               lock->lf_flags |= F_WAKE1_SAFE;
+
        lck_mtx_lock(&vp->v_lock);      /* protect the lockf list */
        /*
         * Do the requested operation.
@@ -286,7 +296,7 @@ lf_advlock(struct vnop_advlock_args *ap)
                error = EINVAL;
                break;
        }
-       lck_mtx_unlock(&vp->v_lock);    /* done maniplulating the list */
+       lck_mtx_unlock(&vp->v_lock);    /* done manipulating the list */
 
        LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
        return (error);
@@ -294,25 +304,42 @@ lf_advlock(struct vnop_advlock_args *ap)
 
 
 /*
- * lf_coelesce_adjacent
+ * Take any lock attempts which are currently blocked by a given lock ("from")
+ * and mark them as blocked by a different lock ("to").  Used in the case
+ * where a byte range currently occupied by "from" is to be occupied by "to."
+ */
+static void
+lf_move_blocked(struct lockf *to, struct lockf *from)
+{
+       struct lockf *tlock;
+
+       TAILQ_FOREACH(tlock, &from->lf_blkhd, lf_block) {
+               tlock->lf_next = to;
+       }
+
+       TAILQ_CONCAT(&to->lf_blkhd, &from->lf_blkhd, lf_block);
+}
+
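
lf_move_blocked() re-parents waiters: every lock sleeping on "from" has its lf_next blocker pointer retargeted, and then the whole blocked queue is spliced onto "to" with a single TAILQ_CONCAT. A minimal user-space sketch of the same idiom, using toy stand-in types rather than the real struct lockf (buildable against a BSD/macOS <sys/queue.h>, which provides TAILQ_CONCAT):

#include <sys/queue.h>
#include <stdio.h>

/* Toy stand-ins for struct lockf; only the fields the idiom needs. */
struct lk {
	struct lk *next;		/* like lf_next: who blocks us */
	TAILQ_HEAD(, lk) blkhd;		/* like lf_blkhd: who we block */
	TAILQ_ENTRY(lk) block;		/* like lf_block: list linkage */
	const char *name;
};

static void
move_blocked(struct lk *to, struct lk *from)
{
	struct lk *t;

	/* Retarget each waiter's blocker pointer... */
	TAILQ_FOREACH(t, &from->blkhd, block)
		t->next = to;
	/* ...then splice the queue across; 'from' ends up empty. */
	TAILQ_CONCAT(&to->blkhd, &from->blkhd, block);
}

int
main(void)
{
	struct lk a = { .name = "a" }, b = { .name = "b" }, w = { .name = "w" };

	TAILQ_INIT(&a.blkhd);
	TAILQ_INIT(&b.blkhd);
	TAILQ_INIT(&w.blkhd);

	w.next = &a;				/* w waits on a */
	TAILQ_INSERT_TAIL(&a.blkhd, &w, block);

	move_blocked(&b, &a);			/* a's range is now b's */
	printf("w is now blocked by %s\n", w.next->name);	/* prints "b" */
	return 0;
}
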
+/*
+ * lf_coalesce_adjacent
  *
- * Description:        Helper function: when setting a lock, coelesce adjacent
+ * Description:        Helper function: when setting a lock, coalesce adjacent
  *             locks.  Needed because adjacent locks are not overlapping,
- *             but POSIX requires that they be coelesced.
+ *             but POSIX requires that they be coalesced.
  *
  * Parameters: lock                    The new lock which may be adjacent
- *                                     to already locked reagions, and which
- *                                     should therefore be coelesced with them
+ *                                     to already locked regions, and which
+ *                                     should therefore be coalesced with them
  *
  * Returns:    <void>
  */
 static void
-lf_coelesce_adjacent(struct lockf *lock)
+lf_coalesce_adjacent(struct lockf *lock)
 {
        struct lockf **lf = lock->lf_head;
 
        while (*lf != NOLOCKF) {
-               /* reject locks that obviously could not be coelesced */
+               /* reject locks that obviously could not be coalesced */
                if ((*lf == lock) ||
                    ((*lf)->lf_id != lock->lf_id) ||
                    ((*lf)->lf_type != lock->lf_type)) {
@@ -320,27 +347,38 @@ lf_coelesce_adjacent(struct lockf *lock)
                        continue;
                }
 
+               /*
+                * NOTE: Assumes that if two locks are adjacent on the number line 
+                * and belong to the same owner, then they are adjacent on the list.
+                */
+
                /* If the lock ends adjacent to us, we can coalesce it */
                if ((*lf)->lf_end != -1 &&
                    ((*lf)->lf_end + 1) == lock->lf_start) {
                        struct lockf *adjacent = *lf;
 
-                       LOCKF_DEBUG(0, "lf_coelesce_adjacent: coelesce adjacent previous\n");
+                       LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n");
                        lock->lf_start = (*lf)->lf_start;
                        *lf = lock;
                        lf = &(*lf)->lf_next;
+
+                       lf_move_blocked(lock, adjacent);
+
                        FREE(adjacent, M_LOCKF);
                        continue;
                }
-               /* If the lock starts adjacent to us, we can coelesce it */
+               /* If the lock starts adjacent to us, we can coalesce it */
                if (lock->lf_end != -1 &&
                    (lock->lf_end + 1) == (*lf)->lf_start) {
                        struct lockf *adjacent = *lf;
 
-                       LOCKF_DEBUG(0, "lf_coelesce_adjacent: coelesce adjacent following\n");
+                       LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n");
                        lock->lf_end = (*lf)->lf_end;
                        lock->lf_next = (*lf)->lf_next;
                        lf = &lock->lf_next;
+
+                       lf_move_blocked(lock, adjacent);
+
                        FREE(adjacent, M_LOCKF);
                        continue;
                }
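
The adjacency tests above operate on inclusive [lf_start, lf_end] byte ranges, where an lf_end of -1 means "through end of file": a lock ending at byte N abuts one starting at N + 1, so, for example, [0,99] and [100,199] coalesce into [0,199]. A hedged sketch of just that predicate:

#include <stdbool.h>
#include <sys/types.h>

/*
 * Sketch of the adjacency test used above: inclusive [start, end]
 * ranges, end == -1 meaning "through EOF".  An unbounded range can
 * never sit immediately before another one.
 */
static bool
ends_adjacent(off_t prev_end, off_t next_start)
{
	return (prev_end != -1 && prev_end + 1 == next_start);
}
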
@@ -370,7 +408,7 @@ lf_coelesce_adjacent(struct lockf *lock)
  *     msleep:EINTR
  *
  * Notes:      We add the lock to the provisional lock list.  We do not
- *             coelesce at this time; this has implications for other lock
+ *             coalesce at this time; this has implications for other lock
  *             requestors in the blocker search mechanism.
  */
 static int
@@ -502,6 +540,10 @@ lf_setlock(struct lockf *lock)
                 */
                lock->lf_next = block;
                TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
+
+               if ( !(lock->lf_flags & F_FLOCK))
+                       block->lf_flags &= ~F_WAKE1_SAFE;
+
 #ifdef LOCKF_DEBUGGING
                if (lockf_debug & 1) {
                        lf_print("lf_setlock: blocking on", block);
@@ -509,6 +551,12 @@ lf_setlock(struct lockf *lock)
                }
 #endif /* LOCKF_DEBUGGING */
                error = msleep(lock, &vp->v_lock, priority, lockstr, 0);
+
+               if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
+                       if ((block = lf_getblock(lock))) {
+                               lf_move_blocked(block, lock);
+                       }
+               }
                if (error) {    /* XXX */
                        /*
                         * We may have been awakened by a signal and/or by a
@@ -522,6 +570,9 @@ lf_setlock(struct lockf *lock)
                                TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
                                lock->lf_next = NOLOCKF;
                        }
+                       if (!TAILQ_EMPTY(&lock->lf_blkhd))
+                               lf_wakelock(lock, TRUE);
+
                        FREE(lock, M_LOCKF);
                        return (error);
                }       /* XXX */
@@ -565,10 +616,10 @@ lf_setlock(struct lockf *lock)
                         */
                        if (lock->lf_type == F_RDLCK &&
                            overlap->lf_type == F_WRLCK)
-                               lf_wakelock(overlap);
+                               lf_wakelock(overlap, TRUE);
                        overlap->lf_type = lock->lf_type;
                        FREE(lock, M_LOCKF);
-                       lock = overlap; /* for lf_coelesce_adjacent() */
+                       lock = overlap; /* for lf_coalesce_adjacent() */
                        break;
 
                case OVERLAP_CONTAINS_LOCK:
@@ -577,7 +628,7 @@ lf_setlock(struct lockf *lock)
                         */
                        if (overlap->lf_type == lock->lf_type) {
                                FREE(lock, M_LOCKF);
-                               lock = overlap; /* for lf_coelesce_adjacent() */
+                               lock = overlap; /* for lf_coalesce_adjacent() */
                                break;
                        }
                        if (overlap->lf_start == lock->lf_start) {
@@ -595,7 +646,7 @@ lf_setlock(struct lockf *lock)
                                        return (ENOLCK);
                                }
                        }
-                       lf_wakelock(overlap);
+                       lf_wakelock(overlap, TRUE);
                        break;
 
                case OVERLAP_CONTAINED_BY_LOCK:
@@ -605,7 +656,7 @@ lf_setlock(struct lockf *lock)
                         */
                        if (lock->lf_type == F_RDLCK &&
                            overlap->lf_type == F_WRLCK) {
-                               lf_wakelock(overlap);
+                               lf_wakelock(overlap, TRUE);
                        } else {
                                while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
                                        ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
@@ -637,7 +688,7 @@ lf_setlock(struct lockf *lock)
                        overlap->lf_next = lock;
                        overlap->lf_end = lock->lf_start - 1;
                        prev = &lock->lf_next;
-                       lf_wakelock(overlap);
+                       lf_wakelock(overlap, TRUE);
                        needtolink = 0;
                        continue;
 
@@ -650,13 +701,13 @@ lf_setlock(struct lockf *lock)
                                lock->lf_next = overlap;
                        }
                        overlap->lf_start = lock->lf_end + 1;
-                       lf_wakelock(overlap);
+                       lf_wakelock(overlap, TRUE);
                        break;
                }
                break;
        }
-       /* Coelesce adjacent locks with identical attributes */
-       lf_coelesce_adjacent(lock);
+       /* Coalesce adjacent locks with identical attributes */
+       lf_coalesce_adjacent(lock);
 #ifdef LOCKF_DEBUGGING
        if (lockf_debug & 1) {
                lf_print("lf_setlock: got the lock", lock);
@@ -704,7 +755,7 @@ lf_clearlock(struct lockf *unlock)
                /*
                 * Wakeup the list of locks to be retried.
                 */
-               lf_wakelock(overlap);
+               lf_wakelock(overlap, FALSE);
 
                switch (ovcase) {
                case OVERLAP_NONE:      /* satisfy compiler enum/switch */
@@ -804,6 +855,55 @@ lf_getlock(struct lockf *lock, struct flock *fl)
        return (0);
 }
 
+#if CONFIG_EMBEDDED
+int lf_getlockpid(struct vnode *vp, struct flock *fl)
+{
+       struct lockf *lf, *blk;
+
+       if (vp == 0)
+               return EINVAL;
+
+       fl->l_type = F_UNLCK;
+       
+       lck_mtx_lock(&vp->v_lock);
+
+       for (lf = vp->v_lockf; lf; lf = lf->lf_next) {
+
+               if (lf->lf_flags & F_POSIX) {
+                       if ((((struct proc *)lf->lf_id)->p_pid) == fl->l_pid) {
+                               fl->l_type = lf->lf_type;
+                               fl->l_whence = SEEK_SET;
+                               fl->l_start = lf->lf_start;
+                               if (lf->lf_end == -1)
+                                       fl->l_len = 0;
+                               else
+                                       fl->l_len = lf->lf_end - lf->lf_start + 1;
+
+                               break;
+                       }
+               }
+
+               TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
+                       if (blk->lf_flags & F_POSIX) {
+                               if ((((struct proc *)blk->lf_id)->p_pid) == fl->l_pid) {
+                                       fl->l_type = blk->lf_type;
+                                       fl->l_whence = SEEK_SET;
+                                       fl->l_start = blk->lf_start;
+                                       if (blk->lf_end == -1)
+                                               fl->l_len = 0;
+                                       else
+                                               fl->l_len = blk->lf_end - blk->lf_start + 1;
+
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       lck_mtx_unlock(&vp->v_lock);
+       return (0);
+}
+#endif
 
 /*
  * lf_getblock
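
lf_getlockpid() services the (embedded-only) F_GETLKPID fcntl: where F_GETLK asks "what would block this proposed lock", F_GETLKPID asks "what lock does this particular pid hold or wait on for this vnode", taking the pid of interest in l_pid and returning F_UNLCK in l_type when nothing matches. A hedged user-space sketch, assuming the build exposes the F_GETLKPID command in <fcntl.h>:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Sketch only: this patch handles F_GETLKPID solely on CONFIG_EMBEDDED
 * kernels, so availability depends on the platform build.
 */
static int
query_lock_for_pid(int fd, pid_t pid)
{
	struct flock fl;

	memset(&fl, 0, sizeof(fl));
	fl.l_pid = pid;			/* input: which process to look up */

	if (fcntl(fd, F_GETLKPID, &fl) == -1)
		return (-1);

	if (fl.l_type == F_UNLCK)
		printf("pid %d holds no POSIX lock on this file\n", (int)pid);
	else
		printf("pid %d: type %d, start %lld, len %lld\n", (int)pid,
		    fl.l_type, (long long)fl.l_start, (long long)fl.l_len);
	return (0);
}
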
@@ -880,7 +980,7 @@ lf_getblock(struct lockf *lock)
  *             while lf_setlock will iterate over all overlapping locks to
  *
  *             The check parameter can be SELF, meaning we are looking for
- *             overelapping locks owned by us, or it can be OTHERS, meaning
+ *             overlapping locks owned by us, or it can be OTHERS, meaning
  *             we are looking for overlapping locks owned by someone else so
  *             we can report a blocking lock on an F_GETLK request.
  *
@@ -892,6 +992,7 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
               struct lockf ***prev, struct lockf **overlap)
 {
        off_t start, end;
+       int found_self = 0;
 
        *overlap = lf;
        if (lf == NOLOCKF)
@@ -905,10 +1006,28 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
        while (lf != NOLOCKF) {
                if (((type & SELF) && lf->lf_id != lock->lf_id) ||
                    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
+                       /* 
+                        * Locks belonging to one process are adjacent on the
+                        * list, so if we've found any locks belonging to us,
+                        * and we're now seeing something else, then we've
+                        * examined all "self" locks.  Note that bailing out
+                        * here is quite important; for coalescing, we assume 
+                        * numerically adjacent locks from the same owner to 
+                        * be adjacent on the list.
+                        */
+                       if ((type & SELF) && found_self) {
+                               return OVERLAP_NONE;
+                       }
+
                        *prev = &lf->lf_next;
                        *overlap = lf = lf->lf_next;
                        continue;
                }
+
+               if ((type & SELF)) {
+                       found_self = 1;
+               }
+
 #ifdef LOCKF_DEBUGGING
                if (lockf_debug & 2)
                        lf_print("\tchecking", lf);
@@ -920,6 +1039,11 @@ lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
                    (end != -1 && lf->lf_start > end)) {
                        /* Case 0 */
                        LOCKF_DEBUG(2, "no overlap\n");
+
+                       /*
+                        * NOTE: assumes that locks for the same process are 
+                        * nonintersecting and ordered.
+                        */
                        if ((type & SELF) && end != -1 && lf->lf_start > end)
                                return (OVERLAP_NONE);
                        *prev = &lf->lf_next;
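
Both early exits in the SELF scan lean on the same invariant: one owner's locks are kept sorted, non-intersecting, and contiguous on the per-vnode list, so once the scan has passed the owner's run of entries, or passed beyond the end of the range being probed, nothing later can match. A hedged sketch of that scan shape over a plain sorted array:

#include <stdbool.h>
#include <stddef.h>
#include <sys/types.h>

struct range {
	int owner;
	off_t start, end;	/* inclusive; end == -1 means "through EOF" */
};

/*
 * Sketch of the SELF-scan shape: 'v' is sorted by start, and each
 * owner's entries are contiguous and non-overlapping (the invariant the
 * patch's comments call out).  Returns true if 'owner' holds a range
 * overlapping [start, end].
 */
static bool
owner_overlaps(const struct range *v, size_t n, int owner,
    off_t start, off_t end)
{
	bool found_self = false;
	size_t i;

	for (i = 0; i < n; i++) {
		if (v[i].owner != owner) {
			if (found_self)
				return false;	/* left our contiguous run */
			continue;
		}
		found_self = true;
		if (end != -1 && v[i].start > end)
			return false;		/* sorted: no later overlap */
		if (v[i].end == -1 || v[i].end >= start)
			return true;
	}
	return false;
}
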
@@ -1048,19 +1172,43 @@ lf_split(struct lockf *lock1, struct lockf *lock2)
  *             in a real-world performance problem.
  */
 static void
-lf_wakelock(struct lockf *listhead)
+lf_wakelock(struct lockf *listhead, boolean_t force_all)
 {
        struct lockf *wakelock;
+       boolean_t wake_all = TRUE;
+
+       if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE))
+               wake_all = FALSE;
 
        while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
                wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
                TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
+
                wakelock->lf_next = NOLOCKF;
 #ifdef LOCKF_DEBUGGING
                if (lockf_debug & 2)
                        lf_print("lf_wakelock: awakening", wakelock);
 #endif /* LOCKF_DEBUGGING */
+               if (wake_all == FALSE) {
+                       /*
+                        * If there are items on the list head block list,
+                        * move them to the wakelock list instead, and then
+                        * correct their lf_next pointers.
+                        */
+                       if (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
+                               TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block);
+
+                               struct lockf *tlock;
+
+                               TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
+                                       tlock->lf_next = wakelock;
+                               }
+                       }
+               }
                wakeup(wakelock);
+
+               if (wake_all == FALSE)
+                       break;
        }
 }
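
The net effect: for flock-style whole-file locks (marked F_WAKE1_SAFE), waking every waiter is wasted work because exactly one of them can win, so lf_wakelock() now wakes only the first waiter and hands it the remainder of the queue, making it the new blocker of record. A compact sketch of that hand-off, reusing the same toy types as the lf_move_blocked example above (the kernel's wakeup() elided):

#include <stddef.h>
#include <sys/queue.h>

struct lk {
	struct lk *next;
	TAILQ_HEAD(, lk) blkhd;
	TAILQ_ENTRY(lk) block;
};

/*
 * Sketch of the wake-one path: pop the first waiter, splice the rest of
 * the blocked queue onto it, and retarget their blocker pointers; only
 * the popped waiter would then be passed to wakeup().
 */
static struct lk *
wake_one(struct lk *head)
{
	struct lk *w, *t;

	if ((w = TAILQ_FIRST(&head->blkhd)) == NULL)
		return NULL;
	TAILQ_REMOVE(&head->blkhd, w, block);
	w->next = NULL;

	if (!TAILQ_EMPTY(&head->blkhd)) {
		TAILQ_CONCAT(&w->blkhd, &head->blkhd, block);
		TAILQ_FOREACH(t, &w->blkhd, block)
			t->next = w;	/* remaining waiters now wait on w */
	}
	/* kernel would call wakeup(w) here */
	return w;
}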