]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/kern/kern_lockf.c
xnu-1228.0.2.tar.gz
[apple/xnu.git] / bsd / kern / kern_lockf.c
index 1ef3470cec9423d2c532851dc283299e614a5a4b..4e61180b6a492e0980c21a1d6c9fd72d98c58c82 100644 (file)
@@ -1,3 +1,30 @@
+/*
+ * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
  *     The Regents of the University of California.  All rights reserved.
@@ -39,7 +66,9 @@
 #include <sys/lock.h>
 #include <sys/mount.h>
 #include <sys/proc.h>
+#include <sys/signalvar.h>
 #include <sys/unistd.h>
+#include <sys/user.h>
 #include <sys/vnode.h>
 #include <sys/vnode_internal.h>
 #include <sys/vnode_if.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 
-#if DEAD_CODE
 /*
  * This variable controls the maximum number of processes that will
  * be checked in doing deadlock detection.
  */
 static int maxlockdepth = MAXDEPTH;
-#endif /* DEAD_CODE */
 
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
 #include <sys/sysctl.h>
-
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
-
-
+void lf_print(const char *tag, struct lockf *lock);
+void lf_printlist(const char *tag, struct lockf *lock);
 static int     lockf_debug = 2;
 SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
-#endif
+
+/*
+ * If there is no mask bit selector, or there is one, and the selector is
+ * set, then output the debugging diagnostic.
+ */
+#define LOCKF_DEBUG(mask, ...)                                 \
+       do {                                                    \
+               if( !(mask) || ((mask) & lockf_debug)) {        \
+                       printf(__VA_ARGS__);                    \
+               }                                               \
+       } while(0)
+#else  /* !LOCKF_DEBUGGING */
+#define LOCKF_DEBUG(mask, ...)         /* mask */
+#endif /* !LOCKF_DEBUGGING */
 
 MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
 
@@ -72,29 +111,55 @@ MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
 #define SELF   0x1
 #define OTHERS 0x2
 #define OFF_MAX        0x7fffffffffffffffULL   /* max off_t */
+
+/*
+ * Overlapping lock states
+ */
+typedef enum {
+       OVERLAP_NONE = 0,
+       OVERLAP_EQUALS_LOCK,
+       OVERLAP_CONTAINS_LOCK,
+       OVERLAP_CONTAINED_BY_LOCK,
+       OVERLAP_STARTS_BEFORE_LOCK,
+       OVERLAP_ENDS_AFTER_LOCK
+} overlap_t;
+
 static int      lf_clearlock(struct lockf *);
-static int      lf_findoverlap(struct lockf *,
+static overlap_t lf_findoverlap(struct lockf *,
            struct lockf *, int, struct lockf ***, struct lockf **);
-static struct lockf *
-        lf_getblock(struct lockf *);
+static struct lockf *lf_getblock(struct lockf *);
 static int      lf_getlock(struct lockf *, struct flock *);
 static int      lf_setlock(struct lockf *);
-static void     lf_split(struct lockf *, struct lockf *);
+static int      lf_split(struct lockf *, struct lockf *);
 static void     lf_wakelock(struct lockf *);
 
+
 /*
- * Advisory record locking support
+ * lf_advlock
+ *
+ * Description:        Advisory record locking support
+ *
+ * Parameters: ap                      Argument pointer to a vnop_advlock_args
+ *                                     argument descriptor structure for the
+ *                                     lock operation to be attempted.
+ *
+ * Returns:    0                       Success
+ *             EOVERFLOW
+ *             EINVAL
+ *             ENOLCK                  Number of locked regions exceeds limit
+ *     lf_setlock:EAGAIN
+ *     lf_setlock:EDEADLK
+ *     lf_setlock:EINTR
+ *     lf_setlock:ENOLCK
+ *     lf_clearlock:ENOLCK
+ *     vnode_size:???
+ *
+ * Notes:      We return ENOLCK when we run out of memory to support locks; as
+ *             such, there is no specific expectation limit other than the
+ *             amount of available resources.
  */
 int
-lf_advlock(ap)
-       struct vnop_advlock_args /* {
-               struct vnode *a_vp;
-               caddr_t  a_id;
-               int  a_op;
-               struct flock *a_fl;
-               int  a_flags;
-               vfs_context_t a_context;
-       } */ *ap;
+lf_advlock(struct vnop_advlock_args *ap)
 {
        struct vnode *vp = ap->a_vp;
        struct flock *fl = ap->a_fl;
@@ -113,9 +178,7 @@ lf_advlock(ap)
        if (*head == (struct lockf *)0) {
                if (ap->a_op != F_SETLK) {
                        fl->l_type = F_UNLCK;
-#ifdef LOCKF_DEBUG
-               printf("lf_advlock: unlock without lock\n");
-#endif /* LOCKF_DEBUG */
+                       LOCKF_DEBUG(0, "lf_advlock: '%s' unlock without lock\n", vfs_context_proc(context)->p_comm);
                        return (0);
                }
        }
@@ -136,67 +199,59 @@ lf_advlock(ap)
 
        case SEEK_END:
 
-               if ((error = vnode_size(vp, &size, context)))
-{
-#ifdef LOCKF_DEBUG
-               printf("lf_advlock: vnode_getattr failed: %d\n", error);
-#endif /* LOCKF_DEBUG */
+               /*
+                * It's OK to cast the u_quad_t to an off_t here, since they
+                * are the same storage size, and the value of the returned
+                * contents will never overflow into the sign bit.  We need to
+                * do this because we will use size to force range checks.
+                */
+               if ((error = vnode_size(vp, (off_t *)&size, context))) {
+                       LOCKF_DEBUG(0, "lf_advlock: vnode_getattr failed: %d\n", error);
                        return (error);
-}
+               }
 
                if (size > OFF_MAX ||
-                   (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
+                   (fl->l_start > 0 &&
+                    size > (u_quad_t)(OFF_MAX - fl->l_start)))
                        return (EOVERFLOW);
                start = size + fl->l_start;
                break;
 
        default:
-#ifdef LOCKF_DEBUG
-               printf("lf_advlock: unknown whence %d\n", fl->l_whence);
-#endif /* LOCKF_DEBUG */
+               LOCKF_DEBUG(0, "lf_advlock: unknown whence %d\n", fl->l_whence);
                return (EINVAL);
        }
-       if (start < 0)
-{
-#ifdef LOCKF_DEBUG
-               printf("lf_advlock: start < 0 (%qd)\n", start);
-#endif /* LOCKF_DEBUG */
+       if (start < 0) {
+               LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
                return (EINVAL);
-}
+       }
        if (fl->l_len < 0) {
-               if (start == 0)
-{
-#ifdef LOCKF_DEBUG
-               printf("lf_advlock: len < 0 & start == 0\n");
-#endif /* LOCKF_DEBUG */
+               if (start == 0) {
+                       LOCKF_DEBUG(0, "lf_advlock: len < 0 & start == 0\n");
                        return (EINVAL);
-}
+               }
                end = start - 1;
                start += fl->l_len;
-               if (start < 0)
-{
-#ifdef LOCKF_DEBUG
-               printf("lf_advlock: start < 0 (%qd)\n", start);
-#endif /* LOCKF_DEBUG */
+               if (start < 0) {
+                       LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
                        return (EINVAL);
-}
+               }
        } else if (fl->l_len == 0)
                end = -1;
        else {
                oadd = fl->l_len - 1;
-               if (oadd > (off_t)(OFF_MAX - start))
-{
-#ifdef LOCKF_DEBUG
-               printf("lf_advlock: overflow\n");
-#endif /* LOCKF_DEBUG */
+               if (oadd > (off_t)(OFF_MAX - start)) {
+                       LOCKF_DEBUG(0, "lf_advlock: overflow\n");
                        return (EOVERFLOW);
-}
+               }
                end = start + oadd;
        }
        /*
         * Create the lockf structure
         */
        MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
+       if (lock == NULL)
+               return (ENOLCK);
        lock->lf_start = start;
        lock->lf_end = end;
        lock->lf_id = ap->a_id;
@@ -233,30 +288,108 @@ lf_advlock(ap)
        }
        lck_mtx_unlock(&vp->v_lock);    /* done manipulating the list */
 
-#ifdef LOCKF_DEBUG
-       printf("lf_advlock: normal exit: %d\n", error);
-#endif /* LOCKF_DEBUG */
+       LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
        return (error);
 }
 
+
 /*
- * Set a byte-range lock.
+ * lf_coelesce_adjacent
+ *
+ * Description:        Helper function: when setting a lock, coelesce adjacent
+ *             locks.  Needed because adjacent locks are not overlapping,
+ *             but POSIX requires that they be coelesced.
+ *
+ * Parameters: lock                    The new lock which may be adjacent
+ *                                     to already locked regions, and which
+ *                                     should therefore be coelesced with them
+ *
+ * Returns:    <void>
+ */
+static void
+lf_coelesce_adjacent(struct lockf *lock)
+{
+       struct lockf **lf = lock->lf_head;
+
+       while (*lf != NOLOCKF) {
+               /* reject locks that obviously could not be coelesced */
+               if ((*lf == lock) ||
+                   ((*lf)->lf_id != lock->lf_id) ||
+                   ((*lf)->lf_type != lock->lf_type)) {
+                       lf = &(*lf)->lf_next;
+                       continue;
+               }
+
+               /* If the lock ends adjacent to us, we can coelesce it */
+               if ((*lf)->lf_end != -1 &&
+                   ((*lf)->lf_end + 1) == lock->lf_start) {
+                       struct lockf *adjacent = *lf;
+
+                       LOCKF_DEBUG(0, "lf_coelesce_adjacent: coelesce adjacent previous\n");
+                       lock->lf_start = (*lf)->lf_start;
+                       *lf = lock;
+                       lf = &(*lf)->lf_next;
+                       FREE(adjacent, M_LOCKF);
+                       continue;
+               }
+               /* If the lock starts adjacent to us, we can coelesce it */
+               if (lock->lf_end != -1 &&
+                   (lock->lf_end + 1) == (*lf)->lf_start) {
+                       struct lockf *adjacent = *lf;
+
+                       LOCKF_DEBUG(0, "lf_coelesce_adjacent: coelesce adjacent following\n");
+                       lock->lf_end = (*lf)->lf_end;
+                       lock->lf_next = (*lf)->lf_next;
+                       lf = &lock->lf_next;
+                       FREE(adjacent, M_LOCKF);
+                       continue;
+               }
+
+               /* no matching conditions; go on to next lock */
+               lf = &(*lf)->lf_next;
+       }
+}
+
+
+/*
+ * lf_setlock
+ *
+ * Description:        Set a byte-range lock.
+ *
+ * Parameters: lock                    The lock structure describing the lock
+ *                                     to be set; allocated by the caller, it
+ *                                     will be linked into the lock list if
+ *                                     the set is successful, and freed if the
+ *                                     set is unsuccessful.
+ *
+ * Returns:    0                       Success
+ *             EAGAIN
+ *             EDEADLK
+ *     lf_split:ENOLCK
+ *     lf_clearlock:ENOLCK
+ *     msleep:EINTR
+ *
+ * Notes:      We add the lock to the provisional lock list.  We do not
+ *             coelesce at this time; this has implications for other lock
+ *             requestors in the blocker search mechanism.
  */
 static int
-lf_setlock(lock)
-       struct lockf *lock;
+lf_setlock(struct lockf *lock)
 {
        struct lockf *block;
        struct lockf **head = lock->lf_head;
        struct lockf **prev, *overlap, *ltmp;
        static char lockstr[] = "lockf";
-       int ovcase, priority, needtolink, error;
+       int priority, needtolink, error;
        struct vnode *vp = lock->lf_vnode;
+       overlap_t ovcase;
 
-#ifdef LOCKF_DEBUG
-       if (lockf_debug & 1)
+#ifdef LOCKF_DEBUGGING
+       if (lockf_debug & 1) {
                lf_print("lf_setlock", lock);
-#endif /* LOCKF_DEBUG */
+               lf_printlist("lf_setlock(in)", lock);
+       }
+#endif /* LOCKF_DEBUGGING */
 
        /*
         * Set the priority
@@ -276,10 +409,7 @@ lf_setlock(lock)
                        FREE(lock, M_LOCKF);
                        return (EAGAIN);
                }
-#if DEAD_CODE
-/*
- * XXX This is dead code on MacOS X; it shouldn't be.
- */
+
                /*
                 * We are blocked. Since flock style locks cover
                 * the whole file, there is no chance for deadlock.
@@ -292,35 +422,66 @@ lf_setlock(lock)
                 */
                if ((lock->lf_flags & F_POSIX) &&
                    (block->lf_flags & F_POSIX)) {
-                       struct proc *wproc;
-                       struct thread *td;
+                       struct proc *wproc, *bproc;
+                       struct uthread *ut;
                        struct lockf *waitblock;
                        int i = 0;
 
                        /* The block is waiting on something */
-                       /* XXXKSE this is not complete under threads */
                        wproc = (struct proc *)block->lf_id;
-                       mtx_lock_spin(&sched_lock);
-                       FOREACH_THREAD_IN_PROC(wproc, td) {
-                               while (td->td_wchan &&
-                                   (td->td_wmesg == lockstr) &&
+                       proc_lock(wproc);
+                       TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
+                               /*
+                                * While the thread is asleep (uu_wchan != 0)
+                                * in this code (uu_wmesg == lockstr)
+                                * and we have not exceeded the maximum cycle
+                                * depth (i < maxlockdepth), then check for a
+                                * cycle to see if the lock is blocked behind
+                                * someone blocked behind us.
+                                */
+                               while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) &&
+                                   ut->uu_wmesg == lockstr &&
                                    (i++ < maxlockdepth)) {
-                                       waitblock = (struct lockf *)td->td_wchan;
-                                       /* Get the owner of the blocking lock */
+                                       waitblock = (struct lockf *)ut->uu_wchan;
+                                       /*
+                                        * Get the lock blocking the lock
+                                        * which would block us, and make
+                                        * certain it hasn't come unblocked
+                                        * (been granted, e.g. between the time
+                                        * we called lf_getblock, and the time
+                                        * we successfully acquired the
+                                        * proc_lock).
+                                        */
                                        waitblock = waitblock->lf_next;
+                                       if (waitblock == NULL)
+                                               break;
+
+                                       /*
+                                        * Make sure it's an advisory range
+                                        * lock and not an overall file lock;
+                                        * if we mix lock types, it's our own
+                                        * fault.
+                                        */
                                        if ((waitblock->lf_flags & F_POSIX) == 0)
                                                break;
-                                       wproc = (struct proc *)waitblock->lf_id;
-                                       if (wproc == (struct proc *)lock->lf_id) {
-                                               mtx_unlock_spin(&sched_lock);
+
+                                       /*
+                                        * If the owner of the lock that's
+                                        * blocking a lock that's blocking us
+                                        * getting the requested lock, then we
+                                        * would deadlock, so error out.
+                                        */
+                                       bproc = (struct proc *)waitblock->lf_id;
+                                       if (bproc == (struct proc *)lock->lf_id) {
+                                               proc_unlock(wproc);
                                                FREE(lock, M_LOCKF);
                                                return (EDEADLK);
                                        }
                                }
                        }
-                       mtx_unlock_spin(&sched_lock);
+                       proc_unlock(wproc);
                }
-#endif /* DEAD_CODE */
+
                /*
                 * For flock type locks, we must first remove
                 * any shared locks that we hold before we sleep
@@ -329,7 +490,10 @@ lf_setlock(lock)
                if ((lock->lf_flags & F_FLOCK) &&
                    lock->lf_type == F_WRLCK) {
                        lock->lf_type = F_UNLCK;
-                       (void) lf_clearlock(lock);
+                       if ((error = lf_clearlock(lock)) != 0) {
+                               FREE(lock, M_LOCKF);
+                               return (error);
+                       }
                        lock->lf_type = F_WRLCK;
                }
                /*
@@ -338,12 +502,12 @@ lf_setlock(lock)
                 */
                lock->lf_next = block;
                TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
                if (lockf_debug & 1) {
                        lf_print("lf_setlock: blocking on", block);
-                       lf_printlist("lf_setlock", block);
+                       lf_printlist("lf_setlock(block)", block);
                }
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
                error = msleep(lock, &vp->v_lock, priority, lockstr, 0);
                if (error) {    /* XXX */
                        /*
@@ -387,14 +551,14 @@ lf_setlock(lock)
                 *      5) overlap ends after lock
                 */
                switch (ovcase) {
-               case 0: /* no overlap */
+               case OVERLAP_NONE:
                        if (needtolink) {
                                *prev = lock;
                                lock->lf_next = overlap;
                        }
                        break;
 
-               case 1: /* overlap == lock */
+               case OVERLAP_EQUALS_LOCK:
                        /*
                         * If downgrading lock, others may be
                         * able to acquire it.
@@ -404,28 +568,37 @@ lf_setlock(lock)
                                lf_wakelock(overlap);
                        overlap->lf_type = lock->lf_type;
                        FREE(lock, M_LOCKF);
-                       lock = overlap; /* for debug output below */
+                       lock = overlap; /* for lf_coelesce_adjacent() */
                        break;
 
-               case 2: /* overlap contains lock */
+               case OVERLAP_CONTAINS_LOCK:
                        /*
                         * Check for common starting point and different types.
                         */
                        if (overlap->lf_type == lock->lf_type) {
                                FREE(lock, M_LOCKF);
-                               lock = overlap; /* for debug output below */
+                               lock = overlap; /* for lf_coelesce_adjacent() */
                                break;
                        }
                        if (overlap->lf_start == lock->lf_start) {
                                *prev = lock;
                                lock->lf_next = overlap;
                                overlap->lf_start = lock->lf_end + 1;
-                       } else
-                               lf_split(overlap, lock);
+                       } else {
+                               /*
+                                * If we can't split the lock, we can't
+                                * grant it.  Claim a system limit for the
+                                * resource shortage.
+                                */
+                               if (lf_split(overlap, lock)) {
+                                       FREE(lock, M_LOCKF);
+                                       return (ENOLCK);
+                               }
+                       }
                        lf_wakelock(overlap);
                        break;
 
-               case 3: /* lock contains overlap */
+               case OVERLAP_CONTAINED_BY_LOCK:
                        /*
                         * If downgrading lock, others may be able to
                         * acquire it, otherwise take the list.
@@ -456,7 +629,7 @@ lf_setlock(lock)
                        FREE(overlap, M_LOCKF);
                        continue;
 
-               case 4: /* overlap starts before lock */
+               case OVERLAP_STARTS_BEFORE_LOCK:
                        /*
                         * Add lock after overlap on the list.
                         */
@@ -468,7 +641,7 @@ lf_setlock(lock)
                        needtolink = 0;
                        continue;
 
-               case 5: /* overlap ends after lock */
+               case OVERLAP_ENDS_AFTER_LOCK:
                        /*
                         * Add the new lock before overlap.
                         */
@@ -482,101 +655,136 @@ lf_setlock(lock)
                }
                break;
        }
-#ifdef LOCKF_DEBUG
+       /* Coelesce adjacent locks with identical attributes */
+       lf_coelesce_adjacent(lock);
+#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 1) {
                lf_print("lf_setlock: got the lock", lock);
-               lf_printlist("lf_setlock", lock);
+               lf_printlist("lf_setlock(out)", lock);
        }
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
        return (0);
 }
 
+
 /*
- * Remove a byte-range lock on an inode.
+ * lf_clearlock
+ *
+ * Description:        Remove a byte-range lock on an vnode.  Generally, find the
+ *             lock (or an overlap to that lock) and remove it (or shrink
+ *             it), then wakeup anyone we can.
+ *
+ * Parameters: unlock                  The lock to clear
  *
- * Generally, find the lock (or an overlap to that lock)
- * and remove it (or shrink it), then wakeup anyone we can.
+ * Returns:    0                       Success
+ *     lf_split:ENOLCK
+ *
+ * Notes:      A caller may unlock all the locks owned by the caller by
+ *             specifying the entire file range; locks owned by other
+ *             callers are not affected by this operation.
  */
 static int
-lf_clearlock(unlock)
-       struct lockf *unlock;
+lf_clearlock(struct lockf *unlock)
 {
        struct lockf **head = unlock->lf_head;
        struct lockf *lf = *head;
        struct lockf *overlap, **prev;
-       int ovcase;
+       overlap_t ovcase;
 
        if (lf == NOLOCKF)
                return (0);
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
        if (unlock->lf_type != F_UNLCK)
                panic("lf_clearlock: bad type");
        if (lockf_debug & 1)
                lf_print("lf_clearlock", unlock);
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
        prev = head;
-       while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
+       while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) {
                /*
                 * Wakeup the list of locks to be retried.
                 */
                lf_wakelock(overlap);
 
                switch (ovcase) {
+               case OVERLAP_NONE:      /* satisfy compiler enum/switch */
+                       break;
 
-               case 1: /* overlap == lock */
+               case OVERLAP_EQUALS_LOCK:
                        *prev = overlap->lf_next;
                        FREE(overlap, M_LOCKF);
                        break;
 
-               case 2: /* overlap contains lock: split it */
+               case OVERLAP_CONTAINS_LOCK: /* split it */
                        if (overlap->lf_start == unlock->lf_start) {
                                overlap->lf_start = unlock->lf_end + 1;
                                break;
                        }
-                       lf_split(overlap, unlock);
+                       /*
+                        * If we can't split the lock, we can't grant it.
+                        * Claim a system limit for the resource shortage.
+                        */
+                       if (lf_split(overlap, unlock))
+                               return (ENOLCK);
                        overlap->lf_next = unlock->lf_next;
                        break;
 
-               case 3: /* lock contains overlap */
+               case OVERLAP_CONTAINED_BY_LOCK:
                        *prev = overlap->lf_next;
                        lf = overlap->lf_next;
                        FREE(overlap, M_LOCKF);
                        continue;
 
-               case 4: /* overlap starts before lock */
+               case OVERLAP_STARTS_BEFORE_LOCK:
                        overlap->lf_end = unlock->lf_start - 1;
                        prev = &overlap->lf_next;
                        lf = overlap->lf_next;
                        continue;
 
-               case 5: /* overlap ends after lock */
+               case OVERLAP_ENDS_AFTER_LOCK:
                        overlap->lf_start = unlock->lf_end + 1;
                        break;
                }
                break;
        }
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 1)
                lf_printlist("lf_clearlock", unlock);
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
        return (0);
 }
 
+
 /*
- * Check whether there is a blocking lock,
- * and if so return its process identifier.
+ * lf_getlock
+ *
+ * Description:        Check whether there is a blocking lock, and if so return
+ *             its process identifier into the lock being requested.
+ *
+ * Parameters: lock                    Pointer to lock to test for blocks
+ *             fl                      Pointer to flock structure to receive
+ *                                     the blocking lock information, if a
+ *                                     blocking lock is found.
+ *
+ * Returns:    0                       Success
+ *
+ * Implicit Returns:
+ *             *fl                     Contents modified to reflect the
+ *                                     blocking lock, if one is found; not
+ *                                     modified otherwise
+ *
+ * Notes:      fl->l_pid will be (-1) for file locks and will only be set to
+ *             the blocking process ID for advisory record locks.
  */
 static int
-lf_getlock(lock, fl)
-       struct lockf *lock;
-       struct flock *fl;
+lf_getlock(struct lockf *lock, struct flock *fl)
 {
        struct lockf *block;
 
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 1)
                lf_print("lf_getlock", lock);
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
 
        if ((block = lf_getblock(lock))) {
                fl->l_type = block->lf_type;
@@ -596,19 +804,30 @@ lf_getlock(lock, fl)
        return (0);
 }
 
+
 /*
- * Walk the list of locks for an inode and
- * return the first blocking lock.
+ * lf_getblock
+ *
+ * Description:        Walk the list of locks for an inode and return the first
+ *             blocking lock.  A lock is considered blocking if we are not
+ *             the lock owner; otherwise, we are permitted to upgrade or
+ *             downgrade it, and it's not considered blocking.
+ *
+ * Parameters: lock                    The lock for which we are interested
+ *                                     in obtaining the blocking lock, if any
+ *
+ * Returns:    NOLOCKF                 No blocking lock exists
+ *             !NOLOCKF                The address of the blocking lock's
+ *                                     struct lockf.
  */
 static struct lockf *
-lf_getblock(lock)
-       struct lockf *lock;
+lf_getblock(struct lockf *lock)
 {
        struct lockf **prev, *overlap, *lf = *(lock->lf_head);
        int ovcase;
 
        prev = lock->lf_head;
-       while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
+       while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) != OVERLAP_NONE) {
                /*
                 * We've found an overlap, see if it blocks us
                 */
@@ -623,30 +842,64 @@ lf_getblock(lock)
        return (NOLOCKF);
 }
 
+
 /*
- * Walk the list of locks to
- * find an overlapping lock (if any).
+ * lf_findoverlap
+ *
+ * Description:        Walk the list of locks to find an overlapping lock (if any).
+ *
+ * Parameters: lf                      First lock on lock list
+ *             lock                    The lock we are checking for an overlap
+ *             check                   Check type
+ *             prev                    pointer to a pointer to contain the
+ *                                     address of the pointer to the lock
+ *                                     preceding the overlapping lock, if any
+ *             overlap                 pointer to pointer to contain address
+ *                                     of overlapping lock
+ *
+ * Returns:    OVERLAP_NONE
+ *             OVERLAP_EQUALS_LOCK
+ *             OVERLAP_CONTAINS_LOCK
+ *             OVERLAP_CONTAINED_BY_LOCK
+ *             OVERLAP_STARTS_BEFORE_LOCK
+ *             OVERLAP_ENDS_AFTER_LOCK
  *
- * NOTE: this returns only the FIRST overlapping lock.  There
- *      may be more than one.
+ * Implicit Returns:
+ *             *prev                   The address of the next pointer in the
+ *                                     lock previous to the overlapping lock;
+ *                                     this is generally used to relink the
+ *                                     lock list, avoiding a second iteration.
+ *             *overlap                The pointer to the overlapping lock
+ *                                     itself; this is used to return data in
+ *                                     the check == OTHERS case, and for the
+ *                                     caller to modify the overlapping lock,
+ *                                     in the check == SELF case
+ *
+ * Note:       This returns only the FIRST overlapping lock.  There may be
+ *             more than one.  lf_getlock will return the first blocking lock,
+ *             while lf_setlock will iterate over all overlapping locks.
+ *
+ *             The check parameter can be SELF, meaning we are looking for
+ *             overlapping locks owned by us, or it can be OTHERS, meaning
+ *             we are looking for overlapping locks owned by someone else so
+ *             we can report a blocking lock on an F_GETLK request.
+ *
+ *             The values of *overlap and *prev are modified, even if there is
+ *             no overlapping lock found; always check the return code.
  */
-static int
-lf_findoverlap(lf, lock, type, prev, overlap)
-       struct lockf *lf;
-       struct lockf *lock;
-       int type;
-       struct lockf ***prev;
-       struct lockf **overlap;
+static overlap_t
+lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
+              struct lockf ***prev, struct lockf **overlap)
 {
        off_t start, end;
 
        *overlap = lf;
        if (lf == NOLOCKF)
                return (0);
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 2)
                lf_print("lf_findoverlap: looking for overlap in", lock);
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
        start = lock->lf_start;
        end = lock->lf_end;
        while (lf != NOLOCKF) {
@@ -656,122 +909,111 @@ lf_findoverlap(lf, lock, type, prev, overlap)
                        *overlap = lf = lf->lf_next;
                        continue;
                }
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
                if (lockf_debug & 2)
                        lf_print("\tchecking", lf);
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
                /*
                 * OK, check for overlap
-                *
-                * Six cases:
-                *      0) no overlap
-                *      1) overlap == lock
-                *      2) overlap contains lock
-                *      3) lock contains overlap
-                *      4) overlap starts before lock
-                *      5) overlap ends after lock
                 */
                if ((lf->lf_end != -1 && start > lf->lf_end) ||
                    (end != -1 && lf->lf_start > end)) {
                        /* Case 0 */
-#ifdef LOCKF_DEBUG
-                       if (lockf_debug & 2)
-                               printf("no overlap\n");
-#endif /* LOCKF_DEBUG */
+                       LOCKF_DEBUG(2, "no overlap\n");
                        if ((type & SELF) && end != -1 && lf->lf_start > end)
-                               return (0);
+                               return (OVERLAP_NONE);
                        *prev = &lf->lf_next;
                        *overlap = lf = lf->lf_next;
                        continue;
                }
                if ((lf->lf_start == start) && (lf->lf_end == end)) {
-                       /* Case 1 */
-#ifdef LOCKF_DEBUG
-                       if (lockf_debug & 2)
-                               printf("overlap == lock\n");
-#endif /* LOCKF_DEBUG */
-                       return (1);
+                       LOCKF_DEBUG(2, "overlap == lock\n");
+                       return (OVERLAP_EQUALS_LOCK);
                }
                if ((lf->lf_start <= start) &&
                    (end != -1) &&
                    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
-                       /* Case 2 */
-#ifdef LOCKF_DEBUG
-                       if (lockf_debug & 2)
-                               printf("overlap contains lock\n");
-#endif /* LOCKF_DEBUG */
-                       return (2);
+                       LOCKF_DEBUG(2, "overlap contains lock\n");
+                       return (OVERLAP_CONTAINS_LOCK);
                }
                if (start <= lf->lf_start &&
                           (end == -1 ||
                           (lf->lf_end != -1 && end >= lf->lf_end))) {
-                       /* Case 3 */
-#ifdef LOCKF_DEBUG
-                       if (lockf_debug & 2)
-                               printf("lock contains overlap\n");
-#endif /* LOCKF_DEBUG */
-                       return (3);
+                       LOCKF_DEBUG(2, "lock contains overlap\n");
+                       return (OVERLAP_CONTAINED_BY_LOCK);
                }
                if ((lf->lf_start < start) &&
                        ((lf->lf_end >= start) || (lf->lf_end == -1))) {
-                       /* Case 4 */
-#ifdef LOCKF_DEBUG
-                       if (lockf_debug & 2)
-                               printf("overlap starts before lock\n");
-#endif /* LOCKF_DEBUG */
-                       return (4);
+                       LOCKF_DEBUG(2, "overlap starts before lock\n");
+                       return (OVERLAP_STARTS_BEFORE_LOCK);
                }
                if ((lf->lf_start > start) &&
                        (end != -1) &&
                        ((lf->lf_end > end) || (lf->lf_end == -1))) {
-                       /* Case 5 */
-#ifdef LOCKF_DEBUG
-                       if (lockf_debug & 2)
-                               printf("overlap ends after lock\n");
-#endif /* LOCKF_DEBUG */
-                       return (5);
+                       LOCKF_DEBUG(2, "overlap ends after lock\n");
+                       return (OVERLAP_ENDS_AFTER_LOCK);
                }
                panic("lf_findoverlap: default");
        }
-       return (0);
+       return (OVERLAP_NONE);
 }
 
+
 /*
- * Split a lock and a contained region into
- * two or three locks as necessary.
+ * lf_split
+ *
+ * Description:        Split a lock and a contained region into two or three locks
+ *             as necessary.
+ *
+ * Parameters: lock1                   Lock to split
+ *             lock2                   Overlapping lock region requiring the
+ *                                     split (upgrade/downgrade/unlock)
+ *
+ * Returns:    0                       Success
+ *             ENOLCK                  No memory for new lock
+ *
+ * Implicit Returns:
+ *             *lock1                  Modified original lock
+ *             *lock2                  Overlapping lock (inserted into list)
+ *             (new lock)              Potential new lock inserted into list
+ *                                     if split results in 3 locks
+ *
+ * Notes:      This operation can only fail if the split would result in three
+ *             locks, and there is insufficient memory to allocate the third
+ *             lock; in that case, neither of the locks will be modified.
  */
-static void
-lf_split(lock1, lock2)
-       struct lockf *lock1;
-       struct lockf *lock2;
+static int
+lf_split(struct lockf *lock1, struct lockf *lock2)
 {
        struct lockf *splitlock;
 
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 2) {
                lf_print("lf_split", lock1);
                lf_print("splitting from", lock2);
        }
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
        /*
         * Check to see if spliting into only two pieces.
         */
        if (lock1->lf_start == lock2->lf_start) {
                lock1->lf_start = lock2->lf_end + 1;
                lock2->lf_next = lock1;
-               return;
+               return (0);
        }
        if (lock1->lf_end == lock2->lf_end) {
                lock1->lf_end = lock2->lf_start - 1;
                lock2->lf_next = lock1->lf_next;
                lock1->lf_next = lock2;
-               return;
+               return (0);
        }
        /*
         * Make a new lock consisting of the last part of
         * the encompassing lock
         */
        MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
+       if (splitlock == NULL)
+               return (ENOLCK);
        bcopy(lock1, splitlock, sizeof *splitlock);
        splitlock->lf_start = lock2->lf_end + 1;
        TAILQ_INIT(&splitlock->lf_blkhd);
@@ -782,14 +1024,31 @@ lf_split(lock1, lock2)
        splitlock->lf_next = lock1->lf_next;
        lock2->lf_next = splitlock;
        lock1->lf_next = lock2;
+
+       return (0);
 }
 
+
 /*
- * Wakeup a blocklist
+ * lf_wakelock
+ *
+ * Wakeup a blocklist in the case of a downgrade or unlock, since others
+ * waiting on the lock may now be able to acquire it.
+ *
+ * Parameters: listhead                Lock list head on which waiters may
+ *                                     have pending locks
+ *
+ * Returns:    <void>
+ *
+ * Notes:      This function iterates a list of locks and wakes all waiters,
+ *             rather than only waiters for the contended regions.  Because
+ *             of this, for heavily contended files, this can result in a
+ *             "thundering herd" situation.  Refactoring the code could make
+ *             this operation more efficient, if heavy contention ever results
+ *             in a real-world performance problem.
  */
 static void
-lf_wakelock(listhead)
-       struct lockf *listhead;
+lf_wakelock(struct lockf *listhead)
 {
        struct lockf *wakelock;
 
@@ -797,38 +1056,44 @@ lf_wakelock(listhead)
                wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
                TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
                wakelock->lf_next = NOLOCKF;
-#ifdef LOCKF_DEBUG
+#ifdef LOCKF_DEBUGGING
                if (lockf_debug & 2)
                        lf_print("lf_wakelock: awakening", wakelock);
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */
                wakeup(wakelock);
        }
 }
 
-#ifdef LOCKF_DEBUG
+
+#ifdef LOCKF_DEBUGGING
 /*
- * Print out a lock.
+ * lf_print DEBUG
+ *
+ * Print out a lock; lock information is prefixed by the string in 'tag'
+ *
+ * Parameters: tag                     A string tag for debugging
+ *             lock                    The lock whose information should be
+ *                                     displayed
+ *
+ * Returns:    <void>
  */
 void
-lf_print(tag, lock)
-       char *tag;
-       struct lockf *lock;
+lf_print(const char *tag, struct lockf *lock)
 {
-
        printf("%s: lock %p for ", tag, (void *)lock);
        if (lock->lf_flags & F_POSIX)
                printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
        else
                printf("id %p", (void *)lock->lf_id);
        if (lock->lf_vnode != 0)
-               printf(" in vno 0x%08x, %s, start %jd, end %jd",
+               printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx",
                    lock->lf_vnode,
                    lock->lf_type == F_RDLCK ? "shared" :
                    lock->lf_type == F_WRLCK ? "exclusive" :
                    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
                    (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
        else
-               printf(" %s, start %jd, end %jd",
+               printf(" %s, start 0x%016llx, end 0x%016llx",
                    lock->lf_type == F_RDLCK ? "shared" :
                    lock->lf_type == F_WRLCK ? "exclusive" :
                    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
@@ -839,17 +1104,28 @@ lf_print(tag, lock)
                printf("\n");
 }
 
+
+/*
+ * lf_printlist DEBUG
+ *
+ * Print out a lock list for the vnode associated with 'lock'; lock information
+ * is prefixed by the string in 'tag'
+ *
+ * Parameters: tag                     A string tag for debugging
+ *             lock                    The lock whose vnode's lock list should
+ *                                     be displayed
+ *
+ * Returns:    <void>
+ */
 void
-lf_printlist(tag, lock)
-       char *tag;
-       struct lockf *lock;
+lf_printlist(const char *tag, struct lockf *lock)
 {
        struct lockf *lf, *blk;
 
        if (lock->lf_vnode == 0)
                return;
 
-       printf("%s: Lock list for vno 0x%08x:\n",
+       printf("%s: Lock list for vno %p:\n",
            tag, lock->lf_vnode);
        for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
                printf("\tlock %p for ",(void *)lf);
@@ -858,7 +1134,7 @@ lf_printlist(tag, lock)
                            (long)((struct proc *)lf->lf_id)->p_pid);
                else
                        printf("id %p", (void *)lf->lf_id);
-               printf(", %s, start %jd, end %jd",
+               printf(", %s, start 0x%016llx, end 0x%016llx",
                    lf->lf_type == F_RDLCK ? "shared" :
                    lf->lf_type == F_WRLCK ? "exclusive" :
                    lf->lf_type == F_UNLCK ? "unlock" :
@@ -870,7 +1146,7 @@ lf_printlist(tag, lock)
                                    (long)((struct proc *)blk->lf_id)->p_pid);
                        else
                                printf("id %p", (void *)blk->lf_id);
-                       printf(", %s, start %jd, end %jd",
+                       printf(", %s, start 0x%016llx, end 0x%016llx",
                            blk->lf_type == F_RDLCK ? "shared" :
                            blk->lf_type == F_WRLCK ? "exclusive" :
                            blk->lf_type == F_UNLCK ? "unlock" :
@@ -882,4 +1158,4 @@ lf_printlist(tag, lock)
                printf("\n");
        }
 }
-#endif /* LOCKF_DEBUG */
+#endif /* LOCKF_DEBUGGING */