/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vnode_if.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
static int maxlockdepth = MAXDEPTH;
#ifdef LOCKF_DEBUGGING
#include <sys/sysctl.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
void lf_print(const char *tag, struct lockf *lock);
void lf_printlist(const char *tag, struct lockf *lock);
static int	lockf_debug = 2;
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");
/*
 * If there is no mask bit selector, or there is one, and the selector is
 * set, then output the debugging diagnostic.
 */
#define LOCKF_DEBUG(mask, ...)					\
	do {							\
		if( !(mask) || ((mask) & lockf_debug)) {	\
			printf(__VA_ARGS__);			\
		}						\
	} while(0)
#else	/* !LOCKF_DEBUGGING */
#define LOCKF_DEBUG(mask, ...)		/* mask */
#endif	/* !LOCKF_DEBUGGING */
MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

#define NOLOCKF (struct lockf *)0
#define SELF	0x1
#define OTHERS	0x2
#define OFF_MAX 0x7fffffffffffffffULL	/* max off_t */
/*
 * Overlapping lock states
 */
typedef enum {
	OVERLAP_NONE = 0,
	OVERLAP_EQUALS_LOCK,
	OVERLAP_CONTAINS_LOCK,
	OVERLAP_CONTAINED_BY_LOCK,
	OVERLAP_STARTS_BEFORE_LOCK,
	OVERLAP_ENDS_AFTER_LOCK
} overlap_t;
static int	 lf_clearlock(struct lockf *);
static overlap_t lf_findoverlap(struct lockf *,
	    struct lockf *, int, struct lockf ***, struct lockf **);
static struct lockf *lf_getblock(struct lockf *);
static int	 lf_getlock(struct lockf *, struct flock *);
static int	 lf_getlockpid(struct vnode *, struct flock *);
static int	 lf_setlock(struct lockf *);
static int	 lf_split(struct lockf *, struct lockf *);
static void	 lf_wakelock(struct lockf *, boolean_t);
/*
 * lf_advlock
 *
 * Description:	Advisory record locking support
 *
 * Parameters:	ap			Argument pointer to a vnop_advlock_args
 *					argument descriptor structure for the
 *					lock operation to be attempted.
 *
 * Returns:	0			Success
 *		EOVERFLOW
 *		EINVAL
 *		ENOLCK			Number of locked regions exceeds limit
 *	lf_setlock:EAGAIN
 *	lf_setlock:EDEADLK
 *	lf_setlock:EINTR
 *	lf_setlock:ENOLCK
 *	lf_clearlock:ENOLCK
 *
 * Notes:	We return ENOLCK when we run out of memory to support locks; as
 *		such, there is no specific expectation limit other than the
 *		amount of available resources.
 */
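/*
 * Example (illustrative sketch): requests normally arrive here via
 * fcntl(2).  A userland caller holding an open descriptor 'fd' on a
 * regular file might set a blocking exclusive lock on bytes [0, 99]
 * as follows; F_SETLKW is presented to this layer as an F_SETLK
 * operation with F_WAIT set in a_flags:
 *
 *	struct flock fl;
 *	fl.l_whence = SEEK_SET;
 *	fl.l_start = 0;
 *	fl.l_len = 100;
 *	fl.l_type = F_WRLCK;
 *	(void)fcntl(fd, F_SETLKW, &fl);
 */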
int
lf_advlock(struct vnop_advlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct flock *fl = ap->a_fl;
	vfs_context_t context = ap->a_context;
	struct lockf *lock;
	off_t start, end, oadd;
	u_quad_t size;
	int error;
	struct lockf **head = &vp->v_lockf;
	/* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */

	if (ap->a_op == F_GETLKPID)
		return lf_getlockpid(vp, fl);

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			LOCKF_DEBUG(0, "lf_advlock: '%s' unlock without lock\n",
			    vfs_context_proc(context)->p_comm);
			return (0);
		}
	}
	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {

	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		/*
		 * It's OK to cast the u_quad_t to an off_t here, since they
		 * are the same storage size, and the value of the returned
		 * contents will never overflow into the sign bit.  We need to
		 * do this because we will use size to force range checks.
		 */
		if ((error = vnode_size(vp, (off_t *)&size, context))) {
			LOCKF_DEBUG(0, "lf_advlock: vnode_getattr failed: %d\n", error);
			return (error);
		}

		if (size > OFF_MAX ||
		    (fl->l_start > 0 &&
		     size > (u_quad_t)(OFF_MAX - fl->l_start)))
			return (EOVERFLOW);
		start = size + fl->l_start;
		break;

	default:
		LOCKF_DEBUG(0, "lf_advlock: unknown whence %d\n", fl->l_whence);
		return (EINVAL);
	}
	if (start < 0) {
		LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
		return (EINVAL);
	}
	if (fl->l_len < 0) {
		if (start == 0) {
			LOCKF_DEBUG(0, "lf_advlock: len < 0 & start == 0\n");
			return (EINVAL);
		}
		end = start - 1;
		start += fl->l_len;
		if (start < 0) {
			LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start);
			return (EINVAL);
		}
	} else if (fl->l_len == 0)
		end = -1;
	else {
		oadd = fl->l_len - 1;
		if (oadd > (off_t)(OFF_MAX - start)) {
			LOCKF_DEBUG(0, "lf_advlock: overflow\n");
			return (EOVERFLOW);
		}
		end = start + oadd;
	}
	/*
	 * Create the lockf structure
	 */
	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
	if (lock == NULL)
		return (ENOLCK);
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_id = ap->a_id;
	lock->lf_vnode = vp;
	lock->lf_type = fl->l_type;
	lock->lf_head = head;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;

	if (ap->a_flags & F_FLOCK)
		lock->lf_flags |= F_WAKE1_SAFE;

	lck_mtx_lock(&vp->v_lock);	/* protect the lockf list */
	/*
	 * Do the requested operation.
	 */
	switch(ap->a_op) {
	case F_SETLK:
		error = lf_setlock(lock);
		break;

	case F_UNLCK:
		error = lf_clearlock(lock);
		FREE(lock, M_LOCKF);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		FREE(lock, M_LOCKF);
		break;

	default:
		FREE(lock, M_LOCKF);
		error = EINVAL;
		break;
	}
	lck_mtx_unlock(&vp->v_lock);	/* done manipulating the list */

	LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
	return (error);
}
/*
 * Take any lock attempts which are currently blocked by a given lock ("from")
 * and mark them as blocked by a different lock ("to").  Used in the case
 * where a byte range currently occupied by "from" is to be occupied by "to."
 */
static void
lf_move_blocked(struct lockf *to, struct lockf *from)
{
	struct lockf *tlock;

	TAILQ_FOREACH(tlock, &from->lf_blkhd, lf_block) {
		tlock->lf_next = to;
	}

	TAILQ_CONCAT(&to->lf_blkhd, &from->lf_blkhd, lf_block);
}
/*
 * lf_coalesce_adjacent
 *
 * Description:	Helper function: when setting a lock, coalesce adjacent
 *		locks.  Needed because adjacent locks are not overlapping,
 *		but POSIX requires that they be coalesced.
 *
 * Parameters:	lock			The new lock which may be adjacent
 *					to already locked regions, and which
 *					should therefore be coalesced with them
 *
 * Returns:	<void>
 */
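/*
 * Example (illustrative): if a process holds a write lock on bytes
 * [0, 99] and then write locks bytes [100, 199], the regions do not
 * overlap, but POSIX requires that a later unlock of [0, 199] release
 * both; coalescing the two records into a single [0, 199] lock here
 * is what makes them behave as one region.
 */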
static void
lf_coalesce_adjacent(struct lockf *lock)
{
	struct lockf **lf = lock->lf_head;

	while (*lf != NOLOCKF) {
		/* reject locks that obviously could not be coalesced */
		if ((*lf == lock) ||
		    ((*lf)->lf_id != lock->lf_id) ||
		    ((*lf)->lf_type != lock->lf_type)) {
			lf = &(*lf)->lf_next;
			continue;
		}
		/*
		 * NOTE: Assumes that if two locks are adjacent on the number line
		 * and belong to the same owner, then they are adjacent on the list.
		 */

		/* If the lock ends adjacent to us, we can coalesce it */
		if ((*lf)->lf_end != -1 &&
		    ((*lf)->lf_end + 1) == lock->lf_start) {
			struct lockf *adjacent = *lf;

			LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n");
			lock->lf_start = (*lf)->lf_start;
			*lf = lock;
			lf = &(*lf)->lf_next;

			lf_move_blocked(lock, adjacent);

			FREE(adjacent, M_LOCKF);
			continue;
		}
		/* If the lock starts adjacent to us, we can coalesce it */
		if (lock->lf_end != -1 &&
		    (lock->lf_end + 1) == (*lf)->lf_start) {
			struct lockf *adjacent = *lf;

			LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n");
			lock->lf_end = (*lf)->lf_end;
			lock->lf_next = (*lf)->lf_next;
			lf = &lock->lf_next;

			lf_move_blocked(lock, adjacent);

			FREE(adjacent, M_LOCKF);
			continue;
		}

		/* no matching conditions; go on to next lock */
		lf = &(*lf)->lf_next;
	}
}
/*
 * lf_setlock
 *
 * Description:	Set a byte-range lock.
 *
 * Parameters:	lock			The lock structure describing the lock
 *					to be set; allocated by the caller, it
 *					will be linked into the lock list if
 *					the set is successful, and freed if the
 *					set is unsuccessful.
 *
 * Returns:	0			Success
 *		EAGAIN
 *		EDEADLK
 *	lf_split:ENOLCK
 *	lf_clearlock:ENOLCK
 *	msleep:EINTR
 *
 * Notes:	We add the lock to the provisional lock list.  We do not
 *		coalesce at this time; this has implications for other lock
 *		requestors in the blocker search mechanism.
 */
static int
lf_setlock(struct lockf *lock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	static char lockstr[] = "lockf";
	int priority, needtolink, error;
	struct vnode *vp = lock->lf_vnode;
	overlap_t ovcase;
#ifdef LOCKF_DEBUGGING
	if (lockf_debug & 1) {
		lf_print("lf_setlock", lock);
		lf_printlist("lf_setlock(in)", lock);
	}
#endif /* LOCKF_DEBUGGING */

	/*
	 * Set the priority
	 */
	priority = PZERO + 1;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock))) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			FREE(lock, M_LOCKF);
			return (EAGAIN);
		}
		/*
		 * We are blocked. Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us. MAXDEPTH is set just to make sure we
		 * do not go off into neverland.
		 */
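		/*
		 * Example (illustrative): process A holds [0, 9] and sleeps
		 * waiting for [10, 19]; process B holds [10, 19] and now
		 * requests [0, 9].  The walk below starts at A (the owner
		 * of our blocker), finds A's thread asleep on its own
		 * request, follows lf_next to the lock blocking that
		 * request, and discovers it is owned by B itself: a cycle,
		 * so EDEADLK is returned.
		 */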
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct proc *wproc, *bproc;
			struct uthread *ut;
			struct lockf *waitblock;
			int i = 0;

			/* The block is waiting on something */
			wproc = (struct proc *)block->lf_id;
			proc_lock(wproc);
			TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
				/*
				 * While the thread is asleep (uu_wchan != 0)
				 * in this code (uu_wmesg == lockstr)
				 * and we have not exceeded the maximum cycle
				 * depth (i < maxlockdepth), then check for a
				 * cycle to see if the lock is blocked behind
				 * someone blocked behind us.
				 */
				while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) &&
				    ut->uu_wmesg == lockstr &&
				    (i++ < maxlockdepth)) {
					waitblock = (struct lockf *)ut->uu_wchan;
					/*
					 * Get the lock blocking the lock
					 * which would block us, and make
					 * certain it hasn't come unblocked
					 * (been granted, e.g. between the time
					 * we called lf_getblock, and the time
					 * we successfully acquired the
					 * proc_lock).
					 */
					waitblock = waitblock->lf_next;
					if (waitblock == NULL)
						break;
					/*
					 * Make sure it's an advisory range
					 * lock and not an overall file lock;
					 * if we mix lock types, it's our own
					 * fault.
					 */
					if ((waitblock->lf_flags & F_POSIX) == 0)
						break;

					/*
					 * If the owner of the lock that's
					 * blocking a lock that's blocking us
					 * is the requestor of the lock we're
					 * trying to set, then we would
					 * deadlock, so error out.
					 */
					bproc = (struct proc *)waitblock->lf_id;
					if (bproc == (struct proc *)lock->lf_id) {
						proc_unlock(wproc);
						FREE(lock, M_LOCKF);
						return (EDEADLK);
					}
				}
			}
			proc_unlock(wproc);
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			if ((error = lf_clearlock(lock)) != 0) {
				FREE(lock, M_LOCKF);
				return (error);
			}
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);

		if ( !(lock->lf_flags & F_FLOCK))
			block->lf_flags &= ~F_WAKE1_SAFE;

#ifdef LOCKF_DEBUGGING
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock(block)", block);
		}
#endif /* LOCKF_DEBUGGING */
		error = msleep(lock, &vp->v_lock, priority, lockstr, 0);

		if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
			if ((block = lf_getblock(lock))) {
				lf_move_blocked(block, lock);
			}
		}
		if (error) {	/* XXX */
			/*
			 * We may have been awakened by a signal and/or by a
			 * debugger continuing us (in which cases we must remove
			 * ourselves from the blocked list) and/or by another
			 * process releasing a lock (in which case we have
			 * already been removed from the blocked list and our
			 * lf_next field set to NOLOCKF).
			 */
			if (lock->lf_next) {
				TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
				lock->lf_next = NOLOCKF;
			}
			if (!TAILQ_EMPTY(&lock->lf_blkhd))
				lf_wakelock(lock, TRUE);

			FREE(lock, M_LOCKF);
			return (error);
		}	/* XXX */
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case OVERLAP_NONE:
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;
		case OVERLAP_EQUALS_LOCK:
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap, TRUE);
			overlap->lf_type = lock->lf_type;
			FREE(lock, M_LOCKF);
			lock = overlap; /* for lf_coalesce_adjacent() */
			break;
		case OVERLAP_CONTAINS_LOCK:
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				FREE(lock, M_LOCKF);
				lock = overlap; /* for lf_coalesce_adjacent() */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else {
				/*
				 * If we can't split the lock, we can't
				 * grant it.  Claim a system limit for the
				 * resource shortage.
				 */
				if (lf_split(overlap, lock)) {
					FREE(lock, M_LOCKF);
					return (ENOLCK);
				}
			}
			lf_wakelock(overlap, TRUE);
			break;
		case OVERLAP_CONTAINED_BY_LOCK:
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap, TRUE);
			} else {
				while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
					ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
					ltmp->lf_next = lock;
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			FREE(overlap, M_LOCKF);
			continue;
		case OVERLAP_STARTS_BEFORE_LOCK:
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap, TRUE);
			needtolink = 0;
			continue;
		case OVERLAP_ENDS_AFTER_LOCK:
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap, TRUE);
			break;
		}
		break;
	}
	/* Coalesce adjacent locks with identical attributes */
	lf_coalesce_adjacent(lock);
#ifdef LOCKF_DEBUGGING
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock(out)", lock);
	}
#endif /* LOCKF_DEBUGGING */
	return (0);
}
/*
 * lf_clearlock
 *
 * Description:	Remove a byte-range lock on a vnode.  Generally, find the
 *		lock (or an overlap to that lock) and remove it (or shrink
 *		it), then wakeup anyone we can.
 *
 * Parameters:	unlock			The lock to clear
 *
 * Returns:	0			Success
 *	lf_split:ENOLCK
 *
 * Notes:	A caller may unlock all the locks owned by the caller by
 *		specifying the entire file range; locks owned by other
 *		callers are not affected by this operation.
 */
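/*
 * Example (illustrative): an F_UNLCK request with l_whence == SEEK_SET,
 * l_start == 0 and l_len == 0 is converted by lf_advlock() into an
 * unlock with lf_start == 0 and lf_end == -1 (the whole file), so the
 * loop below removes or shrinks every lock record owned by the caller.
 */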
static int
lf_clearlock(struct lockf *unlock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	overlap_t ovcase;

	if (lf == NOLOCKF)
		return (0);
#ifdef LOCKF_DEBUGGING
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUGGING */
	while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap, FALSE);

		switch (ovcase) {
		case OVERLAP_NONE:	/* satisfy compiler enum/switch */
			break;

		case OVERLAP_EQUALS_LOCK:
			*prev = overlap->lf_next;
			FREE(overlap, M_LOCKF);
			break;
		case OVERLAP_CONTAINS_LOCK: /* split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			/*
			 * If we can't split the lock, we can't grant it.
			 * Claim a system limit for the resource shortage.
			 */
			if (lf_split(overlap, unlock))
				return (ENOLCK);
			overlap->lf_next = unlock->lf_next;
			break;
		case OVERLAP_CONTAINED_BY_LOCK:
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			FREE(overlap, M_LOCKF);
			continue;

		case OVERLAP_STARTS_BEFORE_LOCK:
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;
		case OVERLAP_ENDS_AFTER_LOCK:
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUGGING
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUGGING */
	return (0);
}
/*
 * lf_getlock
 *
 * Description:	Check whether there is a blocking lock, and if so return
 *		its process identifier into the lock being requested.
 *
 * Parameters:	lock			Pointer to lock to test for blocks
 *		fl			Pointer to flock structure to receive
 *					the blocking lock information, if a
 *					blocking lock is found.
 *
 * Returns:	0			Success
 *
 * Implicit Returns:
 *		*fl			Contents modified to reflect the
 *					blocking lock, if one is found; not
 *					modified otherwise
 *
 * Notes:	fl->l_pid will be (-1) for file locks and will only be set to
 *		the blocking process ID for advisory record locks.
 */
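/*
 * Example (illustrative sketch): a typical userland probe for a
 * conflicting lock, which reaches this function with a_op == F_GETLK:
 *
 *	struct flock fl;
 *	fl.l_whence = SEEK_SET;
 *	fl.l_start = 0;
 *	fl.l_len = 0;
 *	fl.l_type = F_WRLCK;
 *	if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
 *		printf("blocked by pid %d\n", (int)fl.l_pid);
 */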
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUGGING
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUGGING */

	if ((block = lf_getblock(lock))) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = proc_pid((struct proc *)(block->lf_id));
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return (0);
}
int lf_getlockpid(struct vnode *vp, struct flock *fl)
{
	struct lockf *lf, *blk;

	fl->l_type = F_UNLCK;

	lck_mtx_lock(&vp->v_lock);

	for (lf = vp->v_lockf; lf; lf = lf->lf_next) {

		if (lf->lf_flags & F_POSIX) {
			if ((((struct proc *)lf->lf_id)->p_pid) == fl->l_pid) {
				fl->l_type = lf->lf_type;
				fl->l_whence = SEEK_SET;
				fl->l_start = lf->lf_start;
				if (lf->lf_end == -1)
					fl->l_len = 0;
				else
					fl->l_len = lf->lf_end - lf->lf_start + 1;

				break;
			}
		}
		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
			if (blk->lf_flags & F_POSIX) {
				if ((((struct proc *)blk->lf_id)->p_pid) == fl->l_pid) {
					fl->l_type = blk->lf_type;
					fl->l_whence = SEEK_SET;
					fl->l_start = blk->lf_start;
					if (blk->lf_end == -1)
						fl->l_len = 0;
					else
						fl->l_len = blk->lf_end - blk->lf_start + 1;

					break;
				}
			}
		}
	}

	lck_mtx_unlock(&vp->v_lock);
	return (0);
}
/*
 * lf_getblock
 *
 * Description:	Walk the list of locks for an inode and return the first
 *		blocking lock.  A lock is considered blocking if we are not
 *		the lock owner; otherwise, we are permitted to upgrade or
 *		downgrade it, and it's not considered blocking.
 *
 * Parameters:	lock			The lock for which we are interested
 *					in obtaining the blocking lock, if any
 *
 * Returns:	NOLOCKF			No blocking lock exists
 *		!NOLOCKF		The address of the blocking lock's
 *					struct lockf
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);
	overlap_t ovcase;

	prev = lock->lf_head;
	while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap)) != OVERLAP_NONE) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return (overlap);
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return (NOLOCKF);
}
/*
 * lf_findoverlap
 *
 * Description:	Walk the list of locks to find an overlapping lock (if any).
 *
 * Parameters:	lf			First lock on lock list
 *		lock			The lock we are checking for an overlap
 *		type			Check type (SELF or OTHERS)
 *		prev			pointer to a pointer-to-pointer, to
 *					contain the address of the next pointer
 *					in the lock preceding the overlapping
 *					lock, if an overlap is found
 *		overlap			pointer to a pointer, to contain the
 *					address of the overlapping lock
 *
 * Returns:	OVERLAP_NONE
 *		OVERLAP_EQUALS_LOCK
 *		OVERLAP_CONTAINS_LOCK
 *		OVERLAP_CONTAINED_BY_LOCK
 *		OVERLAP_STARTS_BEFORE_LOCK
 *		OVERLAP_ENDS_AFTER_LOCK
 *
 * Implicit Returns:
 *		*prev			The address of the next pointer in the
 *					lock previous to the overlapping lock;
 *					this is generally used to relink the
 *					lock list, avoiding a second iteration.
 *		*overlap		The pointer to the overlapping lock
 *					itself; this is used to return data in
 *					the check == OTHERS case, and for the
 *					caller to modify the overlapping lock,
 *					in the check == SELF case
 *
 * Note:	This returns only the FIRST overlapping lock.  There may be
 *		more than one.  lf_getlock will return the first blocking lock,
 *		while lf_setlock will iterate over all overlapping locks to
 *		handle each of them in turn.
 *
 *		The check parameter can be SELF, meaning we are looking for
 *		overlapping locks owned by us, or it can be OTHERS, meaning
 *		we are looking for overlapping locks owned by someone else so
 *		we can report a blocking lock on an F_GETLK request.
 *
 *		The values of *overlap and *prev are modified, even if there is
 *		no overlapping lock found; always check the return code.
 */
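/*
 * Example (illustrative): with a lock covering bytes [10, 19], list
 * entries such as the following would produce each overlap class:
 *
 *	[10, 19]	OVERLAP_EQUALS_LOCK
 *	[5, 25]		OVERLAP_CONTAINS_LOCK
 *	[12, 15]	OVERLAP_CONTAINED_BY_LOCK
 *	[5, 12]		OVERLAP_STARTS_BEFORE_LOCK
 *	[15, 25]	OVERLAP_ENDS_AFTER_LOCK
 */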
static overlap_t
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
	       struct lockf ***prev, struct lockf **overlap)
{
	off_t start, end;
	int found_self = 0;
#ifdef LOCKF_DEBUGGING
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUGGING */
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NOLOCKF) {
		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
			/*
			 * Locks belonging to one process are adjacent on the
			 * list, so if we've found any locks belonging to us,
			 * and we're now seeing something else, then we've
			 * examined all "self" locks.  Note that bailing out
			 * here is quite important; for coalescing, we assume
			 * numerically adjacent locks from the same owner to
			 * be adjacent on the list.
			 */
			if ((type & SELF) && found_self) {
				return OVERLAP_NONE;
			}
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}

		if ((type & SELF)) {
			found_self = 1;
		}
#ifdef LOCKF_DEBUGGING
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUGGING */

		/*
		 * OK, check for overlap
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			LOCKF_DEBUG(2, "no overlap\n");

			/*
			 * NOTE: assumes that locks for the same process are
			 * nonintersecting and ordered.
			 */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return (OVERLAP_NONE);
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			LOCKF_DEBUG(2, "overlap == lock\n");
			return (OVERLAP_EQUALS_LOCK);
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			LOCKF_DEBUG(2, "overlap contains lock\n");
			return (OVERLAP_CONTAINS_LOCK);
		}
		if (start <= lf->lf_start &&
		    (end == -1 ||
		     (lf->lf_end != -1 && end >= lf->lf_end))) {
			LOCKF_DEBUG(2, "lock contains overlap\n");
			return (OVERLAP_CONTAINED_BY_LOCK);
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			LOCKF_DEBUG(2, "overlap starts before lock\n");
			return (OVERLAP_STARTS_BEFORE_LOCK);
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			LOCKF_DEBUG(2, "overlap ends after lock\n");
			return (OVERLAP_ENDS_AFTER_LOCK);
		}

		/* the cases above are exhaustive for a real overlap */
		panic("lf_findoverlap: default");
	}
	return (OVERLAP_NONE);
}
/*
 * lf_split
 *
 * Description:	Split a lock and a contained region into two or three locks
 *		as necessary.
 *
 * Parameters:	lock1			Lock to split
 *		lock2			Overlapping lock region requiring the
 *					split (upgrade/downgrade/unlock)
 *
 * Returns:	0			Success
 *		ENOLCK			No memory for new lock
 *
 * Implicit Returns:
 *		*lock1			Modified original lock
 *		*lock2			Overlapping lock (inserted into list)
 *		(new lock)		Potential new lock inserted into list
 *					if split results in 3 locks
 *
 * Notes:	This operation can only fail if the split would result in three
 *		locks, and there is insufficient memory to allocate the third
 *		lock; in that case, neither of the locks will be modified.
 */
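/*
 * Example (illustrative): if lock1 covers [0, 199] and lock2 unlocks
 * [50, 99], neither endpoint matches, so a third lock is required:
 * lock1 is trimmed to [0, 49], a new splitlock takes [100, 199], and
 * lock2 is linked between them.
 */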
static int
lf_split(struct lockf *lock1, struct lockf *lock2)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUGGING
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUGGING */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return (0);
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return (0);
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
	if (splitlock == NULL)
		return (ENOLCK);
	bcopy(lock1, splitlock, sizeof *splitlock);
	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;

	return (0);
}
/*
 * lf_wakelock
 *
 * Wakeup a blocklist in the case of a downgrade or unlock, since others
 * waiting on the lock may now be able to acquire it.
 *
 * Parameters:	listhead		Lock list head on which waiters may
 *					have pending locks
 *		force_all		Force wakeup of all waiters
 *
 * Returns:	<void>
 *
 * Notes:	This function iterates a list of locks and wakes all waiters,
 *		rather than only waiters for the contended regions.  Because
 *		of this, for heavily contended files, this can result in a
 *		"thundering herd" situation.  Refactoring the code could make
 *		this operation more efficient, if heavy contention ever results
 *		in a real-world performance problem.
 */
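/*
 * Example (illustrative): for a whole-file flock(2) style lock marked
 * F_WAKE1_SAFE, waking every waiter would be wasted effort, since only
 * one of them can win the whole-file range.  With force_all == FALSE
 * the loop below wakes just the first waiter and hands it the rest of
 * the block list, so the next waiter is awakened only when this one
 * is done.
 */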
static void
lf_wakelock(struct lockf *listhead, boolean_t force_all)
{
	struct lockf *wakelock;
	boolean_t wake_all = TRUE;

	if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE))
		wake_all = FALSE;

	while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
		wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);

		wakelock->lf_next = NOLOCKF;
#ifdef LOCKF_DEBUGGING
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif /* LOCKF_DEBUGGING */
		if (wake_all == FALSE) {
			/*
			 * If there are items on the list head block list,
			 * move them to the wakelock list instead, and then
			 * correct their lf_next pointers.
			 */
			if (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
				TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block);

				struct lockf *tlock;

				TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
					tlock->lf_next = wakelock;
				}
			}
		}
		wakeup(wakelock);

		if (wake_all == FALSE)
			break;
	}
}
#ifdef LOCKF_DEBUGGING
/*
 * lf_print DEBUG
 *
 * Print out a lock; lock information is prefixed by the string in 'tag'
 *
 * Parameters:	tag			A string tag for debugging
 *		lock			The lock whose information should be
 *					displayed
 *
 * Returns:	<void>
 */
void
lf_print(const char *tag, struct lockf *lock)
{
	printf("%s: lock %p for ", tag, (void *)lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
	else
		printf("id %p", (void *)lock->lf_id);
	if (lock->lf_vnode != 0)
		printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx",
		    lock->lf_vnode,
		    lock->lf_type == F_RDLCK ? "shared" :
		    lock->lf_type == F_WRLCK ? "exclusive" :
		    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
		    (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
	else
		printf(" %s, start 0x%016llx, end 0x%016llx",
		    lock->lf_type == F_RDLCK ? "shared" :
		    lock->lf_type == F_WRLCK ? "exclusive" :
		    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
		    (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
	if (!TAILQ_EMPTY(&lock->lf_blkhd))
		printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
	else
		printf("\n");
}
/*
 * lf_printlist DEBUG
 *
 * Print out a lock list for the vnode associated with 'lock'; lock information
 * is prefixed by the string in 'tag'
 *
 * Parameters:	tag			A string tag for debugging
 *		lock			The lock whose vnode's lock list should
 *					be displayed
 *
 * Returns:	<void>
 */
void
lf_printlist(const char *tag, struct lockf *lock)
{
	struct lockf *lf, *blk;

	if (lock->lf_vnode == 0)
		return;

	printf("%s: Lock list for vno %p:\n",
	    tag, lock->lf_vnode);
	for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
		printf("\tlock %p for ", (void *)lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %ld",
			    (long)((struct proc *)lf->lf_id)->p_pid);
		else
			printf("id %p", (void *)lf->lf_id);
		printf(", %s, start 0x%016llx, end 0x%016llx",
		    lf->lf_type == F_RDLCK ? "shared" :
		    lf->lf_type == F_WRLCK ? "exclusive" :
		    lf->lf_type == F_UNLCK ? "unlock" :
		    "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
			printf("\n\t\tlock request %p for ", (void *)blk);
			if (blk->lf_flags & F_POSIX)
				printf("proc %ld",
				    (long)((struct proc *)blk->lf_id)->p_pid);
			else
				printf("id %p", (void *)blk->lf_id);
			printf(", %s, start 0x%016llx, end 0x%016llx",
			    blk->lf_type == F_RDLCK ? "shared" :
			    blk->lf_type == F_WRLCK ? "exclusive" :
			    blk->lf_type == F_UNLCK ? "unlock" :
			    "unknown", (intmax_t)blk->lf_start,
			    (intmax_t)blk->lf_end);
			if (!TAILQ_EMPTY(&blk->lf_blkhd))
				panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUGGING */