/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vnode_if.h>
#include <sys/malloc.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>

#if DEAD_CODE
/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
static int maxlockdepth = MAXDEPTH;
#endif /* DEAD_CODE */

#ifdef LOCKF_DEBUG
#include <sys/sysctl.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>

static int lockf_debug = 2;
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
#endif /* LOCKF_DEBUG */

MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

#define NOLOCKF (struct lockf *)0
#define OFF_MAX 0x7fffffffffffffffULL   /* max off_t */

static int lf_clearlock(struct lockf *);
static int lf_findoverlap(struct lockf *,
            struct lockf *, int, struct lockf ***, struct lockf **);
static struct lockf *lf_getblock(struct lockf *);
static int lf_getlock(struct lockf *, struct flock *);
static int lf_setlock(struct lockf *);
static void lf_split(struct lockf *, struct lockf *);
static void lf_wakelock(struct lockf *);

/*
 * Advisory record locking support
 */
int
lf_advlock(ap)
        struct vnop_advlock_args /* {
                struct vnode *a_vp;
                caddr_t  a_id;
                int  a_op;
                struct flock *a_fl;
                int  a_flags;
                vfs_context_t a_context;
        } */ *ap;
{
        struct vnode *vp = ap->a_vp;
        struct flock *fl = ap->a_fl;
        vfs_context_t context = ap->a_context;
        struct lockf *lock;
        off_t start, end, oadd;
        off_t size;
        int error;
        struct lockf **head = &vp->v_lockf;

        /* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */

        /*
         * Avoid the common case of unlocking when inode has no locks.
         */
        if (*head == (struct lockf *)0) {
                if (ap->a_op != F_SETLK) {
                        fl->l_type = F_UNLCK;
#ifdef LOCKF_DEBUG
                        printf("lf_advlock: unlock without lock\n");
#endif /* LOCKF_DEBUG */
                        return (0);
                }
        }

        /*
         * Convert the flock structure into a start and end.
         */
        switch (fl->l_whence) {

        case SEEK_SET:
        case SEEK_CUR:
                /*
                 * Caller is responsible for adding any necessary offset
                 * when SEEK_CUR is used.
                 */
                start = fl->l_start;
                break;

        case SEEK_END:
                if ((error = vnode_size(vp, &size, context))) {
#ifdef LOCKF_DEBUG
                        printf("lf_advlock: vnode_getattr failed: %d\n", error);
#endif /* LOCKF_DEBUG */
                        return (error);
                }
                if (size > OFF_MAX ||
                    (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
                        return (EOVERFLOW);
                start = size + fl->l_start;
                break;

        default:
#ifdef LOCKF_DEBUG
                printf("lf_advlock: unknown whence %d\n", fl->l_whence);
#endif /* LOCKF_DEBUG */
                return (EINVAL);
        }
        if (start < 0) {
#ifdef LOCKF_DEBUG
                printf("lf_advlock: start < 0 (%qd)\n", start);
#endif /* LOCKF_DEBUG */
                return (EINVAL);
        }
        if (fl->l_len < 0) {
                if (start == 0) {
#ifdef LOCKF_DEBUG
                        printf("lf_advlock: len < 0 & start == 0\n");
#endif /* LOCKF_DEBUG */
                        return (EINVAL);
                }
                end = start - 1;
                start += fl->l_len;
                if (start < 0) {
#ifdef LOCKF_DEBUG
                        printf("lf_advlock: start < 0 (%qd)\n", start);
#endif /* LOCKF_DEBUG */
                        return (EINVAL);
                }
        } else if (fl->l_len == 0)
                end = -1;
        else {
                oadd = fl->l_len - 1;
                if (oadd > (off_t)(OFF_MAX - start)) {
#ifdef LOCKF_DEBUG
                        printf("lf_advlock: overflow\n");
#endif /* LOCKF_DEBUG */
                        return (EOVERFLOW);
                }
                end = start + oadd;
        }
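
        /*
         * Worked example (editor's note, not part of the original file):
         * with l_whence = SEEK_SET, l_start = 100 and l_len = 50 the code
         * above yields the inclusive byte range [100, 149]; with l_len = 0
         * it yields [100, -1], where an end of -1 means "through end of
         * file"; a negative l_len locks the bytes before l_start, e.g.
         * l_start = 100, l_len = -50 yields [50, 99].
         */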

        /*
         * Create the lockf structure
         */
        MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
        lock->lf_start = start;
        lock->lf_end = end;
        lock->lf_id = ap->a_id;
        lock->lf_vnode = vp;
        lock->lf_type = fl->l_type;
        lock->lf_head = head;
        lock->lf_next = (struct lockf *)0;
        TAILQ_INIT(&lock->lf_blkhd);
        lock->lf_flags = ap->a_flags;

        lck_mtx_lock(&vp->v_lock);      /* protect the lockf list */

        /*
         * Do the requested operation.
         */
        switch (ap->a_op) {
        case F_SETLK:
                error = lf_setlock(lock);
                break;

        case F_UNLCK:
                error = lf_clearlock(lock);
                FREE(lock, M_LOCKF);
                break;

        case F_GETLK:
                error = lf_getlock(lock, fl);
                FREE(lock, M_LOCKF);
                break;

        default:
                FREE(lock, M_LOCKF);
                error = EINVAL;
                break;
        }
        lck_mtx_unlock(&vp->v_lock);    /* done manipulating the list */

#ifdef LOCKF_DEBUG
        printf("lf_advlock: normal exit: %d\n", error);
#endif /* LOCKF_DEBUG */
        return (error);
}
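
/*
 * Illustrative example (editor's note, not part of the original file):
 * lf_advlock is normally reached from user space through fcntl(2) or
 * flock(2), which the VFS forwards to the filesystem's VNOP_ADVLOCK
 * entry point.  A typical POSIX byte-range request looks like:
 *
 *      struct flock fl;
 *      fl.l_whence = SEEK_SET;         // interpret l_start from offset 0
 *      fl.l_start = 0;
 *      fl.l_len = 0;                   // 0 means "to end of file"
 *      fl.l_type = F_WRLCK;            // exclusive byte-range lock
 *      if (fcntl(fd, F_SETLKW, &fl) == -1)     // F_SETLKW adds F_WAIT
 *              err(1, "fcntl");
 */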

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(lock)
        struct lockf *lock;
{
        struct lockf *block;
        struct lockf **head = lock->lf_head;
        struct lockf **prev, *overlap, *ltmp;
        static char lockstr[] = "lockf";
        int ovcase, priority, needtolink, error;
        struct vnode *vp = lock->lf_vnode;

#ifdef LOCKF_DEBUG
        if (lockf_debug & 1)
                lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

        /*
         * Set the priority
         */
        priority = PLOCK;
        if (lock->lf_type == F_WRLCK)
                priority += 4;
        priority |= PCATCH;

        /*
         * Scan lock list for this file looking for locks that would block us.
         */
        while ((block = lf_getblock(lock))) {
                /*
                 * Free the structure and return if nonblocking.
                 */
                if ((lock->lf_flags & F_WAIT) == 0) {
                        FREE(lock, M_LOCKF);
                        return (EAGAIN);
                }
#if DEAD_CODE
                /*
                 * XXX This is dead code on MacOS X; it shouldn't be.
                 */
                /*
                 * We are blocked. Since flock style locks cover
                 * the whole file, there is no chance for deadlock.
                 * For byte-range locks we must check for deadlock.
                 *
                 * Deadlock detection is done by looking through the
                 * wait channels to see if there are any cycles that
                 * involve us. MAXDEPTH is set just to make sure we
                 * do not go off into neverland.
                 */
                if ((lock->lf_flags & F_POSIX) &&
                    (block->lf_flags & F_POSIX)) {
                        struct proc *wproc;
                        struct thread *td;
                        struct lockf *waitblock;
                        int i = 0;

                        /* The block is waiting on something */
                        /* XXXKSE this is not complete under threads */
                        wproc = (struct proc *)block->lf_id;
                        mtx_lock_spin(&sched_lock);
                        FOREACH_THREAD_IN_PROC(wproc, td) {
                                while (td->td_wchan &&
                                    (td->td_wmesg == lockstr) &&
                                    (i++ < maxlockdepth)) {
                                        waitblock = (struct lockf *)td->td_wchan;
                                        /* Get the owner of the blocking lock */
                                        waitblock = waitblock->lf_next;
                                        if ((waitblock->lf_flags & F_POSIX) == 0)
                                                break;
                                        wproc = (struct proc *)waitblock->lf_id;
                                        if (wproc == (struct proc *)lock->lf_id) {
                                                mtx_unlock_spin(&sched_lock);
                                                FREE(lock, M_LOCKF);
                                                return (EDEADLK);
                                        }
                                }
                        }
                        mtx_unlock_spin(&sched_lock);
                }
#endif /* DEAD_CODE */
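
                /*
                 * Illustrative example (editor's note, not part of the
                 * original file): the cycle the code above is meant to
                 * detect is the classic two-process byte-range deadlock,
                 * e.g.
                 *
                 *      // process A                    // process B
                 *      lock [0,9]   (granted)          lock [10,19] (granted)
                 *      lock [10,19] (sleeps on B)      lock [0,9]   (sleeps on A)
                 *
                 * Walking B's wait channel leads back to A, so B's request
                 * should fail with EDEADLK rather than sleep forever; since
                 * the check is compiled out (DEAD_CODE) on Mac OS X, such
                 * deadlocks are not detected here.
                 */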
                /*
                 * For flock type locks, we must first remove
                 * any shared locks that we hold before we sleep
                 * waiting for an exclusive lock.
                 */
                if ((lock->lf_flags & F_FLOCK) &&
                    lock->lf_type == F_WRLCK) {
                        lock->lf_type = F_UNLCK;
                        (void) lf_clearlock(lock);
                        lock->lf_type = F_WRLCK;
                }
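
                /*
                 * Illustrative example (editor's note, not part of the
                 * original file): from user space this is the flock(2)
                 * upgrade path, and the release-then-sleep above is why
                 * the upgrade is not atomic:
                 *
                 *      flock(fd, LOCK_SH);     // shared lock granted
                 *      flock(fd, LOCK_EX);     // shared lock is dropped first,
                 *                              // so another process may grab
                 *                              // the lock before we do
                 */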

                /*
                 * Add our lock to the blocked list and sleep until we're free.
                 * Remember who blocked us (for deadlock detection).
                 */
                lock->lf_next = block;
                TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
                if (lockf_debug & 1) {
                        lf_print("lf_setlock: blocking on", block);
                        lf_printlist("lf_setlock", block);
                }
#endif /* LOCKF_DEBUG */
                error = msleep(lock, &vp->v_lock, priority, lockstr, 0);
                if (error) {    /* XXX */
                        /*
                         * We may have been awakened by a signal and/or by a
                         * debugger continuing us (in which cases we must remove
                         * ourselves from the blocked list) and/or by another
                         * process releasing a lock (in which case we have
                         * already been removed from the blocked list and our
                         * lf_next field set to NOLOCKF).
                         */
                        if (lock->lf_next) {
                                TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock,
                                    lf_block);
                                lock->lf_next = NOLOCKF;
                        }
                        FREE(lock, M_LOCKF);
                        return (error);
                }       /* XXX */
        }

        /*
         * No blocks!!  Add the lock.  Note that we will
         * downgrade or upgrade any overlapping locks this
         * process already owns.
         *
         * Skip over locks owned by other processes.
         * Handle any locks that overlap and are owned by ourselves.
         */
        prev = head;
        block = *head;
        needtolink = 1;
        for (;;) {
                ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
                if (ovcase)
                        block = overlap->lf_next;
                /*
                 * Six cases:
                 *      0) no overlap
                 *      1) overlap == lock
                 *      2) overlap contains lock
                 *      3) lock contains overlap
                 *      4) overlap starts before lock
                 *      5) overlap ends after lock
                 */
                switch (ovcase) {
                case 0: /* no overlap */
                        if (needtolink) {
                                *prev = lock;
                                lock->lf_next = overlap;
                        }
                        break;

                case 1: /* overlap == lock */
                        /*
                         * If downgrading lock, others may be
                         * able to acquire it.
                         */
                        if (lock->lf_type == F_RDLCK &&
                            overlap->lf_type == F_WRLCK)
                                lf_wakelock(overlap);
                        overlap->lf_type = lock->lf_type;
                        FREE(lock, M_LOCKF);
                        lock = overlap; /* for debug output below */
                        break;

                case 2: /* overlap contains lock */
                        /*
                         * Check for common starting point and different types.
                         */
                        if (overlap->lf_type == lock->lf_type) {
                                FREE(lock, M_LOCKF);
                                lock = overlap; /* for debug output below */
                                break;
                        }
                        if (overlap->lf_start == lock->lf_start) {
                                *prev = lock;
                                lock->lf_next = overlap;
                                overlap->lf_start = lock->lf_end + 1;
                        } else
                                lf_split(overlap, lock);
                        lf_wakelock(overlap);
                        break;

                case 3: /* lock contains overlap */
                        /*
                         * If downgrading lock, others may be able to
                         * acquire it, otherwise take the list.
                         */
                        if (lock->lf_type == F_RDLCK &&
                            overlap->lf_type == F_WRLCK) {
                                lf_wakelock(overlap);
                        } else {
                                while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
                                        ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
                                        TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
                                            lf_block);
                                        TAILQ_INSERT_TAIL(&lock->lf_blkhd,
                                            ltmp, lf_block);
                                        ltmp->lf_next = lock;
                                }
                        }
                        /*
                         * Add the new lock if necessary and delete the overlap.
                         */
                        if (needtolink) {
                                *prev = lock;
                                lock->lf_next = overlap->lf_next;
                                prev = &lock->lf_next;
                                needtolink = 0;
                        } else
                                *prev = overlap->lf_next;
                        FREE(overlap, M_LOCKF);
                        continue;

                case 4: /* overlap starts before lock */
                        /*
                         * Add lock after overlap on the list.
                         */
                        lock->lf_next = overlap->lf_next;
                        overlap->lf_next = lock;
                        overlap->lf_end = lock->lf_start - 1;
                        prev = &lock->lf_next;
                        lf_wakelock(overlap);
                        needtolink = 0;
                        continue;

                case 5: /* overlap ends after lock */
                        /*
                         * Add the new lock before overlap.
                         */
                        if (needtolink) {
                                *prev = lock;
                                lock->lf_next = overlap;
                        }
                        overlap->lf_start = lock->lf_end + 1;
                        lf_wakelock(overlap);
                        break;
                }
                break;
        }
#ifdef LOCKF_DEBUG
        if (lockf_debug & 1) {
                lf_print("lf_setlock: got the lock", lock);
                lf_printlist("lf_setlock", lock);
        }
#endif /* LOCKF_DEBUG */
        return (0);
}
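
/*
 * Illustrative example (editor's note, not part of the original file): the
 * overlap handling above is what implements in-place upgrade and downgrade.
 * If a process holds a shared lock on [0,99] and then requests an exclusive
 * lock on the same range, lf_findoverlap() reports case 1 and the existing
 * record simply changes type; if it instead requests an exclusive lock on
 * [40,59], case 2 applies and lf_split() leaves shared locks on [0,39] and
 * [60,99] with the new exclusive lock in between.
 */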

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(unlock)
        struct lockf *unlock;
{
        struct lockf **head = unlock->lf_head;
        struct lockf *lf = *head;
        struct lockf *overlap, **prev;
        int ovcase;

        if (lf == NOLOCKF)
                return (0);
#ifdef LOCKF_DEBUG
        if (unlock->lf_type != F_UNLCK)
                panic("lf_clearlock: bad type");
        if (lockf_debug & 1)
                lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
        prev = head;
        while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap))) {
                /*
                 * Wakeup the list of locks to be retried.
                 */
                lf_wakelock(overlap);

                switch (ovcase) {

                case 1: /* overlap == lock */
                        *prev = overlap->lf_next;
                        FREE(overlap, M_LOCKF);
                        break;

                case 2: /* overlap contains lock: split it */
                        if (overlap->lf_start == unlock->lf_start) {
                                overlap->lf_start = unlock->lf_end + 1;
                                break;
                        }
                        lf_split(overlap, unlock);
                        overlap->lf_next = unlock->lf_next;
                        break;

                case 3: /* lock contains overlap */
                        *prev = overlap->lf_next;
                        lf = overlap->lf_next;
                        FREE(overlap, M_LOCKF);
                        continue;

                case 4: /* overlap starts before lock */
                        overlap->lf_end = unlock->lf_start - 1;
                        prev = &overlap->lf_next;
                        lf = overlap->lf_next;
                        continue;

                case 5: /* overlap ends after lock */
                        overlap->lf_start = unlock->lf_end + 1;
                        break;
                }
                break;
        }
#ifdef LOCKF_DEBUG
        if (lockf_debug & 1)
                lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
        return (0);
}
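
/*
 * Illustrative example (editor's note, not part of the original file):
 * unlocking the middle of a held range is what produces the split case
 * above.  If a process holds a lock on [0,99] and unlocks [40,59], e.g.
 *
 *      fl.l_start = 40; fl.l_len = 20; fl.l_type = F_UNLCK;
 *      fcntl(fd, F_SETLK, &fl);
 *
 * lf_findoverlap() reports case 2 and lf_split() leaves two locks,
 * [0,39] and [60,99], on the vnode's list.
 */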

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(lock, fl)
        struct lockf *lock;
        struct flock *fl;
{
        struct lockf *block;

#ifdef LOCKF_DEBUG
        if (lockf_debug & 1)
                lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

        if ((block = lf_getblock(lock))) {
                fl->l_type = block->lf_type;
                fl->l_whence = SEEK_SET;
                fl->l_start = block->lf_start;
                if (block->lf_end == -1)
                        fl->l_len = 0;
                else
                        fl->l_len = block->lf_end - block->lf_start + 1;
                if (block->lf_flags & F_POSIX)
                        fl->l_pid = proc_pid((struct proc *)(block->lf_id));
                else
                        fl->l_pid = -1;
        } else {
                fl->l_type = F_UNLCK;
        }
        return (0);
}
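
/*
 * Illustrative example (editor's note, not part of the original file):
 * F_GETLK does not take a lock; it only reports the first conflicting one.
 *
 *      struct flock fl = { .l_whence = SEEK_SET, .l_start = 0,
 *          .l_len = 0, .l_type = F_WRLCK };
 *      if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
 *              printf("blocked by pid %d over [%lld, +%lld)\n",
 *                  fl.l_pid, (long long)fl.l_start, (long long)fl.l_len);
 */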

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf *
lf_getblock(lock)
        struct lockf *lock;
{
        struct lockf **prev, *overlap, *lf = *(lock->lf_head);
        int ovcase;

        prev = lock->lf_head;
        while ((ovcase = lf_findoverlap(lf, lock, OTHERS, &prev, &overlap))) {
                /*
                 * We've found an overlap, see if it blocks us
                 */
                if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
                        return (overlap);
                /*
                 * Nope, point to the next one on the list and
                 * see if it blocks us
                 */
                lf = overlap->lf_next;
        }
        return (NOLOCKF);
}

/*
 * Walk the list of locks to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *       may be more than one.
 */
static int
lf_findoverlap(lf, lock, type, prev, overlap)
        struct lockf *lf;
        struct lockf *lock;
        int type;
        struct lockf ***prev;
        struct lockf **overlap;
{
        off_t start, end;

        *overlap = lf;
        if (lf == NOLOCKF)
                return (0);
#ifdef LOCKF_DEBUG
        if (lockf_debug & 2)
                lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
        start = lock->lf_start;
        end = lock->lf_end;
        while (lf != NOLOCKF) {
                if (((type & SELF) && lf->lf_id != lock->lf_id) ||
                    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
                        *prev = &lf->lf_next;
                        *overlap = lf = lf->lf_next;
                        continue;
                }
#ifdef LOCKF_DEBUG
                if (lockf_debug & 2)
                        lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
                /*
                 * OK, check for overlap
                 *
                 * Six cases:
                 *      0) no overlap
                 *      1) overlap == lock
                 *      2) overlap contains lock
                 *      3) lock contains overlap
                 *      4) overlap starts before lock
                 *      5) overlap ends after lock
                 */
                if ((lf->lf_end != -1 && start > lf->lf_end) ||
                    (end != -1 && lf->lf_start > end)) {
                        /* Case 0 */
#ifdef LOCKF_DEBUG
                        if (lockf_debug & 2)
                                printf("no overlap\n");
#endif /* LOCKF_DEBUG */
                        if ((type & SELF) && end != -1 && lf->lf_start > end)
                                return (0);
                        *prev = &lf->lf_next;
                        *overlap = lf = lf->lf_next;
                        continue;
                }
                if ((lf->lf_start == start) && (lf->lf_end == end)) {
                        /* Case 1 */
#ifdef LOCKF_DEBUG
                        if (lockf_debug & 2)
                                printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
                        return (1);
                }
                if ((lf->lf_start <= start) &&
                    (end != -1) &&
                    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
                        /* Case 2 */
#ifdef LOCKF_DEBUG
                        if (lockf_debug & 2)
                                printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
                        return (2);
                }
                if (start <= lf->lf_start &&
                    (end == -1 ||
                    (lf->lf_end != -1 && end >= lf->lf_end))) {
                        /* Case 3 */
#ifdef LOCKF_DEBUG
                        if (lockf_debug & 2)
                                printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
                        return (3);
                }
                if ((lf->lf_start < start) &&
                    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
                        /* Case 4 */
#ifdef LOCKF_DEBUG
                        if (lockf_debug & 2)
                                printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
                        return (4);
                }
                if ((lf->lf_start > start) &&
                    (end != -1) &&
                    ((lf->lf_end > end) || (lf->lf_end == -1))) {
                        /* Case 5 */
#ifdef LOCKF_DEBUG
                        if (lockf_debug & 2)
                                printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
                        return (5);
                }
                panic("lf_findoverlap: default");
        }
        return (0);
}
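
/*
 * Illustrative diagram (editor's note, not part of the original file) of
 * the overlap cases computed above, where "lock" is the incoming request
 * and "lf" is the list entry being checked:
 *
 *      case 1 (overlap == lock):        lf:   |=====|
 *                                       lock: |=====|
 *      case 2 (overlap contains lock):  lf:   |=========|
 *                                       lock:   |=====|
 *      case 3 (lock contains overlap):  lf:     |=====|
 *                                       lock: |=========|
 *      case 4 (overlap starts before):  lf:   |=====|
 *                                       lock:    |=====|
 *      case 5 (overlap ends after):     lf:      |=====|
 *                                       lock: |=====|
 */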

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
static void
lf_split(lock1, lock2)
        struct lockf *lock1;
        struct lockf *lock2;
{
        struct lockf *splitlock;

#ifdef LOCKF_DEBUG
        if (lockf_debug & 2) {
                lf_print("lf_split", lock1);
                lf_print("splitting from", lock2);
        }
#endif /* LOCKF_DEBUG */
        /*
         * Check to see if splitting into only two pieces.
         */
        if (lock1->lf_start == lock2->lf_start) {
                lock1->lf_start = lock2->lf_end + 1;
                lock2->lf_next = lock1;
                return;
        }
        if (lock1->lf_end == lock2->lf_end) {
                lock1->lf_end = lock2->lf_start - 1;
                lock2->lf_next = lock1->lf_next;
                lock1->lf_next = lock2;
                return;
        }
        /*
         * Make a new lock consisting of the last part of
         * the encompassing lock
         */
        MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
        bcopy(lock1, splitlock, sizeof *splitlock);
        splitlock->lf_start = lock2->lf_end + 1;
        TAILQ_INIT(&splitlock->lf_blkhd);
        lock1->lf_end = lock2->lf_start - 1;
        /*
         * OK, now link it in
         */
        splitlock->lf_next = lock1->lf_next;
        lock2->lf_next = splitlock;
        lock1->lf_next = lock2;
}
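
/*
 * Illustrative example (editor's note, not part of the original file):
 * splitting lock1 = [0,99] around lock2 = [40,59] leaves lock1 = [0,39],
 * allocates splitlock = [60,99], and chains lock1 -> lock2 -> splitlock
 * on the vnode's lock list.
 */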

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(listhead)
        struct lockf *listhead;
{
        struct lockf *wakelock;

        while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
                wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
                TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
                wakelock->lf_next = NOLOCKF;
#ifdef LOCKF_DEBUG
                if (lockf_debug & 2)
                        lf_print("lf_wakelock: awakening", wakelock);
#endif /* LOCKF_DEBUG */
                wakeup(wakelock);
        }
}

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
void
lf_print(tag, lock)
        char *tag;
        struct lockf *lock;
{
        printf("%s: lock %p for ", tag, (void *)lock);
        if (lock->lf_flags & F_POSIX)
                printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid);
        else
                printf("id %p", (void *)lock->lf_id);
        if (lock->lf_vnode != 0)
                printf(" in vno 0x%08x, %s, start %jd, end %jd",
                    lock->lf_vnode,
                    lock->lf_type == F_RDLCK ? "shared" :
                    lock->lf_type == F_WRLCK ? "exclusive" :
                    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
                    (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
        else
                printf(" %s, start %jd, end %jd",
                    lock->lf_type == F_RDLCK ? "shared" :
                    lock->lf_type == F_WRLCK ? "exclusive" :
                    lock->lf_type == F_UNLCK ? "unlock" : "unknown",
                    (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
        if (!TAILQ_EMPTY(&lock->lf_blkhd))
                printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
        else
                printf("\n");
}

/*
 * Print out a lock list.
 */
void
lf_printlist(tag, lock)
        char *tag;
        struct lockf *lock;
{
        struct lockf *lf, *blk;

        if (lock->lf_vnode == 0)
                return;

        printf("%s: Lock list for vno 0x%08x:\n",
            tag, lock->lf_vnode);
        for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
                printf("\tlock %p for ", (void *)lf);
                if (lf->lf_flags & F_POSIX)
                        printf("proc %ld",
                            (long)((struct proc *)lf->lf_id)->p_pid);
                else
                        printf("id %p", (void *)lf->lf_id);
                printf(", %s, start %jd, end %jd",
                    lf->lf_type == F_RDLCK ? "shared" :
                    lf->lf_type == F_WRLCK ? "exclusive" :
                    lf->lf_type == F_UNLCK ? "unlock" :
                    "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
                TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
                        printf("\n\t\tlock request %p for ", (void *)blk);
                        if (blk->lf_flags & F_POSIX)
                                printf("proc %ld",
                                    (long)((struct proc *)blk->lf_id)->p_pid);
                        else
                                printf("id %p", (void *)blk->lf_id);
                        printf(", %s, start %jd, end %jd",
                            blk->lf_type == F_RDLCK ? "shared" :
                            blk->lf_type == F_WRLCK ? "exclusive" :
                            blk->lf_type == F_UNLCK ? "unlock" :
                            "unknown", (intmax_t)blk->lf_start,
                            (intmax_t)blk->lf_end);
                        if (!TAILQ_EMPTY(&blk->lf_blkhd))
                                panic("lf_printlist: bad list");
                }
                printf("\n");
        }
}
#endif /* LOCKF_DEBUG */