1/*
2 * Copyright (c) 2015 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1989, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * This code is derived from software contributed to Berkeley by
33 * Scooter Morris at Genentech Inc.
34 *
35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions
37 * are met:
38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution.
43 * 4. Neither the name of the University nor the names of its contributors
44 * may be used to endorse or promote products derived from this software
45 * without specific prior written permission.
46 *
47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57 * SUCH DAMAGE.
58 *
59 * @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
60 */
61
62#include <sys/cdefs.h>
63#include <sys/param.h>
64#include <sys/systm.h>
65#include <sys/kernel.h>
66#include <sys/lock.h>
67#include <sys/mount.h>
68#include <sys/proc.h>
69#include <sys/signalvar.h>
70#include <sys/unistd.h>
71#include <sys/user.h>
72#include <sys/vnode.h>
73#include <sys/vnode_internal.h>
74#include <sys/vnode_if.h>
75#include <sys/malloc.h>
76#include <sys/fcntl.h>
77#include <sys/lockf.h>
78#include <sys/sdt.h>
79#include <kern/task.h>
80
81#include <sys/file_internal.h>
82
83/*
84 * This variable controls the maximum number of processes that will
85 * be checked in doing deadlock detection.
86 */
87static int maxlockdepth = MAXDEPTH;
88
89#if (DEVELOPMENT || DEBUG)
90#define LOCKF_DEBUGGING 1
91#endif
92
93#ifdef LOCKF_DEBUGGING
94#include <sys/sysctl.h>
95void lf_print(const char *tag, struct lockf *lock);
96void lf_printlist(const char *tag, struct lockf *lock);
97
98#define LF_DBG_LOCKOP (1 << 0) /* setlk, getlk, clearlk */
99#define LF_DBG_LIST (1 << 1) /* split, coalesce */
100#define LF_DBG_IMPINH (1 << 2) /* importance inheritance */
101#define LF_DBG_TRACE (1 << 3) /* errors, exit */
102
103static int lockf_debug = 0; /* was 2, could be 3 ;-) */
104SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, "");
105
106/*
 107 * If no mask bit selector is given, or one is given and that bit is set
 108 * in lockf_debug, then output the debugging diagnostic.
109 */
110#define LOCKF_DEBUG(mask, ...) \
111 do { \
112 if( !(mask) || ((mask) & lockf_debug)) { \
113 printf(__VA_ARGS__); \
114 } \
115 } while(0)
116#else /* !LOCKF_DEBUGGING */
117#define LOCKF_DEBUG(mask, ...) /* mask */
118#endif /* !LOCKF_DEBUGGING */
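/*
 * Illustrative note (not part of the original source): on a DEVELOPMENT or
 * DEBUG kernel the diagnostics above can be enabled at run time through the
 * debug.lockf_debug sysctl by OR-ing LF_DBG_* bits together, e.g.
 *
 *	sysctl debug.lockf_debug=9	enables LF_DBG_LOCKOP | LF_DBG_TRACE
 *
 * A LOCKF_DEBUG() call with a zero mask always prints; otherwise it prints
 * only when at least one of its mask bits is set in lockf_debug.
 */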
119
120MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
121
122#define NOLOCKF (struct lockf *)0
123#define SELF 0x1
124#define OTHERS 0x2
125#define OFF_MAX 0x7fffffffffffffffULL /* max off_t */
126
127/*
128 * Overlapping lock states
129 */
130typedef enum {
131 OVERLAP_NONE = 0,
132 OVERLAP_EQUALS_LOCK,
133 OVERLAP_CONTAINS_LOCK,
134 OVERLAP_CONTAINED_BY_LOCK,
135 OVERLAP_STARTS_BEFORE_LOCK,
136 OVERLAP_ENDS_AFTER_LOCK
137} overlap_t;
138
139static int lf_clearlock(struct lockf *);
140static overlap_t lf_findoverlap(struct lockf *,
141 struct lockf *, int, struct lockf ***, struct lockf **);
142static struct lockf *lf_getblock(struct lockf *, pid_t);
143static int lf_getlock(struct lockf *, struct flock *, pid_t);
144static int lf_setlock(struct lockf *, struct timespec *);
145static int lf_split(struct lockf *, struct lockf *);
146static void lf_wakelock(struct lockf *, boolean_t);
147#if IMPORTANCE_INHERITANCE
148static void lf_hold_assertion(task_t, struct lockf *);
149static void lf_jump_to_queue_head(struct lockf *, struct lockf *);
150static void lf_drop_assertion(struct lockf *);
151static void lf_boost_blocking_proc(struct lockf *, struct lockf *);
152#endif /* IMPORTANCE_INHERITANCE */
153
154/*
155 * lf_advlock
156 *
157 * Description: Advisory record locking support
158 *
159 * Parameters: ap Argument pointer to a vnop_advlock_args
160 * argument descriptor structure for the
161 * lock operation to be attempted.
162 *
163 * Returns: 0 Success
164 * EOVERFLOW
165 * EINVAL
166 * ENOLCK Number of locked regions exceeds limit
167 * lf_setlock:EAGAIN
168 * lf_setlock:EDEADLK
169 * lf_setlock:EINTR
170 * lf_setlock:ENOLCK
171 * lf_setlock:ETIMEDOUT
172 * lf_clearlock:ENOLCK
173 * vnode_size:???
174 *
175 * Notes: We return ENOLCK when we run out of memory to support locks; as
176 * such, there is no specific expectation limit other than the
177 * amount of available resources.
178 */
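/*
 * Illustrative userspace sketch (not part of this file): requests reach this
 * function via fcntl(2), which the VFS forwards through VNOP_ADVLOCK(). For
 * example, taking an advisory write lock over an entire file and waiting for
 * it (l_len == 0 means "to end of file"):
 *
 *	struct flock fl = {
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,
 *		.l_type   = F_WRLCK,
 *	};
 *	if (fcntl(fd, F_SETLKW, &fl) == -1)
 *		perror("fcntl");
 *
 * The error values listed above surface as the fcntl(2) errno.
 */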
179int
180lf_advlock(struct vnop_advlock_args *ap)
181{
182 struct vnode *vp = ap->a_vp;
183 struct flock *fl = ap->a_fl;
184 vfs_context_t context = ap->a_context;
185 struct lockf *lock;
186 off_t start, end, oadd;
187 u_quad_t size;
188 int error;
189 struct lockf **head = &vp->v_lockf;
190
191 /* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */
192
193 /*
194 * Avoid the common case of unlocking when inode has no locks.
195 */
196 if (*head == (struct lockf *)0) {
197 if (ap->a_op != F_SETLK) {
198 fl->l_type = F_UNLCK;
199 LOCKF_DEBUG(LF_DBG_TRACE,
200 "lf_advlock: '%s' unlock without lock\n",
201 vfs_context_proc(context)->p_comm);
202 return (0);
203 }
204 }
205
206 /*
207 * Convert the flock structure into a start and end.
208 */
209 switch (fl->l_whence) {
210
211 case SEEK_SET:
212 case SEEK_CUR:
213 /*
214 * Caller is responsible for adding any necessary offset
215 * when SEEK_CUR is used.
216 */
217 start = fl->l_start;
218 break;
219
220 case SEEK_END:
221
222 /*
 223 * It's OK to cast the u_quad_t to an off_t here, since they
224 * are the same storage size, and the value of the returned
225 * contents will never overflow into the sign bit. We need to
226 * do this because we will use size to force range checks.
227 */
228 if ((error = vnode_size(vp, (off_t *)&size, context))) {
229 LOCKF_DEBUG(LF_DBG_TRACE,
230 "lf_advlock: vnode_getattr failed: %d\n", error);
231 return (error);
232 }
233
234 if (size > OFF_MAX ||
235 (fl->l_start > 0 &&
236 size > (u_quad_t)(OFF_MAX - fl->l_start)))
237 return (EOVERFLOW);
238 start = size + fl->l_start;
239 break;
240
241 default:
242 LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: unknown whence %d\n",
243 fl->l_whence);
244 return (EINVAL);
245 }
246 if (start < 0) {
247 LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: start < 0 (%qd)\n",
248 start);
249 return (EINVAL);
250 }
251 if (fl->l_len < 0) {
252 if (start == 0) {
253 LOCKF_DEBUG(LF_DBG_TRACE,
254 "lf_advlock: len < 0 & start == 0\n");
255 return (EINVAL);
256 }
257 end = start - 1;
258 start += fl->l_len;
259 if (start < 0) {
260 LOCKF_DEBUG(LF_DBG_TRACE,
261 "lf_advlock: start < 0 (%qd)\n", start);
262 return (EINVAL);
263 }
264 } else if (fl->l_len == 0)
265 end = -1;
266 else {
267 oadd = fl->l_len - 1;
268 if (oadd > (off_t)(OFF_MAX - start)) {
269 LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: overflow\n");
270 return (EOVERFLOW);
271 }
272 end = start + oadd;
273 }
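	/*
	 * Worked examples of the conversion above (illustrative, not part of
	 * the original source), all with l_whence == SEEK_SET:
	 *
	 *	l_start = 100, l_len =  50  ->  [start, end] = [100, 149]
	 *	l_start = 100, l_len =   0  ->  [100, -1]  (extends to end of file)
	 *	l_start = 100, l_len = -50  ->  [50, 99]   (region before l_start)
	 */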
274 /*
275 * Create the lockf structure
276 */
277 MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
278 if (lock == NULL)
279 return (ENOLCK);
280 lock->lf_start = start;
281 lock->lf_end = end;
282 lock->lf_id = ap->a_id;
283 lock->lf_vnode = vp;
284 lock->lf_type = fl->l_type;
285 lock->lf_head = head;
286 lock->lf_next = (struct lockf *)0;
287 TAILQ_INIT(&lock->lf_blkhd);
288 lock->lf_flags = ap->a_flags;
289#if IMPORTANCE_INHERITANCE
290 lock->lf_boosted = LF_NOT_BOOSTED;
291#endif
292 if (ap->a_flags & F_POSIX)
293 lock->lf_owner = (struct proc *)lock->lf_id;
294 else
295 lock->lf_owner = NULL;
296
297 if (ap->a_flags & F_FLOCK)
298 lock->lf_flags |= F_WAKE1_SAFE;
299
300 lck_mtx_lock(&vp->v_lock); /* protect the lockf list */
301 /*
302 * Do the requested operation.
303 */
304 switch(ap->a_op) {
305 case F_SETLK:
306 /*
307 * For F_OFD_* locks, lf_id is the fileglob.
308 * Record an "lf_owner" iff this is a confined fd
309 * i.e. it cannot escape this process and will be
310 * F_UNLCKed before the owner exits. (This is
311 * the implicit guarantee needed to ensure lf_owner
312 * remains a valid reference here.)
313 */
314 if (ap->a_flags & F_OFD_LOCK) {
315 struct fileglob *fg = (void *)lock->lf_id;
316 if (fg->fg_lflags & FG_CONFINED)
317 lock->lf_owner = current_proc();
318 }
319 error = lf_setlock(lock, ap->a_timeout);
320 break;
321
322 case F_UNLCK:
323 error = lf_clearlock(lock);
324 FREE(lock, M_LOCKF);
325 break;
326
327 case F_GETLK:
328 error = lf_getlock(lock, fl, -1);
329 FREE(lock, M_LOCKF);
330 break;
331
332
333 default:
334 FREE(lock, M_LOCKF);
335 error = EINVAL;
336 break;
337 }
338 lck_mtx_unlock(&vp->v_lock); /* done manipulating the list */
339
340 LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: normal exit: %d\n", error);
341 return (error);
342}
343
344/*
345 * Empty the queue of msleeping requests for a lock on the given vnode.
346 * Called with the vnode already locked. Used for forced unmount, where
347 * a flock(2) invoker sleeping on a blocked lock holds an iocount reference
348 * that prevents the vnode from ever being drained. Force unmounting wins.
349 */
350void
351lf_abort_advlocks(vnode_t vp)
352{
353 struct lockf *lock;
354
355 if ((lock = vp->v_lockf) == NULL)
356 return;
357
358 lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
359
360 if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
361 struct lockf *tlock;
362
363 TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) {
364 /*
365 * Setting this flag should cause all
 366 * currently blocked F_SETLK requests to
367 * return to userland with an errno.
368 */
369 tlock->lf_flags |= F_ABORT;
370 }
371 lf_wakelock(lock, TRUE);
372 }
373}
374
375/*
376 * Take any lock attempts which are currently blocked by a given lock ("from")
377 * and mark them as blocked by a different lock ("to"). Used in the case
378 * where a byte range currently occupied by "from" is to be occupied by "to."
379 */
380static void
381lf_move_blocked(struct lockf *to, struct lockf *from)
382{
383 struct lockf *tlock;
384
385 TAILQ_FOREACH(tlock, &from->lf_blkhd, lf_block) {
386 tlock->lf_next = to;
387 }
388
389 TAILQ_CONCAT(&to->lf_blkhd, &from->lf_blkhd, lf_block);
390}
391
392/*
393 * lf_coalesce_adjacent
394 *
395 * Description: Helper function: when setting a lock, coalesce adjacent
396 * locks. Needed because adjacent locks are not overlapping,
397 * but POSIX requires that they be coalesced.
398 *
399 * Parameters: lock The new lock which may be adjacent
400 * to already locked regions, and which
401 * should therefore be coalesced with them
402 *
403 * Returns: <void>
404 */
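/*
 * Illustrative example (not part of the original source): if the owner of a
 * lock over [0, 99] sets another lock of the same type over [100, 199], the
 * two regions do not overlap but are numerically adjacent, so this routine
 * merges them into a single lock covering [0, 199] and frees the old entry.
 */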
405static void
406lf_coalesce_adjacent(struct lockf *lock)
407{
408 struct lockf **lf = lock->lf_head;
409
410 while (*lf != NOLOCKF) {
411 /* reject locks that obviously could not be coalesced */
412 if ((*lf == lock) ||
413 ((*lf)->lf_id != lock->lf_id) ||
414 ((*lf)->lf_type != lock->lf_type)) {
415 lf = &(*lf)->lf_next;
416 continue;
417 }
418
419 /*
420 * NOTE: Assumes that if two locks are adjacent on the number line
421 * and belong to the same owner, then they are adjacent on the list.
422 */
423 if ((*lf)->lf_end != -1 &&
424 ((*lf)->lf_end + 1) == lock->lf_start) {
425 struct lockf *adjacent = *lf;
426
427 LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent previous\n");
428 lock->lf_start = (*lf)->lf_start;
429 *lf = lock;
430 lf = &(*lf)->lf_next;
431
432 lf_move_blocked(lock, adjacent);
433
434 FREE(adjacent, M_LOCKF);
435 continue;
436 }
437 /* If the lock starts adjacent to us, we can coalesce it */
438 if (lock->lf_end != -1 &&
439 (lock->lf_end + 1) == (*lf)->lf_start) {
440 struct lockf *adjacent = *lf;
441
442 LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent following\n");
443 lock->lf_end = (*lf)->lf_end;
444 lock->lf_next = (*lf)->lf_next;
445 lf = &lock->lf_next;
446
447 lf_move_blocked(lock, adjacent);
448
449 FREE(adjacent, M_LOCKF);
450 continue;
451 }
452
453 /* no matching conditions; go on to next lock */
454 lf = &(*lf)->lf_next;
455 }
456}
457
458/*
459 * lf_setlock
460 *
461 * Description: Set a byte-range lock.
462 *
463 * Parameters: lock The lock structure describing the lock
464 * to be set; allocated by the caller, it
465 * will be linked into the lock list if
466 * the set is successful, and freed if the
467 * set is unsuccessful.
468 *
469 * timeout Timeout specified in the case of
470 * SETLKWTIMEOUT.
471 *
472 * Returns: 0 Success
473 * EAGAIN
474 * EDEADLK
475 * lf_split:ENOLCK
476 * lf_clearlock:ENOLCK
477 * msleep:EINTR
478 * msleep:ETIMEDOUT
479 *
480 * Notes: We add the lock to the provisional lock list. We do not
481 * coalesce at this time; this has implications for other lock
482 * requestors in the blocker search mechanism.
483 */
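/*
 * Illustrative note (not part of the original source): a non-blocking
 * request such as
 *
 *	fcntl(fd, F_SETLK, &fl);
 *
 * arrives here without F_WAIT set in lf_flags; if lf_getblock() finds a
 * conflicting lock the request fails immediately with EAGAIN rather than
 * sleeping. F_SETLKW requests set F_WAIT and block, subject to the optional
 * timeout and to the deadlock detection below.
 */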
484static int
485lf_setlock(struct lockf *lock, struct timespec *timeout)
486{
487 struct lockf *block;
488 struct lockf **head = lock->lf_head;
489 struct lockf **prev, *overlap, *ltmp;
490 static char lockstr[] = "lockf";
491 int priority, needtolink, error;
492 struct vnode *vp = lock->lf_vnode;
493 overlap_t ovcase;
494
495#ifdef LOCKF_DEBUGGING
496 if (lockf_debug & LF_DBG_LOCKOP) {
497 lf_print("lf_setlock", lock);
498 lf_printlist("lf_setlock(in)", lock);
499 }
500#endif /* LOCKF_DEBUGGING */
501
502 /*
503 * Set the priority
504 */
505 priority = PLOCK;
506 if (lock->lf_type == F_WRLCK)
507 priority += 4;
508 priority |= PCATCH;
509 /*
510 * Scan lock list for this file looking for locks that would block us.
511 */
512 while ((block = lf_getblock(lock, -1))) {
513 /*
514 * Free the structure and return if nonblocking.
515 */
516 if ((lock->lf_flags & F_WAIT) == 0) {
517 DTRACE_FSINFO(advlock__nowait, vnode_t, vp);
518 FREE(lock, M_LOCKF);
519 return (EAGAIN);
520 }
521
522 /*
523 * We are blocked. Since flock style locks cover
524 * the whole file, there is no chance for deadlock.
525 *
526 * OFD byte-range locks currently do NOT support
527 * deadlock detection.
528 *
529 * For POSIX byte-range locks we must check for deadlock.
530 *
531 * Deadlock detection is done by looking through the
532 * wait channels to see if there are any cycles that
533 * involve us. MAXDEPTH is set just to make sure we
534 * do not go off into neverland.
535 */
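		/*
		 * Illustrative scenario (not part of the original source):
		 * suppose the owner of "block" holds [0, 99] and is itself
		 * asleep waiting for [100, 199], which we hold. The walk
		 * below follows that owner's wait channel, finds that the
		 * lock blocking it is owned by us, and returns EDEADLK
		 * instead of joining the cycle.
		 */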
536 if ((lock->lf_flags & F_POSIX) &&
537 (block->lf_flags & F_POSIX)) {
538 struct proc *wproc, *bproc;
539 struct uthread *ut;
540 struct lockf *waitblock;
541 int i = 0;
542
543 /* The block is waiting on something */
544 wproc = block->lf_owner;
545 proc_lock(wproc);
546 TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
547 /*
548 * While the thread is asleep (uu_wchan != 0)
549 * in this code (uu_wmesg == lockstr)
550 * and we have not exceeded the maximum cycle
551 * depth (i < maxlockdepth), then check for a
552 * cycle to see if the lock is blocked behind
553 * someone blocked behind us.
554 */
555 while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) &&
556 ut->uu_wmesg == lockstr &&
557 (i++ < maxlockdepth)) {
558 waitblock = (struct lockf *)ut->uu_wchan;
559 /*
560 * Get the lock blocking the lock
561 * which would block us, and make
562 * certain it hasn't come unblocked
563 * (been granted, e.g. between the time
564 * we called lf_getblock, and the time
565 * we successfully acquired the
566 * proc_lock).
567 */
568 waitblock = waitblock->lf_next;
569 if (waitblock == NULL)
570 break;
571
572 /*
573 * Make sure it's an advisory range
574 * lock and not any other kind of lock;
575 * if we mix lock types, it's our own
576 * fault.
577 */
578 if ((waitblock->lf_flags & F_POSIX) == 0)
579 break;
580
581 /*
 582 * If the owner of the lock that's
 583 * blocking a lock that's blocking us
 584 * from getting the requested lock is
 585 * ourselves, we would deadlock, so error out.
586 */
587 bproc = waitblock->lf_owner;
588 if (bproc == lock->lf_owner) {
589 proc_unlock(wproc);
590 FREE(lock, M_LOCKF);
591 return (EDEADLK);
592 }
593 }
594 }
595 proc_unlock(wproc);
596 }
597
598 /*
599 * For flock type locks, we must first remove
600 * any shared locks that we hold before we sleep
601 * waiting for an exclusive lock.
602 */
603 if ((lock->lf_flags & F_FLOCK) &&
604 lock->lf_type == F_WRLCK) {
605 lock->lf_type = F_UNLCK;
606 if ((error = lf_clearlock(lock)) != 0) {
607 FREE(lock, M_LOCKF);
608 return (error);
609 }
610 lock->lf_type = F_WRLCK;
611 }
612 /*
613 * Add our lock to the blocked list and sleep until we're free.
614 * Remember who blocked us (for deadlock detection).
615 */
616 lock->lf_next = block;
617 TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
618
619 if ( !(lock->lf_flags & F_FLOCK))
620 block->lf_flags &= ~F_WAKE1_SAFE;
621
622#if IMPORTANCE_INHERITANCE
623 /*
624 * Importance donation is done only for cases where the
625 * owning task can be unambiguously determined.
626 *
627 * POSIX type locks are not inherited by child processes;
628 * we maintain a 1:1 mapping between a lock and its owning
629 * process.
630 *
631 * Flock type locks are inherited across fork() and there is
632 * no 1:1 mapping in the general case. However, the fileglobs
633 * used by OFD locks *may* be confined to the process that
634 * created them, and thus have an "owner", in which case
635 * we also attempt importance donation.
636 */
637 if ((lock->lf_flags & block->lf_flags & F_POSIX) != 0)
638 lf_boost_blocking_proc(lock, block);
639 else if ((lock->lf_flags & block->lf_flags & F_OFD_LOCK) &&
640 lock->lf_owner != block->lf_owner &&
641 NULL != lock->lf_owner && NULL != block->lf_owner)
642 lf_boost_blocking_proc(lock, block);
643#endif /* IMPORTANCE_INHERITANCE */
644
645#ifdef LOCKF_DEBUGGING
646 if (lockf_debug & LF_DBG_LOCKOP) {
647 lf_print("lf_setlock: blocking on", block);
648 lf_printlist("lf_setlock(block)", block);
649 }
650#endif /* LOCKF_DEBUGGING */
651 DTRACE_FSINFO(advlock__wait, vnode_t, vp);
652
653 error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
654
655 if (error == 0 && (lock->lf_flags & F_ABORT) != 0)
656 error = EBADF;
657
658 if (lock->lf_next) {
659 /*
660 * lf_wakelock() always sets wakelock->lf_next to
661 * NULL before a wakeup; so we've been woken early
662 * - perhaps by a debugger, signal or other event.
663 *
664 * Remove 'lock' from the block list (avoids double-add
665 * in the spurious case, which would create a cycle)
666 */
667 TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
668 lock->lf_next = NULL;
669
670 if (error == 0) {
671 /*
672 * If this was a spurious wakeup, retry
673 */
674 printf("%s: spurious wakeup, retrying lock\n",
675 __func__);
676 continue;
677 }
678 }
679
680 if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
681 if ((block = lf_getblock(lock, -1)) != NULL)
682 lf_move_blocked(block, lock);
683 }
684
685 if (error) {
686 if (!TAILQ_EMPTY(&lock->lf_blkhd))
687 lf_wakelock(lock, TRUE);
688 FREE(lock, M_LOCKF);
 689 /* Return ETIMEDOUT if a timeout occurred. */
690 if (error == EWOULDBLOCK) {
691 error = ETIMEDOUT;
692 }
693 return (error);
694 }
695 }
696
697 /*
698 * No blocks!! Add the lock. Note that we will
699 * downgrade or upgrade any overlapping locks this
700 * process already owns.
701 *
702 * Skip over locks owned by other processes.
703 * Handle any locks that overlap and are owned by ourselves.
704 */
705 prev = head;
706 block = *head;
707 needtolink = 1;
708 for (;;) {
709 ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
710 if (ovcase)
711 block = overlap->lf_next;
712 /*
713 * Six cases:
714 * 0) no overlap
715 * 1) overlap == lock
716 * 2) overlap contains lock
717 * 3) lock contains overlap
718 * 4) overlap starts before lock
719 * 5) overlap ends after lock
720 */
721 switch (ovcase) {
722 case OVERLAP_NONE:
723 if (needtolink) {
724 *prev = lock;
725 lock->lf_next = overlap;
726 }
727 break;
728
729 case OVERLAP_EQUALS_LOCK:
730 /*
731 * If downgrading lock, others may be
732 * able to acquire it.
733 */
734 if (lock->lf_type == F_RDLCK &&
735 overlap->lf_type == F_WRLCK)
736 lf_wakelock(overlap, TRUE);
737 overlap->lf_type = lock->lf_type;
738 FREE(lock, M_LOCKF);
739 lock = overlap; /* for lf_coalesce_adjacent() */
740 break;
741
742 case OVERLAP_CONTAINS_LOCK:
743 /*
744 * Check for common starting point and different types.
745 */
746 if (overlap->lf_type == lock->lf_type) {
747 FREE(lock, M_LOCKF);
748 lock = overlap; /* for lf_coalesce_adjacent() */
749 break;
750 }
751 if (overlap->lf_start == lock->lf_start) {
752 *prev = lock;
753 lock->lf_next = overlap;
754 overlap->lf_start = lock->lf_end + 1;
755 } else {
756 /*
757 * If we can't split the lock, we can't
758 * grant it. Claim a system limit for the
759 * resource shortage.
760 */
761 if (lf_split(overlap, lock)) {
762 FREE(lock, M_LOCKF);
763 return (ENOLCK);
764 }
765 }
766 lf_wakelock(overlap, TRUE);
767 break;
768
769 case OVERLAP_CONTAINED_BY_LOCK:
770 /*
771 * If downgrading lock, others may be able to
772 * acquire it, otherwise take the list.
773 */
774 if (lock->lf_type == F_RDLCK &&
775 overlap->lf_type == F_WRLCK) {
776 lf_wakelock(overlap, TRUE);
777 } else {
778 while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
779 ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
780 TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
781 lf_block);
782 TAILQ_INSERT_TAIL(&lock->lf_blkhd,
783 ltmp, lf_block);
784 ltmp->lf_next = lock;
785 }
786 }
787 /*
788 * Add the new lock if necessary and delete the overlap.
789 */
790 if (needtolink) {
791 *prev = lock;
792 lock->lf_next = overlap->lf_next;
793 prev = &lock->lf_next;
794 needtolink = 0;
795 } else
796 *prev = overlap->lf_next;
797 FREE(overlap, M_LOCKF);
798 continue;
799
800 case OVERLAP_STARTS_BEFORE_LOCK:
801 /*
802 * Add lock after overlap on the list.
803 */
804 lock->lf_next = overlap->lf_next;
805 overlap->lf_next = lock;
806 overlap->lf_end = lock->lf_start - 1;
807 prev = &lock->lf_next;
808 lf_wakelock(overlap, TRUE);
809 needtolink = 0;
810 continue;
811
812 case OVERLAP_ENDS_AFTER_LOCK:
813 /*
814 * Add the new lock before overlap.
815 */
816 if (needtolink) {
817 *prev = lock;
818 lock->lf_next = overlap;
819 }
820 overlap->lf_start = lock->lf_end + 1;
821 lf_wakelock(overlap, TRUE);
822 break;
823 }
824 break;
825 }
826 /* Coalesce adjacent locks with identical attributes */
827 lf_coalesce_adjacent(lock);
828#ifdef LOCKF_DEBUGGING
829 if (lockf_debug & LF_DBG_LOCKOP) {
830 lf_print("lf_setlock: got the lock", lock);
831 lf_printlist("lf_setlock(out)", lock);
832 }
833#endif /* LOCKF_DEBUGGING */
834 return (0);
835}
836
837
838/*
839 * lf_clearlock
840 *
 841 * Description: Remove a byte-range lock on a vnode. Generally, find the
842 * lock (or an overlap to that lock) and remove it (or shrink
843 * it), then wakeup anyone we can.
844 *
845 * Parameters: unlock The lock to clear
846 *
847 * Returns: 0 Success
848 * lf_split:ENOLCK
849 *
850 * Notes: A caller may unlock all the locks owned by the caller by
851 * specifying the entire file range; locks owned by other
 852 * callers are not affected by this operation.
853 */
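/*
 * Illustrative example (not part of the original source): if the caller
 * holds a single lock over [0, 199] and unlocks [50, 149], the overlap case
 * is OVERLAP_CONTAINS_LOCK and lf_split() leaves two locks, [0, 49] and
 * [150, 199]. Unlocking with l_start = 0 and l_len = 0 (the whole file)
 * removes every lock owned by the caller.
 */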
854static int
855lf_clearlock(struct lockf *unlock)
856{
857 struct lockf **head = unlock->lf_head;
858 struct lockf *lf = *head;
859 struct lockf *overlap, **prev;
860 overlap_t ovcase;
861
862 if (lf == NOLOCKF)
863 return (0);
864#ifdef LOCKF_DEBUGGING
865 if (unlock->lf_type != F_UNLCK)
866 panic("lf_clearlock: bad type");
867 if (lockf_debug & LF_DBG_LOCKOP)
868 lf_print("lf_clearlock", unlock);
869#endif /* LOCKF_DEBUGGING */
870 prev = head;
871 while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) {
872 /*
873 * Wakeup the list of locks to be retried.
874 */
875 lf_wakelock(overlap, FALSE);
876#if IMPORTANCE_INHERITANCE
877 if (overlap->lf_boosted == LF_BOOSTED) {
878 lf_drop_assertion(overlap);
879 }
880#endif /* IMPORTANCE_INHERITANCE */
881
882 switch (ovcase) {
883 case OVERLAP_NONE: /* satisfy compiler enum/switch */
884 break;
885
886 case OVERLAP_EQUALS_LOCK:
887 *prev = overlap->lf_next;
888 FREE(overlap, M_LOCKF);
889 break;
890
891 case OVERLAP_CONTAINS_LOCK: /* split it */
892 if (overlap->lf_start == unlock->lf_start) {
893 overlap->lf_start = unlock->lf_end + 1;
894 break;
895 }
896 /*
897 * If we can't split the lock, we can't grant it.
898 * Claim a system limit for the resource shortage.
899 */
900 if (lf_split(overlap, unlock))
901 return (ENOLCK);
902 overlap->lf_next = unlock->lf_next;
903 break;
904
905 case OVERLAP_CONTAINED_BY_LOCK:
906 *prev = overlap->lf_next;
907 lf = overlap->lf_next;
908 FREE(overlap, M_LOCKF);
909 continue;
910
911 case OVERLAP_STARTS_BEFORE_LOCK:
912 overlap->lf_end = unlock->lf_start - 1;
913 prev = &overlap->lf_next;
914 lf = overlap->lf_next;
915 continue;
916
917 case OVERLAP_ENDS_AFTER_LOCK:
918 overlap->lf_start = unlock->lf_end + 1;
919 break;
920 }
921 break;
922 }
923#ifdef LOCKF_DEBUGGING
924 if (lockf_debug & LF_DBG_LOCKOP)
925 lf_printlist("lf_clearlock", unlock);
926#endif /* LOCKF_DEBUGGING */
927 return (0);
928}
929
930
931/*
932 * lf_getlock
933 *
934 * Description: Check whether there is a blocking lock, and if so return
935 * its process identifier into the lock being requested.
936 *
937 * Parameters: lock Pointer to lock to test for blocks
938 * fl Pointer to flock structure to receive
939 * the blocking lock information, if a
940 * blocking lock is found.
941 * matchpid -1, or pid value to match in lookup.
942 *
943 * Returns: 0 Success
944 *
945 * Implicit Returns:
946 * *fl Contents modified to reflect the
947 * blocking lock, if one is found; not
948 * modified otherwise
949 *
950 * Notes: fl->l_pid will be (-1) for file locks and will only be set to
951 * the blocking process ID for advisory record locks.
952 */
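/*
 * Illustrative userspace sketch (not part of this file): F_GETLK reports the
 * first conflicting lock back through the caller's struct flock, leaving
 * l_type == F_UNLCK when nothing would block:
 *
 *	struct flock fl = {
 *		.l_whence = SEEK_SET,
 *		.l_start  = 0,
 *		.l_len    = 0,
 *		.l_type   = F_WRLCK,
 *	};
 *	if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type != F_UNLCK)
 *		printf("conflicting lock held by pid %d\n", (int)fl.l_pid);
 */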
953static int
954lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid)
955{
956 struct lockf *block;
957
958#ifdef LOCKF_DEBUGGING
959 if (lockf_debug & LF_DBG_LOCKOP)
960 lf_print("lf_getlock", lock);
961#endif /* LOCKF_DEBUGGING */
962
963 if ((block = lf_getblock(lock, matchpid))) {
964 fl->l_type = block->lf_type;
965 fl->l_whence = SEEK_SET;
966 fl->l_start = block->lf_start;
967 if (block->lf_end == -1)
968 fl->l_len = 0;
969 else
970 fl->l_len = block->lf_end - block->lf_start + 1;
971 if (NULL != block->lf_owner) {
972 /*
973 * lf_owner is only non-NULL when the lock
974 * "owner" can be unambiguously determined
975 */
976 fl->l_pid = proc_pid(block->lf_owner);
977 } else
978 fl->l_pid = -1;
979 } else {
980 fl->l_type = F_UNLCK;
981 }
982 return (0);
983}
984
985/*
986 * lf_getblock
987 *
988 * Description: Walk the list of locks for an inode and return the first
989 * blocking lock. A lock is considered blocking if we are not
990 * the lock owner; otherwise, we are permitted to upgrade or
991 * downgrade it, and it's not considered blocking.
992 *
993 * Parameters: lock The lock for which we are interested
994 * in obtaining the blocking lock, if any
995 * matchpid -1, or pid value to match in lookup.
996 *
997 * Returns: NOLOCKF No blocking lock exists
998 * !NOLOCKF The address of the blocking lock's
999 * struct lockf.
1000 */
1001static struct lockf *
1002lf_getblock(struct lockf *lock, pid_t matchpid)
1003{
1004 struct lockf **prev, *overlap, *lf = *(lock->lf_head);
1005
1006 for (prev = lock->lf_head;
1007 lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != OVERLAP_NONE;
1008 lf = overlap->lf_next) {
1009 /*
1010 * Found an overlap.
1011 *
1012 * If we're matching pids, and it's a record lock,
1013 * or it's an OFD lock on a process-confined fd,
1014 * but the pid doesn't match, then keep on looking ..
1015 */
1016 if (matchpid != -1 &&
1017 (overlap->lf_flags & (F_POSIX|F_OFD_LOCK)) != 0 &&
1018 proc_pid(overlap->lf_owner) != matchpid)
1019 continue;
1020
1021 /*
1022 * does it block us?
1023 */
1024 if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
1025 return (overlap);
1026 }
1027 return (NOLOCKF);
1028}
1029
1030
1031/*
1032 * lf_findoverlap
1033 *
1034 * Description: Walk the list of locks to find an overlapping lock (if any).
1035 *
1036 * Parameters: lf First lock on lock list
1037 * lock The lock we are checking for an overlap
1038 * check Check type
 1039 * prev Pointer used to return the address of
 1040 * the lf_next pointer of the lock that
 1041 * precedes the overlapping lock, if any
1042 * overlap pointer to pointer to contain address
1043 * of overlapping lock
1044 *
1045 * Returns: OVERLAP_NONE
1046 * OVERLAP_EQUALS_LOCK
1047 * OVERLAP_CONTAINS_LOCK
1048 * OVERLAP_CONTAINED_BY_LOCK
1049 * OVERLAP_STARTS_BEFORE_LOCK
1050 * OVERLAP_ENDS_AFTER_LOCK
1051 *
1052 * Implicit Returns:
1053 * *prev The address of the next pointer in the
1054 * lock previous to the overlapping lock;
1055 * this is generally used to relink the
1056 * lock list, avoiding a second iteration.
1057 * *overlap The pointer to the overlapping lock
1058 * itself; this is used to return data in
1059 * the check == OTHERS case, and for the
1060 * caller to modify the overlapping lock,
1061 * in the check == SELF case
1062 *
1063 * Note: This returns only the FIRST overlapping lock. There may be
1064 * more than one. lf_getlock will return the first blocking lock,
 1065 * while lf_setlock will iterate over all overlapping locks to
 1066 * resolve each overlap case in turn.
1067 * The check parameter can be SELF, meaning we are looking for
1068 * overlapping locks owned by us, or it can be OTHERS, meaning
1069 * we are looking for overlapping locks owned by someone else so
1070 * we can report a blocking lock on an F_GETLK request.
1071 *
1072 * The value of *overlap and *prev are modified, even if there is
1073 * no overlapping lock found; always check the return code.
1074 */
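/*
 * Worked example (illustrative, not part of the original source): with an
 * existing lock lf covering [100, 199], a request for
 *
 *	[100, 199]  returns OVERLAP_EQUALS_LOCK
 *	[120, 150]  returns OVERLAP_CONTAINS_LOCK       (lf contains request)
 *	[ 50, 300]  returns OVERLAP_CONTAINED_BY_LOCK   (request contains lf)
 *	[150, 300]  returns OVERLAP_STARTS_BEFORE_LOCK  (lf starts first)
 *	[ 50, 150]  returns OVERLAP_ENDS_AFTER_LOCK     (lf ends last)
 */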
1075static overlap_t
1076lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
1077 struct lockf ***prev, struct lockf **overlap)
1078{
1079 off_t start, end;
1080 int found_self = 0;
1081
1082 *overlap = lf;
1083 if (lf == NOLOCKF)
1084 return (0);
1085#ifdef LOCKF_DEBUGGING
1086 if (lockf_debug & LF_DBG_LIST)
1087 lf_print("lf_findoverlap: looking for overlap in", lock);
1088#endif /* LOCKF_DEBUGGING */
1089 start = lock->lf_start;
1090 end = lock->lf_end;
1091 while (lf != NOLOCKF) {
1092 if (((type & SELF) && lf->lf_id != lock->lf_id) ||
1093 ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
1094 /*
1095 * Locks belonging to one process are adjacent on the
1096 * list, so if we've found any locks belonging to us,
1097 * and we're now seeing something else, then we've
1098 * examined all "self" locks. Note that bailing out
1099 * here is quite important; for coalescing, we assume
1100 * numerically adjacent locks from the same owner to
1101 * be adjacent on the list.
1102 */
1103 if ((type & SELF) && found_self) {
1104 return OVERLAP_NONE;
1105 }
1106
1107 *prev = &lf->lf_next;
1108 *overlap = lf = lf->lf_next;
1109 continue;
1110 }
1111
1112 if ((type & SELF)) {
1113 found_self = 1;
1114 }
1115
1116#ifdef LOCKF_DEBUGGING
1117 if (lockf_debug & LF_DBG_LIST)
1118 lf_print("\tchecking", lf);
1119#endif /* LOCKF_DEBUGGING */
1120 /*
1121 * OK, check for overlap
1122 */
1123 if ((lf->lf_end != -1 && start > lf->lf_end) ||
1124 (end != -1 && lf->lf_start > end)) {
1125 /* Case 0 */
1126 LOCKF_DEBUG(LF_DBG_LIST, "no overlap\n");
1127
1128 /*
1129 * NOTE: assumes that locks for the same process are
1130 * nonintersecting and ordered.
1131 */
1132 if ((type & SELF) && end != -1 && lf->lf_start > end)
1133 return (OVERLAP_NONE);
1134 *prev = &lf->lf_next;
1135 *overlap = lf = lf->lf_next;
1136 continue;
1137 }
1138 if ((lf->lf_start == start) && (lf->lf_end == end)) {
1139 LOCKF_DEBUG(LF_DBG_LIST, "overlap == lock\n");
1140 return (OVERLAP_EQUALS_LOCK);
1141 }
1142 if ((lf->lf_start <= start) &&
1143 (end != -1) &&
1144 ((lf->lf_end >= end) || (lf->lf_end == -1))) {
1145 LOCKF_DEBUG(LF_DBG_LIST, "overlap contains lock\n");
1146 return (OVERLAP_CONTAINS_LOCK);
1147 }
1148 if (start <= lf->lf_start &&
1149 (end == -1 ||
1150 (lf->lf_end != -1 && end >= lf->lf_end))) {
1151 LOCKF_DEBUG(LF_DBG_LIST, "lock contains overlap\n");
1152 return (OVERLAP_CONTAINED_BY_LOCK);
1153 }
1154 if ((lf->lf_start < start) &&
1155 ((lf->lf_end >= start) || (lf->lf_end == -1))) {
1156 LOCKF_DEBUG(LF_DBG_LIST, "overlap starts before lock\n");
1157 return (OVERLAP_STARTS_BEFORE_LOCK);
1158 }
1159 if ((lf->lf_start > start) &&
1160 (end != -1) &&
1161 ((lf->lf_end > end) || (lf->lf_end == -1))) {
1162 LOCKF_DEBUG(LF_DBG_LIST, "overlap ends after lock\n");
1163 return (OVERLAP_ENDS_AFTER_LOCK);
1164 }
1165 panic("lf_findoverlap: default");
1166 }
1167 return (OVERLAP_NONE);
1168}
1169
1170
1171/*
1172 * lf_split
1173 *
1174 * Description: Split a lock and a contained region into two or three locks
1175 * as necessary.
1176 *
1177 * Parameters: lock1 Lock to split
1178 * lock2 Overlapping lock region requiring the
1179 * split (upgrade/downgrade/unlock)
1180 *
1181 * Returns: 0 Success
1182 * ENOLCK No memory for new lock
1183 *
1184 * Implicit Returns:
1185 * *lock1 Modified original lock
1186 * *lock2 Overlapping lock (inserted into list)
1187 * (new lock) Potential new lock inserted into list
1188 * if split results in 3 locks
1189 *
1190 * Notes: This operation can only fail if the split would result in three
1191 * locks, and there is insufficient memory to allocate the third
1192 * lock; in that case, neither of the locks will be modified.
1193 */
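/*
 * Worked example (illustrative, not part of the original source): with
 * lock1 covering [0, 199] and lock2 covering [50, 149], neither endpoint
 * matches, so a third lock is allocated; lock1 is trimmed to [0, 49], the
 * new piece covers [150, 199], and the resulting list order is
 * lock1 -> lock2 -> new piece.
 */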
1194static int
1195lf_split(struct lockf *lock1, struct lockf *lock2)
1196{
1197 struct lockf *splitlock;
1198
1199#ifdef LOCKF_DEBUGGING
1200 if (lockf_debug & LF_DBG_LIST) {
1201 lf_print("lf_split", lock1);
1202 lf_print("splitting from", lock2);
1203 }
1204#endif /* LOCKF_DEBUGGING */
1205 /*
1206 * Check to see if splitting into only two pieces.
1207 */
1208 if (lock1->lf_start == lock2->lf_start) {
1209 lock1->lf_start = lock2->lf_end + 1;
1210 lock2->lf_next = lock1;
1211 return (0);
1212 }
1213 if (lock1->lf_end == lock2->lf_end) {
1214 lock1->lf_end = lock2->lf_start - 1;
1215 lock2->lf_next = lock1->lf_next;
1216 lock1->lf_next = lock2;
1217 return (0);
1218 }
1219 /*
1220 * Make a new lock consisting of the last part of
1221 * the encompassing lock
1222 */
1223 MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
1224 if (splitlock == NULL)
1225 return (ENOLCK);
1226 bcopy(lock1, splitlock, sizeof *splitlock);
1227 splitlock->lf_start = lock2->lf_end + 1;
1228 TAILQ_INIT(&splitlock->lf_blkhd);
1229 lock1->lf_end = lock2->lf_start - 1;
1230 /*
1231 * OK, now link it in
1232 */
1233 splitlock->lf_next = lock1->lf_next;
1234 lock2->lf_next = splitlock;
1235 lock1->lf_next = lock2;
1236
1237 return (0);
1238}
1239
1240
1241/*
1242 * lf_wakelock
1243 *
1244 * Wakeup a blocklist in the case of a downgrade or unlock, since others
1245 * waiting on the lock may now be able to acquire it.
1246 *
1247 * Parameters: listhead Lock list head on which waiters may
1248 * have pending locks
1249 *
1250 * Returns: <void>
1251 *
1252 * Notes: This function iterates a list of locks and wakes all waiters,
1253 * rather than only waiters for the contended regions. Because
1254 * of this, for heavily contended files, this can result in a
1255 * "thundering herd" situation. Refactoring the code could make
1256 * this operation more efficient, if heavy contention ever results
1257 * in a real-world performance problem.
1258 */
1259static void
1260lf_wakelock(struct lockf *listhead, boolean_t force_all)
1261{
1262 struct lockf *wakelock;
1263 boolean_t wake_all = TRUE;
1264
1265 if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE))
1266 wake_all = FALSE;
1267
1268 while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
1269 wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
1270 TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
1271
1272 wakelock->lf_next = NOLOCKF;
1273#ifdef LOCKF_DEBUGGING
1274 if (lockf_debug & LF_DBG_LOCKOP)
1275 lf_print("lf_wakelock: awakening", wakelock);
1276#endif /* LOCKF_DEBUGGING */
1277 if (wake_all == FALSE) {
1278 /*
1279 * If there are items on the list head block list,
1280 * move them to the wakelock list instead, and then
1281 * correct their lf_next pointers.
1282 */
1283 if (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
1284 TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block);
1285
1286 struct lockf *tlock;
1287
1288 TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
1289 if (TAILQ_NEXT(tlock, lf_block) == tlock) {
1290 /* See rdar://10887303 */
1291 panic("cycle in wakelock list");
1292 }
1293 tlock->lf_next = wakelock;
1294 }
1295 }
1296 }
1297 wakeup(wakelock);
1298
1299 if (wake_all == FALSE)
1300 break;
1301 }
1302}
1303
1304
1305#ifdef LOCKF_DEBUGGING
1306#define GET_LF_OWNER_PID(lf) (proc_pid((lf)->lf_owner))
1307
1308/*
1309 * lf_print DEBUG
1310 *
1311 * Print out a lock; lock information is prefixed by the string in 'tag'
1312 *
1313 * Parameters: tag A string tag for debugging
1314 * lock The lock whose information should be
1315 * displayed
1316 *
1317 * Returns: <void>
1318 */
1319void
1320lf_print(const char *tag, struct lockf *lock)
1321{
1322 printf("%s: lock %p for ", tag, (void *)lock);
1323 if (lock->lf_flags & F_POSIX)
1324 printf("proc %p (owner %d)",
1325 lock->lf_id, GET_LF_OWNER_PID(lock));
1326 else if (lock->lf_flags & F_OFD_LOCK)
1327 printf("fg %p (owner %d)",
1328 lock->lf_id, GET_LF_OWNER_PID(lock));
1329 else
1330 printf("id %p", (void *)lock->lf_id);
1331 if (lock->lf_vnode != 0)
1332 printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx",
1333 lock->lf_vnode,
1334 lock->lf_type == F_RDLCK ? "shared" :
1335 lock->lf_type == F_WRLCK ? "exclusive" :
1336 lock->lf_type == F_UNLCK ? "unlock" : "unknown",
1337 (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
1338 else
1339 printf(" %s, start 0x%016llx, end 0x%016llx",
1340 lock->lf_type == F_RDLCK ? "shared" :
1341 lock->lf_type == F_WRLCK ? "exclusive" :
1342 lock->lf_type == F_UNLCK ? "unlock" : "unknown",
1343 (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
1344 if (!TAILQ_EMPTY(&lock->lf_blkhd))
1345 printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
1346 else
1347 printf("\n");
1348}
1349
1350
1351/*
1352 * lf_printlist DEBUG
1353 *
1354 * Print out a lock list for the vnode associated with 'lock'; lock information
1355 * is prefixed by the string in 'tag'
1356 *
1357 * Parameters: tag A string tag for debugging
1358 * lock The lock whose vnode's lock list should
1359 * be displayed
1360 *
1361 * Returns: <void>
1362 */
1363void
1364lf_printlist(const char *tag, struct lockf *lock)
1365{
1366 struct lockf *lf, *blk;
1367
1368 if (lock->lf_vnode == 0)
1369 return;
1370
1371 printf("%s: Lock list for vno %p:\n",
1372 tag, lock->lf_vnode);
1373 for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
1374 printf("\tlock %p for ",(void *)lf);
1375 if (lf->lf_flags & F_POSIX)
1376 printf("proc %p (owner %d)",
1377 lf->lf_id, GET_LF_OWNER_PID(lf));
1378 else if (lf->lf_flags & F_OFD_LOCK)
1379 printf("fg %p (owner %d)",
1380 lf->lf_id, GET_LF_OWNER_PID(lf));
1381 else
1382 printf("id %p", (void *)lf->lf_id);
1383 printf(", %s, start 0x%016llx, end 0x%016llx",
1384 lf->lf_type == F_RDLCK ? "shared" :
1385 lf->lf_type == F_WRLCK ? "exclusive" :
1386 lf->lf_type == F_UNLCK ? "unlock" :
1387 "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
1388 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
1389 printf("\n\t\tlock request %p for ", (void *)blk);
1390 if (blk->lf_flags & F_POSIX)
1391 printf("proc %p (owner %d)",
1392 blk->lf_id, GET_LF_OWNER_PID(blk));
1393 else if (blk->lf_flags & F_OFD_LOCK)
1394 printf("fg %p (owner %d)",
1395 blk->lf_id, GET_LF_OWNER_PID(blk));
1396 else
1397 printf("id %p", (void *)blk->lf_id);
1398 printf(", %s, start 0x%016llx, end 0x%016llx",
1399 blk->lf_type == F_RDLCK ? "shared" :
1400 blk->lf_type == F_WRLCK ? "exclusive" :
1401 blk->lf_type == F_UNLCK ? "unlock" :
1402 "unknown", (intmax_t)blk->lf_start,
1403 (intmax_t)blk->lf_end);
1404 if (!TAILQ_EMPTY(&blk->lf_blkhd))
1405 panic("lf_printlist: bad list");
1406 }
1407 printf("\n");
1408 }
1409}
1410#endif /* LOCKF_DEBUGGING */
1411
1412#if IMPORTANCE_INHERITANCE
1413
1414/*
1415 * lf_hold_assertion
1416 *
1417 * Call task importance hold assertion on the owner of the lock.
1418 *
1419 * Parameters: block_task Owner of the lock blocking
1420 * current thread.
1421 *
1422 * block lock on which the current thread
 1423 * is blocked.
1424 *
1425 * Returns: <void>
1426 *
 1427 * Notes: A task reference on block_task need not be held, since the
 1428 * current thread holds the vnode lock and block_task holds a file
 1429 * lock; removing that file lock at exit requires block_task to
 1430 * grab the vnode lock first.
1431 */
1432static void
1433lf_hold_assertion(task_t block_task, struct lockf *block)
1434{
1435 if (task_importance_hold_file_lock_assertion(block_task, 1)) {
1436 block->lf_boosted = LF_BOOSTED;
1437 LOCKF_DEBUG(LF_DBG_IMPINH,
1438 "lf: importance hold file lock assert on pid %d lock %p\n",
1439 proc_pid(block->lf_owner), block);
1440 }
1441}
1442
1443
1444/*
1445 * lf_jump_to_queue_head
1446 *
1447 * Jump the lock from the tail of the block queue to the head of
1448 * the queue.
1449 *
1450 * Parameters: block lockf struct containing the
1451 * block queue.
1452 * lock lockf struct to be jumped to the
1453 * front.
1454 *
1455 * Returns: <void>
1456 */
1457static void
1458lf_jump_to_queue_head(struct lockf *block, struct lockf *lock)
1459{
1460 /* Move the lock to the head of the block queue. */
1461 TAILQ_REMOVE(&block->lf_blkhd, lock, lf_block);
1462 TAILQ_INSERT_HEAD(&block->lf_blkhd, lock, lf_block);
1463}
1464
1465
1466/*
1467 * lf_drop_assertion
1468 *
1469 * Drops the task hold assertion.
1470 *
1471 * Parameters: block lockf struct holding the assertion.
1472 *
1473 * Returns: <void>
1474 */
1475static void
1476lf_drop_assertion(struct lockf *block)
1477{
1478 LOCKF_DEBUG(LF_DBG_IMPINH, "lf: %d: dropping assertion for lock %p\n",
1479 proc_pid(block->lf_owner), block);
1480
1481 task_t current_task = proc_task(block->lf_owner);
1482 task_importance_drop_file_lock_assertion(current_task, 1);
1483 block->lf_boosted = LF_NOT_BOOSTED;
1484}
1485
1486static void
1487lf_boost_blocking_proc(struct lockf *lock, struct lockf *block)
1488{
1489 task_t ltask = proc_task(lock->lf_owner);
1490 task_t btask = proc_task(block->lf_owner);
1491
1492 /*
1493 * Check if ltask can donate importance. The
1494 * check of imp_donor bit is done without holding
1495 * any lock. The value may change after you read it,
1496 * but it is ok to boost a task while someone else is
1497 * unboosting you.
1498 *
1499 * TODO: Support live inheritance on file locks.
1500 */
1501 if (task_is_importance_donor(ltask)) {
1502 LOCKF_DEBUG(LF_DBG_IMPINH,
1503 "lf: %d: attempt to boost pid %d that holds lock %p\n",
1504 proc_pid(lock->lf_owner), proc_pid(block->lf_owner), block);
1505
1506 if (block->lf_boosted != LF_BOOSTED &&
1507 task_is_importance_receiver_type(btask)) {
1508 lf_hold_assertion(btask, block);
1509 }
1510 lf_jump_to_queue_head(block, lock);
1511 }
1512}
1513#endif /* IMPORTANCE_INHERITANCE */