/*
 * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
 */
61 | ||
62 | #include <sys/cdefs.h> | |
63 | #include <sys/param.h> | |
64 | #include <sys/systm.h> | |
65 | #include <sys/kernel.h> | |
66 | #include <sys/lock.h> | |
67 | #include <sys/mount.h> | |
68 | #include <sys/proc.h> | |
69 | #include <sys/signalvar.h> | |
70 | #include <sys/unistd.h> | |
71 | #include <sys/user.h> | |
72 | #include <sys/vnode.h> | |
73 | #include <sys/vnode_internal.h> | |
74 | #include <sys/vnode_if.h> | |
75 | #include <sys/malloc.h> | |
76 | #include <sys/fcntl.h> | |
77 | #include <sys/lockf.h> | |
78 | #include <sys/sdt.h> | |
79 | #include <kern/task.h> | |
80 | ||
81 | /* | |
82 | * This variable controls the maximum number of processes that will | |
83 | * be checked in doing deadlock detection. | |
84 | */ | |
85 | static int maxlockdepth = MAXDEPTH; | |
86 | ||
87 | #ifdef LOCKF_DEBUGGING | |
88 | #include <sys/sysctl.h> | |
89 | #include <ufs/ufs/quota.h> | |
90 | #include <ufs/ufs/inode.h> | |
91 | void lf_print(const char *tag, struct lockf *lock); | |
92 | void lf_printlist(const char *tag, struct lockf *lock); | |
93 | static int lockf_debug = 2; | |
94 | SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &lockf_debug, 0, ""); | |
95 | ||
/*
 * If there is no mask bit selector, or there is one and the selector is
 * set, then output the debugging diagnostic.
 */
#define LOCKF_DEBUG(mask, ...)                          \
        do {                                            \
                if (!(mask) || ((mask) & lockf_debug)) {\
                        printf(__VA_ARGS__);            \
                }                                       \
        } while (0)
#else   /* !LOCKF_DEBUGGING */
#define LOCKF_DEBUG(mask, ...)          /* mask */
#endif  /* !LOCKF_DEBUGGING */

MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");

#define NOLOCKF (struct lockf *)0
#define SELF    0x1
#define OTHERS  0x2
#define OFF_MAX 0x7fffffffffffffffULL   /* max off_t */

/*
 * Overlapping lock states
 */
typedef enum {
        OVERLAP_NONE = 0,
        OVERLAP_EQUALS_LOCK,
        OVERLAP_CONTAINS_LOCK,
        OVERLAP_CONTAINED_BY_LOCK,
        OVERLAP_STARTS_BEFORE_LOCK,
        OVERLAP_ENDS_AFTER_LOCK
} overlap_t;
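
/*
 * Relationship of the overlap cases between an existing list lock ("lf",
 * drawn with '-') and the incoming request ("lock", drawn with '='), as
 * classified by lf_findoverlap() below:
 *
 *      OVERLAP_EQUALS_LOCK             lf:      |-------|
 *                                      lock:    |=======|
 *
 *      OVERLAP_CONTAINS_LOCK           lf:    |-----------|
 *                                      lock:    |=======|
 *
 *      OVERLAP_CONTAINED_BY_LOCK       lf:      |-------|
 *                                      lock:  |===========|
 *
 *      OVERLAP_STARTS_BEFORE_LOCK      lf:   |-------|
 *                                      lock:     |=======|
 *
 *      OVERLAP_ENDS_AFTER_LOCK         lf:          |-------|
 *                                      lock:    |=======|
 */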
128 | ||
129 | static int lf_clearlock(struct lockf *); | |
130 | static overlap_t lf_findoverlap(struct lockf *, | |
131 | struct lockf *, int, struct lockf ***, struct lockf **); | |
132 | static struct lockf *lf_getblock(struct lockf *, pid_t); | |
133 | static int lf_getlock(struct lockf *, struct flock *, pid_t); | |
134 | static int lf_setlock(struct lockf *, struct timespec *); | |
135 | static int lf_split(struct lockf *, struct lockf *); | |
136 | static void lf_wakelock(struct lockf *, boolean_t); | |
137 | #if IMPORTANCE_INHERITANCE | |
138 | static void lf_hold_assertion(task_t, struct lockf *); | |
139 | static void lf_jump_to_queue_head(struct lockf *, struct lockf *); | |
140 | static void lf_drop_assertion(struct lockf *); | |
141 | #endif /* IMPORTANCE_INHERITANCE */ | |
142 | ||
143 | /* | |
144 | * lf_advlock | |
145 | * | |
146 | * Description: Advisory record locking support | |
147 | * | |
148 | * Parameters: ap Argument pointer to a vnop_advlock_args | |
149 | * argument descriptor structure for the | |
150 | * lock operation to be attempted. | |
151 | * | |
152 | * Returns: 0 Success | |
153 | * EOVERFLOW | |
154 | * EINVAL | |
155 | * ENOLCK Number of locked regions exceeds limit | |
156 | * lf_setlock:EAGAIN | |
157 | * lf_setlock:EDEADLK | |
158 | * lf_setlock:EINTR | |
159 | * lf_setlock:ENOLCK | |
160 | * lf_setlock:ETIMEDOUT | |
161 | * lf_clearlock:ENOLCK | |
162 | * vnode_size:??? | |
163 | * | |
 * Notes:       We return ENOLCK when we run out of memory to support locks;
 *              as such, there is no fixed limit on the number of locks other
 *              than available memory.
 */
int
lf_advlock(struct vnop_advlock_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct flock *fl = ap->a_fl;
        vfs_context_t context = ap->a_context;
        struct lockf *lock;
        off_t start, end, oadd;
        u_quad_t size;
        int error;
        struct lockf **head = &vp->v_lockf;

        /* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */

        /*
         * Avoid the common case of unlocking when the vnode has no locks.
         */
        if (*head == (struct lockf *)0) {
                if (ap->a_op != F_SETLK) {
                        fl->l_type = F_UNLCK;
                        LOCKF_DEBUG(0, "lf_advlock: '%s' unlock without lock\n", vfs_context_proc(context)->p_comm);
                        return (0);
                }
        }

        /*
         * Convert the flock structure into a start and end.
         */
        switch (fl->l_whence) {

        case SEEK_SET:
        case SEEK_CUR:
                /*
                 * Caller is responsible for adding any necessary offset
                 * when SEEK_CUR is used.
                 */
                start = fl->l_start;
                break;

        case SEEK_END:

                /*
                 * It's OK to cast the u_quad_t to an off_t here, since they
                 * are the same storage size, and the value of the returned
                 * contents will never overflow into the sign bit.  We need to
                 * do this because we will use size to force range checks.
                 */
                if ((error = vnode_size(vp, (off_t *)&size, context))) {
                        LOCKF_DEBUG(0, "lf_advlock: vnode_size failed: %d\n", error);
                        return (error);
                }
219 | ||
220 | if (size > OFF_MAX || | |
221 | (fl->l_start > 0 && | |
222 | size > (u_quad_t)(OFF_MAX - fl->l_start))) | |
223 | return (EOVERFLOW); | |
224 | start = size + fl->l_start; | |
225 | break; | |
226 | ||
227 | default: | |
228 | LOCKF_DEBUG(0, "lf_advlock: unknown whence %d\n", fl->l_whence); | |
229 | return (EINVAL); | |
230 | } | |
231 | if (start < 0) { | |
232 | LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start); | |
233 | return (EINVAL); | |
234 | } | |
235 | if (fl->l_len < 0) { | |
236 | if (start == 0) { | |
237 | LOCKF_DEBUG(0, "lf_advlock: len < 0 & start == 0\n"); | |
238 | return (EINVAL); | |
239 | } | |
240 | end = start - 1; | |
241 | start += fl->l_len; | |
242 | if (start < 0) { | |
243 | LOCKF_DEBUG(0, "lf_advlock: start < 0 (%qd)\n", start); | |
244 | return (EINVAL); | |
245 | } | |
246 | } else if (fl->l_len == 0) | |
247 | end = -1; | |
248 | else { | |
249 | oadd = fl->l_len - 1; | |
250 | if (oadd > (off_t)(OFF_MAX - start)) { | |
251 | LOCKF_DEBUG(0, "lf_advlock: overflow\n"); | |
252 | return (EOVERFLOW); | |
253 | } | |
254 | end = start + oadd; | |
255 | } | |
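        /*
         * Worked examples of the conversion above (SEEK_SET, illustrative
         * values only):
         *
         *      l_start = 100, l_len = 10       ->      [100, 109]
         *      l_start = 100, l_len = 0        ->      [100, -1] (to EOF)
         *      l_start = 100, l_len = -10      ->      [90, 99]
         */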
        /*
         * Create the lockf structure
         */
        MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
        if (lock == NULL)
                return (ENOLCK);
        lock->lf_start = start;
        lock->lf_end = end;
        lock->lf_id = ap->a_id;
        lock->lf_vnode = vp;
        lock->lf_type = fl->l_type;
        lock->lf_head = head;
        lock->lf_next = (struct lockf *)0;
        TAILQ_INIT(&lock->lf_blkhd);
        lock->lf_flags = ap->a_flags;
#if IMPORTANCE_INHERITANCE
        lock->lf_boosted = LF_NOT_BOOSTED;
#endif /* IMPORTANCE_INHERITANCE */

        if (ap->a_flags & F_FLOCK)
                lock->lf_flags |= F_WAKE1_SAFE;

        lck_mtx_lock(&vp->v_lock);      /* protect the lockf list */
        /*
         * Do the requested operation.
         */
        switch (ap->a_op) {
        case F_SETLK:
                error = lf_setlock(lock, ap->a_timeout);
                break;

        case F_UNLCK:
                error = lf_clearlock(lock);
                FREE(lock, M_LOCKF);
                break;

        case F_GETLK:
                error = lf_getlock(lock, fl, -1);
                FREE(lock, M_LOCKF);
                break;

        default:
                FREE(lock, M_LOCKF);
                error = EINVAL;
                break;
        }
        lck_mtx_unlock(&vp->v_lock);    /* done manipulating the list */

        LOCKF_DEBUG(0, "lf_advlock: normal exit: %d\n\n", error);
        return (error);
}
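
/*
 * Illustrative userland sketch (not part of this file): a POSIX byte-range
 * lock request that reaches lf_advlock() via fcntl(2) -> VNOP_ADVLOCK().
 * The file path is hypothetical.
 *
 *      #include <fcntl.h>
 *      #include <unistd.h>
 *
 *      int
 *      lock_first_page(void)
 *      {
 *              struct flock fl;
 *              int fd = open("/tmp/example", O_RDWR);  // hypothetical path
 *
 *              if (fd < 0)
 *                      return (-1);
 *              fl.l_whence = SEEK_SET; // interpret l_start from file start
 *              fl.l_start = 0;
 *              fl.l_len = 4096;        // lock bytes [0, 4095]
 *              fl.l_type = F_WRLCK;    // exclusive record lock
 *              if (fcntl(fd, F_SETLKW, &fl) == -1) {   // blocking (F_WAIT)
 *                      close(fd);
 *                      return (-1);
 *              }
 *              return (fd);
 *      }
 */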
308 | ||
309 | /* | |
310 | * Empty the queue of msleeping requests for a lock on the given vnode. | |
311 | * Called with the vnode already locked. Used for forced unmount, where | |
312 | * a flock(2) invoker sleeping on a blocked lock holds an iocount reference | |
313 | * that prevents the vnode from ever being drained. Force unmounting wins. | |
314 | */ | |
315 | void | |
316 | lf_abort_advlocks(vnode_t vp) | |
317 | { | |
318 | struct lockf *lock; | |
319 | ||
320 | if ((lock = vp->v_lockf) == NULL) | |
321 | return; | |
322 | ||
323 | lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); | |
324 | ||
325 | if (!TAILQ_EMPTY(&lock->lf_blkhd)) { | |
326 | struct lockf *tlock; | |
327 | ||
328 | TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) { | |
329 | /* | |
330 | * Setting this flag should cause all | |
331 | * currently blocked F_SETLK request to | |
332 | * return to userland with an errno. | |
333 | */ | |
334 | tlock->lf_flags |= F_ABORT; | |
335 | } | |
336 | lf_wakelock(lock, TRUE); | |
337 | } | |
338 | } | |
339 | ||
340 | /* | |
341 | * Take any lock attempts which are currently blocked by a given lock ("from") | |
342 | * and mark them as blocked by a different lock ("to"). Used in the case | |
343 | * where a byte range currently occupied by "from" is to be occupied by "to." | |
344 | */ | |
345 | static void | |
346 | lf_move_blocked(struct lockf *to, struct lockf *from) | |
347 | { | |
348 | struct lockf *tlock; | |
349 | ||
350 | TAILQ_FOREACH(tlock, &from->lf_blkhd, lf_block) { | |
351 | tlock->lf_next = to; | |
352 | } | |
353 | ||
354 | TAILQ_CONCAT(&to->lf_blkhd, &from->lf_blkhd, lf_block); | |
355 | } | |
356 | ||
357 | /* | |
358 | * lf_coalesce_adjacent | |
359 | * | |
360 | * Description: Helper function: when setting a lock, coalesce adjacent | |
361 | * locks. Needed because adjacent locks are not overlapping, | |
362 | * but POSIX requires that they be coalesced. | |
363 | * | |
364 | * Parameters: lock The new lock which may be adjacent | |
365 | * to already locked regions, and which | |
366 | * should therefore be coalesced with them | |
367 | * | |
368 | * Returns: <void> | |
369 | */ | |
370 | static void | |
371 | lf_coalesce_adjacent(struct lockf *lock) | |
372 | { | |
373 | struct lockf **lf = lock->lf_head; | |
374 | ||
375 | while (*lf != NOLOCKF) { | |
376 | /* reject locks that obviously could not be coalesced */ | |
377 | if ((*lf == lock) || | |
378 | ((*lf)->lf_id != lock->lf_id) || | |
379 | ((*lf)->lf_type != lock->lf_type)) { | |
380 | lf = &(*lf)->lf_next; | |
381 | continue; | |
382 | } | |
383 | ||
384 | /* | |
385 | * NOTE: Assumes that if two locks are adjacent on the number line | |
386 | * and belong to the same owner, then they are adjacent on the list. | |
387 | */ | |
388 | if ((*lf)->lf_end != -1 && | |
389 | ((*lf)->lf_end + 1) == lock->lf_start) { | |
390 | struct lockf *adjacent = *lf; | |
391 | ||
392 | LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent previous\n"); | |
393 | lock->lf_start = (*lf)->lf_start; | |
394 | *lf = lock; | |
395 | lf = &(*lf)->lf_next; | |
396 | ||
397 | lf_move_blocked(lock, adjacent); | |
398 | ||
399 | FREE(adjacent, M_LOCKF); | |
400 | continue; | |
401 | } | |
402 | /* If the lock starts adjacent to us, we can coalesce it */ | |
403 | if (lock->lf_end != -1 && | |
404 | (lock->lf_end + 1) == (*lf)->lf_start) { | |
405 | struct lockf *adjacent = *lf; | |
406 | ||
407 | LOCKF_DEBUG(0, "lf_coalesce_adjacent: coalesce adjacent following\n"); | |
408 | lock->lf_end = (*lf)->lf_end; | |
409 | lock->lf_next = (*lf)->lf_next; | |
410 | lf = &lock->lf_next; | |
411 | ||
412 | lf_move_blocked(lock, adjacent); | |
413 | ||
414 | FREE(adjacent, M_LOCKF); | |
415 | continue; | |
416 | } | |
417 | ||
418 | /* no matching conditions; go on to next lock */ | |
419 | lf = &(*lf)->lf_next; | |
420 | } | |
421 | } | |
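
/*
 * Example: if a process holds a write lock on [0, 99] and then sets a
 * write lock on [100, 199], the ranges are adjacent but not overlapping;
 * lf_coalesce_adjacent() merges them into a single lock on [0, 199], as
 * POSIX requires.
 */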
422 | ||
423 | ||
424 | /* | |
425 | * lf_setlock | |
426 | * | |
427 | * Description: Set a byte-range lock. | |
428 | * | |
429 | * Parameters: lock The lock structure describing the lock | |
430 | * to be set; allocated by the caller, it | |
431 | * will be linked into the lock list if | |
432 | * the set is successful, and freed if the | |
433 | * set is unsuccessful. | |
434 | * | |
435 | * timeout Timeout specified in the case of | |
436 | * SETLKWTIMEOUT. | |
437 | * | |
438 | * Returns: 0 Success | |
439 | * EAGAIN | |
440 | * EDEADLK | |
441 | * lf_split:ENOLCK | |
442 | * lf_clearlock:ENOLCK | |
443 | * msleep:EINTR | |
444 | * msleep:ETIMEDOUT | |
445 | * | |
446 | * Notes: We add the lock to the provisional lock list. We do not | |
447 | * coalesce at this time; this has implications for other lock | |
448 | * requestors in the blocker search mechanism. | |
449 | */ | |
static int
lf_setlock(struct lockf *lock, struct timespec *timeout)
{
        struct lockf *block;
        struct lockf **head = lock->lf_head;
        struct lockf **prev, *overlap, *ltmp;
        static char lockstr[] = "lockf";
        int priority, needtolink, error;
        struct vnode *vp = lock->lf_vnode;
        overlap_t ovcase;
#if IMPORTANCE_INHERITANCE
        task_t boosting_task, block_task;
#endif /* IMPORTANCE_INHERITANCE */

#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 1) {
                lf_print("lf_setlock", lock);
                lf_printlist("lf_setlock(in)", lock);
        }
#endif /* LOCKF_DEBUGGING */

        /*
         * Set the priority
         */
        priority = PLOCK;
        if (lock->lf_type == F_WRLCK)
                priority += 4;
        priority |= PCATCH;
        /*
         * Scan lock list for this file looking for locks that would block us.
         */
        while ((block = lf_getblock(lock, -1))) {
                /*
                 * Free the structure and return if nonblocking.
                 */
                if ((lock->lf_flags & F_WAIT) == 0) {
                        DTRACE_FSINFO(advlock__nowait, vnode_t, vp);
                        FREE(lock, M_LOCKF);
                        return (EAGAIN);
                }

                /*
                 * We are blocked.  Since flock style locks cover
                 * the whole file, there is no chance for deadlock.
                 * For byte-range locks we must check for deadlock.
                 *
                 * Deadlock detection is done by looking through the
                 * wait channels to see if there are any cycles that
                 * involve us.  MAXDEPTH is set just to make sure we
                 * do not go off into neverland.
                 */
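                /*
                 * Example cycle: process A holds [0, 0] and waits for
                 * [1, 1]; process B holds [1, 1] and requests [0, 0].
                 * Walking B's wait channel leads back to a lock owned
                 * by the requester, so the request fails with EDEADLK.
                 */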
                if ((lock->lf_flags & F_POSIX) &&
                    (block->lf_flags & F_POSIX)) {
                        struct proc *wproc, *bproc;
                        struct uthread *ut;
                        struct lockf *waitblock;
                        int i = 0;

                        /* The block is waiting on something */
                        wproc = (struct proc *)block->lf_id;
                        proc_lock(wproc);
                        TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
                                /*
                                 * While the thread is asleep (uu_wchan != 0)
                                 * in this code (uu_wmesg == lockstr)
                                 * and we have not exceeded the maximum cycle
                                 * depth (i < maxlockdepth), then check for a
                                 * cycle to see if the lock is blocked behind
                                 * someone blocked behind us.
                                 */
                                while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) &&
                                    ut->uu_wmesg == lockstr &&
                                    (i++ < maxlockdepth)) {
                                        /*
                                         * Get the lock blocking the lock
                                         * which would block us, and make
                                         * certain it hasn't come unblocked
                                         * (been granted, e.g. between the time
                                         * we called lf_getblock, and the time
                                         * we successfully acquired the
                                         * proc_lock).
                                         */
                                        waitblock = waitblock->lf_next;
                                        if (waitblock == NULL)
                                                break;

                                        /*
                                         * Make sure it's an advisory range
                                         * lock and not an overall file lock;
                                         * if we mix lock types, it's our own
                                         * fault.
                                         */
                                        if ((waitblock->lf_flags & F_POSIX) == 0)
                                                break;

                                        /*
                                         * If the owner of the lock that's
                                         * blocking a lock that's blocking us
                                         * is us, then we have a cycle and
                                         * would deadlock, so error out.
                                         */
                                        bproc = (struct proc *)waitblock->lf_id;
                                        if (bproc == (struct proc *)lock->lf_id) {
                                                proc_unlock(wproc);
                                                FREE(lock, M_LOCKF);
                                                return (EDEADLK);
                                        }
                                }
                        }
                        proc_unlock(wproc);
                }
562 | ||
563 | /* | |
564 | * For flock type locks, we must first remove | |
565 | * any shared locks that we hold before we sleep | |
566 | * waiting for an exclusive lock. | |
567 | */ | |
568 | if ((lock->lf_flags & F_FLOCK) && | |
569 | lock->lf_type == F_WRLCK) { | |
570 | lock->lf_type = F_UNLCK; | |
571 | if ((error = lf_clearlock(lock)) != 0) { | |
572 | FREE(lock, M_LOCKF); | |
573 | return (error); | |
574 | } | |
575 | lock->lf_type = F_WRLCK; | |
576 | } | |
577 | /* | |
578 | * Add our lock to the blocked list and sleep until we're free. | |
579 | * Remember who blocked us (for deadlock detection). | |
580 | */ | |
581 | lock->lf_next = block; | |
582 | TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); | |
583 | ||
584 | if ( !(lock->lf_flags & F_FLOCK)) | |
585 | block->lf_flags &= ~F_WAKE1_SAFE; | |
586 | ||
587 | #ifdef LOCKF_DEBUGGING | |
588 | if (lockf_debug & 1) { | |
589 | lf_print("lf_setlock: blocking on", block); | |
590 | lf_printlist("lf_setlock(block)", block); | |
591 | } | |
592 | #endif /* LOCKF_DEBUGGING */ | |
593 | DTRACE_FSINFO(advlock__wait, vnode_t, vp); | |
594 | #if IMPORTANCE_INHERITANCE | |
595 | /* | |
596 | * Posix type of locks are not inherited by child processes and | |
597 | * it maintains one to one mapping between lock and its owner, while | |
598 | * Flock type of locks are inherited across forks and it does not | |
599 | * maintian any one to one mapping between the lock and the lock | |
600 | * owner. Thus importance donation is done only for Posix type of | |
601 | * locks. | |
602 | */ | |
603 | if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) { | |
604 | block_task = proc_task((proc_t) block->lf_id); | |
605 | boosting_task = proc_task((proc_t) lock->lf_id); | |
606 | ||
607 | /* Check if current task can donate importance. The | |
608 | * check of imp_donor bit is done without holding | |
609 | * task lock. The value may change after you read it, | |
610 | * but it is ok to boost a task while someone else is | |
611 | * unboosting you. | |
612 | */ | |
613 | if (task_is_importance_donor(boosting_task)) { | |
614 | if (block->lf_boosted != LF_BOOSTED && | |
615 | task_is_importance_receiver(block_task)) { | |
616 | lf_hold_assertion(block_task, block); | |
617 | } | |
618 | lf_jump_to_queue_head(block, lock); | |
619 | } | |
620 | } | |
621 | #endif /* IMPORTANCE_INHERITANCE */ | |
                error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);

                if (error == 0 && (lock->lf_flags & F_ABORT) != 0)
                        error = EBADF;

                if (lock->lf_next) {
                        /*
                         * lf_wakelock() always sets wakelock->lf_next to
                         * NULL before a wakeup; so we've been woken early
                         * - perhaps by a debugger, signal or other event.
                         *
                         * Remove 'lock' from the block list (avoids double-add
                         * in the spurious case, which would create a cycle)
                         */
                        TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
                        lock->lf_next = NULL;

                        if (error == 0) {
                                /*
                                 * If this was a spurious wakeup, retry
                                 */
                                printf("%s: spurious wakeup, retrying lock\n",
                                    __func__);
                                continue;
                        }
                }

                if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
                        if ((block = lf_getblock(lock, -1)) != NULL)
                                lf_move_blocked(block, lock);
                }

                if (error) {
                        if (!TAILQ_EMPTY(&lock->lf_blkhd))
                                lf_wakelock(lock, TRUE);
                        FREE(lock, M_LOCKF);
                        /* Return ETIMEDOUT if timeout occurred. */
                        if (error == EWOULDBLOCK) {
                                error = ETIMEDOUT;
                        }
                        return (error);
                }
        }
665 | ||
666 | /* | |
667 | * No blocks!! Add the lock. Note that we will | |
668 | * downgrade or upgrade any overlapping locks this | |
669 | * process already owns. | |
670 | * | |
671 | * Skip over locks owned by other processes. | |
672 | * Handle any locks that overlap and are owned by ourselves. | |
673 | */ | |
674 | prev = head; | |
675 | block = *head; | |
676 | needtolink = 1; | |
677 | for (;;) { | |
678 | ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap); | |
679 | if (ovcase) | |
680 | block = overlap->lf_next; | |
681 | /* | |
682 | * Six cases: | |
683 | * 0) no overlap | |
684 | * 1) overlap == lock | |
685 | * 2) overlap contains lock | |
686 | * 3) lock contains overlap | |
687 | * 4) overlap starts before lock | |
688 | * 5) overlap ends after lock | |
689 | */ | |
690 | switch (ovcase) { | |
691 | case OVERLAP_NONE: | |
692 | if (needtolink) { | |
693 | *prev = lock; | |
694 | lock->lf_next = overlap; | |
695 | } | |
696 | break; | |
697 | ||
698 | case OVERLAP_EQUALS_LOCK: | |
699 | /* | |
700 | * If downgrading lock, others may be | |
701 | * able to acquire it. | |
702 | */ | |
703 | if (lock->lf_type == F_RDLCK && | |
704 | overlap->lf_type == F_WRLCK) | |
705 | lf_wakelock(overlap, TRUE); | |
706 | overlap->lf_type = lock->lf_type; | |
707 | FREE(lock, M_LOCKF); | |
708 | lock = overlap; /* for lf_coalesce_adjacent() */ | |
709 | break; | |
710 | ||
711 | case OVERLAP_CONTAINS_LOCK: | |
712 | /* | |
713 | * Check for common starting point and different types. | |
714 | */ | |
715 | if (overlap->lf_type == lock->lf_type) { | |
716 | FREE(lock, M_LOCKF); | |
717 | lock = overlap; /* for lf_coalesce_adjacent() */ | |
718 | break; | |
719 | } | |
720 | if (overlap->lf_start == lock->lf_start) { | |
721 | *prev = lock; | |
722 | lock->lf_next = overlap; | |
723 | overlap->lf_start = lock->lf_end + 1; | |
724 | } else { | |
725 | /* | |
726 | * If we can't split the lock, we can't | |
727 | * grant it. Claim a system limit for the | |
728 | * resource shortage. | |
729 | */ | |
730 | if (lf_split(overlap, lock)) { | |
731 | FREE(lock, M_LOCKF); | |
732 | return (ENOLCK); | |
733 | } | |
734 | } | |
735 | lf_wakelock(overlap, TRUE); | |
736 | break; | |
737 | ||
738 | case OVERLAP_CONTAINED_BY_LOCK: | |
739 | /* | |
740 | * If downgrading lock, others may be able to | |
741 | * acquire it, otherwise take the list. | |
742 | */ | |
743 | if (lock->lf_type == F_RDLCK && | |
744 | overlap->lf_type == F_WRLCK) { | |
745 | lf_wakelock(overlap, TRUE); | |
746 | } else { | |
747 | while (!TAILQ_EMPTY(&overlap->lf_blkhd)) { | |
748 | ltmp = TAILQ_FIRST(&overlap->lf_blkhd); | |
749 | TAILQ_REMOVE(&overlap->lf_blkhd, ltmp, | |
750 | lf_block); | |
751 | TAILQ_INSERT_TAIL(&lock->lf_blkhd, | |
752 | ltmp, lf_block); | |
753 | ltmp->lf_next = lock; | |
754 | } | |
755 | } | |
756 | /* | |
757 | * Add the new lock if necessary and delete the overlap. | |
758 | */ | |
759 | if (needtolink) { | |
760 | *prev = lock; | |
761 | lock->lf_next = overlap->lf_next; | |
762 | prev = &lock->lf_next; | |
763 | needtolink = 0; | |
764 | } else | |
765 | *prev = overlap->lf_next; | |
766 | FREE(overlap, M_LOCKF); | |
767 | continue; | |
768 | ||
769 | case OVERLAP_STARTS_BEFORE_LOCK: | |
770 | /* | |
771 | * Add lock after overlap on the list. | |
772 | */ | |
773 | lock->lf_next = overlap->lf_next; | |
774 | overlap->lf_next = lock; | |
775 | overlap->lf_end = lock->lf_start - 1; | |
776 | prev = &lock->lf_next; | |
777 | lf_wakelock(overlap, TRUE); | |
778 | needtolink = 0; | |
779 | continue; | |
780 | ||
781 | case OVERLAP_ENDS_AFTER_LOCK: | |
782 | /* | |
783 | * Add the new lock before overlap. | |
784 | */ | |
785 | if (needtolink) { | |
786 | *prev = lock; | |
787 | lock->lf_next = overlap; | |
788 | } | |
789 | overlap->lf_start = lock->lf_end + 1; | |
790 | lf_wakelock(overlap, TRUE); | |
791 | break; | |
792 | } | |
793 | break; | |
794 | } | |
795 | /* Coalesce adjacent locks with identical attributes */ | |
796 | lf_coalesce_adjacent(lock); | |
797 | #ifdef LOCKF_DEBUGGING | |
798 | if (lockf_debug & 1) { | |
799 | lf_print("lf_setlock: got the lock", lock); | |
800 | lf_printlist("lf_setlock(out)", lock); | |
801 | } | |
802 | #endif /* LOCKF_DEBUGGING */ | |
803 | return (0); | |
804 | } | |
805 | ||
806 | ||
807 | /* | |
808 | * lf_clearlock | |
809 | * | |
810 | * Description: Remove a byte-range lock on an vnode. Generally, find the | |
811 | * lock (or an overlap to that lock) and remove it (or shrink | |
812 | * it), then wakeup anyone we can. | |
813 | * | |
814 | * Parameters: unlock The lock to clear | |
815 | * | |
816 | * Returns: 0 Success | |
817 | * lf_split:ENOLCK | |
818 | * | |
819 | * Notes: A caller may unlock all the locks owned by the caller by | |
820 | * specifying the entire file range; locks owned by other | |
821 | * callers are not effected by this operation. | |
822 | */ | |
static int
lf_clearlock(struct lockf *unlock)
{
        struct lockf **head = unlock->lf_head;
        struct lockf *lf = *head;
        struct lockf *overlap, **prev;
        overlap_t ovcase;

        if (lf == NOLOCKF)
                return (0);
#ifdef LOCKF_DEBUGGING
        if (unlock->lf_type != F_UNLCK)
                panic("lf_clearlock: bad type");
        if (lockf_debug & 1)
                lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUGGING */
        prev = head;
        while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) {
                /*
                 * Wakeup the list of locks to be retried.
                 */
                lf_wakelock(overlap, FALSE);
#if IMPORTANCE_INHERITANCE
                if (overlap->lf_boosted == LF_BOOSTED) {
                        lf_drop_assertion(overlap);
                }
#endif /* IMPORTANCE_INHERITANCE */

                switch (ovcase) {
                case OVERLAP_NONE:      /* satisfy compiler enum/switch */
                        break;

                case OVERLAP_EQUALS_LOCK:
                        *prev = overlap->lf_next;
                        FREE(overlap, M_LOCKF);
                        break;

                case OVERLAP_CONTAINS_LOCK:     /* split it */
                        if (overlap->lf_start == unlock->lf_start) {
                                overlap->lf_start = unlock->lf_end + 1;
                                break;
                        }
                        /*
                         * If we can't split the lock, we can't grant it.
                         * Claim a system limit for the resource shortage.
                         */
                        if (lf_split(overlap, unlock))
                                return (ENOLCK);
                        overlap->lf_next = unlock->lf_next;
                        break;

                case OVERLAP_CONTAINED_BY_LOCK:
                        *prev = overlap->lf_next;
                        lf = overlap->lf_next;
                        FREE(overlap, M_LOCKF);
                        continue;

                case OVERLAP_STARTS_BEFORE_LOCK:
                        overlap->lf_end = unlock->lf_start - 1;
                        prev = &overlap->lf_next;
                        lf = overlap->lf_next;
                        continue;

                case OVERLAP_ENDS_AFTER_LOCK:
                        overlap->lf_start = unlock->lf_end + 1;
                        break;
                }
                break;
        }
#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 1)
                lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUGGING */
        return (0);
}
898 | ||
899 | ||
900 | /* | |
901 | * lf_getlock | |
902 | * | |
903 | * Description: Check whether there is a blocking lock, and if so return | |
904 | * its process identifier into the lock being requested. | |
905 | * | |
906 | * Parameters: lock Pointer to lock to test for blocks | |
907 | * fl Pointer to flock structure to receive | |
908 | * the blocking lock information, if a | |
909 | * blocking lock is found. | |
910 | * matchpid -1, or pid value to match in lookup. | |
911 | * | |
912 | * Returns: 0 Success | |
913 | * | |
914 | * Implicit Returns: | |
915 | * *fl Contents modified to reflect the | |
916 | * blocking lock, if one is found; not | |
917 | * modified otherwise | |
918 | * | |
919 | * Notes: fl->l_pid will be (-1) for file locks and will only be set to | |
920 | * the blocking process ID for advisory record locks. | |
921 | */ | |
922 | static int | |
923 | lf_getlock(struct lockf *lock, struct flock *fl, pid_t matchpid) | |
924 | { | |
925 | struct lockf *block; | |
926 | ||
927 | #ifdef LOCKF_DEBUGGING | |
928 | if (lockf_debug & 1) | |
929 | lf_print("lf_getlock", lock); | |
930 | #endif /* LOCKF_DEBUGGING */ | |
931 | ||
932 | if ((block = lf_getblock(lock, matchpid))) { | |
933 | fl->l_type = block->lf_type; | |
934 | fl->l_whence = SEEK_SET; | |
935 | fl->l_start = block->lf_start; | |
936 | if (block->lf_end == -1) | |
937 | fl->l_len = 0; | |
938 | else | |
939 | fl->l_len = block->lf_end - block->lf_start + 1; | |
940 | if (block->lf_flags & F_POSIX) | |
941 | fl->l_pid = proc_pid((struct proc *)(block->lf_id)); | |
942 | else | |
943 | fl->l_pid = -1; | |
944 | } else { | |
945 | fl->l_type = F_UNLCK; | |
946 | } | |
947 | return (0); | |
948 | } | |
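
/*
 * Illustrative userland sketch (not part of this file): F_GETLK reports a
 * conflicting lock through the same flock structure, filled in by
 * lf_getlock() above.
 *
 *      struct flock fl = { .l_whence = SEEK_SET, .l_start = 0,
 *          .l_len = 0, .l_type = F_WRLCK };
 *
 *      if (fcntl(fd, F_GETLK, &fl) == 0) {
 *              if (fl.l_type == F_UNLCK)
 *                      ;       // no conflict; the request would succeed
 *              else
 *                      ;       // fl.l_pid names the blocking record lock owner
 *      }
 */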
949 | ||
950 | /* | |
951 | * lf_getblock | |
952 | * | |
953 | * Description: Walk the list of locks for an inode and return the first | |
954 | * blocking lock. A lock is considered blocking if we are not | |
955 | * the lock owner; otherwise, we are permitted to upgrade or | |
956 | * downgrade it, and it's not considered blocking. | |
957 | * | |
958 | * Parameters: lock The lock for which we are interested | |
959 | * in obtaining the blocking lock, if any | |
960 | * matchpid -1, or pid value to match in lookup. | |
961 | * | |
962 | * Returns: NOLOCKF No blocking lock exists | |
963 | * !NOLOCKF The address of the blocking lock's | |
964 | * struct lockf. | |
965 | */ | |
966 | static struct lockf * | |
967 | lf_getblock(struct lockf *lock, pid_t matchpid) | |
968 | { | |
969 | struct lockf **prev, *overlap, *lf = *(lock->lf_head); | |
970 | ||
971 | for (prev = lock->lf_head; | |
972 | lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != OVERLAP_NONE; | |
973 | lf = overlap->lf_next) { | |
974 | /* | |
975 | * Found an overlap. | |
976 | * | |
977 | * If we're matching pids, and it's a record lock, | |
978 | * but the pid doesn't match, then keep on looking .. | |
979 | */ | |
980 | if (matchpid != -1 && | |
981 | (overlap->lf_flags & F_POSIX) != 0 && | |
982 | proc_pid((struct proc *)(overlap->lf_id)) != matchpid) | |
983 | continue; | |
984 | /* | |
985 | * does it block us? | |
986 | */ | |
987 | if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK)) | |
988 | return (overlap); | |
989 | } | |
990 | return (NOLOCKF); | |
991 | } | |
992 | ||
993 | ||
994 | /* | |
995 | * lf_findoverlap | |
996 | * | |
997 | * Description: Walk the list of locks to find an overlapping lock (if any). | |
998 | * | |
999 | * Parameters: lf First lock on lock list | |
1000 | * lock The lock we are checking for an overlap | |
1001 | * check Check type | |
1002 | * prev pointer to pointer pointer to contain | |
1003 | * address of pointer to previous lock | |
1004 | * pointer to overlapping lock, if overlap | |
1005 | * overlap pointer to pointer to contain address | |
1006 | * of overlapping lock | |
1007 | * | |
1008 | * Returns: OVERLAP_NONE | |
1009 | * OVERLAP_EQUALS_LOCK | |
1010 | * OVERLAP_CONTAINS_LOCK | |
1011 | * OVERLAP_CONTAINED_BY_LOCK | |
1012 | * OVERLAP_STARTS_BEFORE_LOCK | |
1013 | * OVERLAP_ENDS_AFTER_LOCK | |
1014 | * | |
1015 | * Implicit Returns: | |
1016 | * *prev The address of the next pointer in the | |
1017 | * lock previous to the overlapping lock; | |
1018 | * this is generally used to relink the | |
1019 | * lock list, avoiding a second iteration. | |
1020 | * *overlap The pointer to the overlapping lock | |
1021 | * itself; this is used to return data in | |
1022 | * the check == OTHERS case, and for the | |
1023 | * caller to modify the overlapping lock, | |
1024 | * in the check == SELF case | |
1025 | * | |
1026 | * Note: This returns only the FIRST overlapping lock. There may be | |
1027 | * more than one. lf_getlock will return the first blocking lock, | |
1028 | * while lf_setlock will iterate over all overlapping locks to | |
1029 | * | |
1030 | * The check parameter can be SELF, meaning we are looking for | |
1031 | * overlapping locks owned by us, or it can be OTHERS, meaning | |
1032 | * we are looking for overlapping locks owned by someone else so | |
1033 | * we can report a blocking lock on an F_GETLK request. | |
1034 | * | |
1035 | * The value of *overlap and *prev are modified, even if there is | |
1036 | * no overlapping lock found; always check the return code. | |
1037 | */ | |
1038 | static overlap_t | |
1039 | lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, | |
1040 | struct lockf ***prev, struct lockf **overlap) | |
1041 | { | |
        off_t start, end;
        int found_self = 0;

        *overlap = lf;
        if (lf == NOLOCKF)
                return (OVERLAP_NONE);
#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 2)
                lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUGGING */
        start = lock->lf_start;
        end = lock->lf_end;
        while (lf != NOLOCKF) {
                if (((type & SELF) && lf->lf_id != lock->lf_id) ||
                    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
                        /*
                         * Locks belonging to one process are adjacent on the
                         * list, so if we've found any locks belonging to us,
                         * and we're now seeing something else, then we've
                         * examined all "self" locks.  Note that bailing out
                         * here is quite important; for coalescing, we assume
                         * numerically adjacent locks from the same owner to
                         * be adjacent on the list.
                         */
                        if ((type & SELF) && found_self) {
                                return OVERLAP_NONE;
                        }

                        *prev = &lf->lf_next;
                        *overlap = lf = lf->lf_next;
                        continue;
                }

                if ((type & SELF)) {
                        found_self = 1;
                }

#ifdef LOCKF_DEBUGGING
                if (lockf_debug & 2)
                        lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUGGING */
                /*
                 * OK, check for overlap
                 */
                if ((lf->lf_end != -1 && start > lf->lf_end) ||
                    (end != -1 && lf->lf_start > end)) {
                        /* Case 0 */
                        LOCKF_DEBUG(2, "no overlap\n");

                        /*
                         * NOTE: assumes that locks for the same process are
                         * nonintersecting and ordered.
                         */
                        if ((type & SELF) && end != -1 && lf->lf_start > end)
                                return (OVERLAP_NONE);
                        *prev = &lf->lf_next;
                        *overlap = lf = lf->lf_next;
                        continue;
                }
                if ((lf->lf_start == start) && (lf->lf_end == end)) {
                        LOCKF_DEBUG(2, "overlap == lock\n");
                        return (OVERLAP_EQUALS_LOCK);
                }
                if ((lf->lf_start <= start) &&
                    (end != -1) &&
                    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
                        LOCKF_DEBUG(2, "overlap contains lock\n");
                        return (OVERLAP_CONTAINS_LOCK);
                }
                if (start <= lf->lf_start &&
                    (end == -1 ||
                    (lf->lf_end != -1 && end >= lf->lf_end))) {
                        LOCKF_DEBUG(2, "lock contains overlap\n");
                        return (OVERLAP_CONTAINED_BY_LOCK);
                }
                if ((lf->lf_start < start) &&
                    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
                        LOCKF_DEBUG(2, "overlap starts before lock\n");
                        return (OVERLAP_STARTS_BEFORE_LOCK);
                }
                if ((lf->lf_start > start) &&
                    (end != -1) &&
                    ((lf->lf_end > end) || (lf->lf_end == -1))) {
                        LOCKF_DEBUG(2, "overlap ends after lock\n");
                        return (OVERLAP_ENDS_AFTER_LOCK);
                }
                panic("lf_findoverlap: default");
        }
        return (OVERLAP_NONE);
}
1132 | ||
1133 | ||
1134 | /* | |
1135 | * lf_split | |
1136 | * | |
1137 | * Description: Split a lock and a contained region into two or three locks | |
1138 | * as necessary. | |
1139 | * | |
1140 | * Parameters: lock1 Lock to split | |
1141 | * lock2 Overlapping lock region requiring the | |
1142 | * split (upgrade/downgrade/unlock) | |
1143 | * | |
1144 | * Returns: 0 Success | |
1145 | * ENOLCK No memory for new lock | |
1146 | * | |
1147 | * Implicit Returns: | |
1148 | * *lock1 Modified original lock | |
1149 | * *lock2 Overlapping lock (inserted into list) | |
1150 | * (new lock) Potential new lock inserted into list | |
1151 | * if split results in 3 locks | |
1152 | * | |
1153 | * Notes: This operation can only fail if the split would result in three | |
1154 | * locks, and there is insufficient memory to allocate the third | |
1155 | * lock; in that case, neither of the locks will be modified. | |
1156 | */ | |
static int
lf_split(struct lockf *lock1, struct lockf *lock2)
{
        struct lockf *splitlock;

#ifdef LOCKF_DEBUGGING
        if (lockf_debug & 2) {
                lf_print("lf_split", lock1);
                lf_print("splitting from", lock2);
        }
#endif /* LOCKF_DEBUGGING */
        /*
         * Check to see if splitting into only two pieces.
         */
        if (lock1->lf_start == lock2->lf_start) {
                lock1->lf_start = lock2->lf_end + 1;
                lock2->lf_next = lock1;
                return (0);
        }
        if (lock1->lf_end == lock2->lf_end) {
                lock1->lf_end = lock2->lf_start - 1;
                lock2->lf_next = lock1->lf_next;
                lock1->lf_next = lock2;
                return (0);
        }
        /*
         * Make a new lock consisting of the last part of
         * the encompassing lock
         */
        MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
        if (splitlock == NULL)
                return (ENOLCK);
        bcopy(lock1, splitlock, sizeof *splitlock);
        splitlock->lf_start = lock2->lf_end + 1;
        TAILQ_INIT(&splitlock->lf_blkhd);
        lock1->lf_end = lock2->lf_start - 1;
        /*
         * OK, now link it in
         */
        splitlock->lf_next = lock1->lf_next;
        lock2->lf_next = splitlock;
        lock1->lf_next = lock2;

        return (0);
}
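
/*
 * Example: unlocking [40, 49] out of a held lock on [0, 99] splits the
 * original lock in three: lock1 becomes [0, 39], lock2 is the [40, 49]
 * region being carved out, and the new splitlock covers [50, 99].
 */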
1202 | ||
1203 | ||
1204 | /* | |
1205 | * lf_wakelock | |
1206 | * | |
1207 | * Wakeup a blocklist in the case of a downgrade or unlock, since others | |
1208 | * waiting on the lock may now be able to acquire it. | |
1209 | * | |
1210 | * Parameters: listhead Lock list head on which waiters may | |
1211 | * have pending locks | |
1212 | * | |
1213 | * Returns: <void> | |
1214 | * | |
1215 | * Notes: This function iterates a list of locks and wakes all waiters, | |
1216 | * rather than only waiters for the contended regions. Because | |
1217 | * of this, for heavily contended files, this can result in a | |
1218 | * "thundering herd" situation. Refactoring the code could make | |
1219 | * this operation more efficient, if heavy contention ever results | |
1220 | * in a real-world performance problem. | |
1221 | */ | |
1222 | static void | |
1223 | lf_wakelock(struct lockf *listhead, boolean_t force_all) | |
1224 | { | |
1225 | struct lockf *wakelock; | |
1226 | boolean_t wake_all = TRUE; | |
1227 | ||
1228 | if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE)) | |
1229 | wake_all = FALSE; | |
1230 | ||
1231 | while (!TAILQ_EMPTY(&listhead->lf_blkhd)) { | |
1232 | wakelock = TAILQ_FIRST(&listhead->lf_blkhd); | |
1233 | TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); | |
1234 | ||
1235 | wakelock->lf_next = NOLOCKF; | |
1236 | #ifdef LOCKF_DEBUGGING | |
1237 | if (lockf_debug & 2) | |
1238 | lf_print("lf_wakelock: awakening", wakelock); | |
1239 | #endif /* LOCKF_DEBUGGING */ | |
1240 | if (wake_all == FALSE) { | |
1241 | /* | |
1242 | * If there are items on the list head block list, | |
1243 | * move them to the wakelock list instead, and then | |
1244 | * correct their lf_next pointers. | |
1245 | */ | |
1246 | if (!TAILQ_EMPTY(&listhead->lf_blkhd)) { | |
1247 | TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block); | |
1248 | ||
1249 | struct lockf *tlock; | |
1250 | ||
1251 | TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) { | |
1252 | if (TAILQ_NEXT(tlock, lf_block) == tlock) { | |
1253 | /* See rdar://10887303 */ | |
1254 | panic("cycle in wakelock list"); | |
1255 | } | |
1256 | tlock->lf_next = wakelock; | |
1257 | } | |
1258 | } | |
1259 | } | |
1260 | wakeup(wakelock); | |
1261 | ||
1262 | if (wake_all == FALSE) | |
1263 | break; | |
1264 | } | |
1265 | } | |
1266 | ||
1267 | ||
1268 | #ifdef LOCKF_DEBUGGING | |
1269 | /* | |
1270 | * lf_print DEBUG | |
1271 | * | |
1272 | * Print out a lock; lock information is prefixed by the string in 'tag' | |
1273 | * | |
1274 | * Parameters: tag A string tag for debugging | |
1275 | * lock The lock whose information should be | |
1276 | * displayed | |
1277 | * | |
1278 | * Returns: <void> | |
1279 | */ | |
1280 | void | |
1281 | lf_print(const char *tag, struct lockf *lock) | |
1282 | { | |
1283 | printf("%s: lock %p for ", tag, (void *)lock); | |
1284 | if (lock->lf_flags & F_POSIX) | |
1285 | printf("proc %ld", (long)((struct proc *)lock->lf_id)->p_pid); | |
1286 | else | |
1287 | printf("id %p", (void *)lock->lf_id); | |
1288 | if (lock->lf_vnode != 0) | |
1289 | printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx", | |
1290 | lock->lf_vnode, | |
1291 | lock->lf_type == F_RDLCK ? "shared" : | |
1292 | lock->lf_type == F_WRLCK ? "exclusive" : | |
1293 | lock->lf_type == F_UNLCK ? "unlock" : "unknown", | |
1294 | (intmax_t)lock->lf_start, (intmax_t)lock->lf_end); | |
1295 | else | |
1296 | printf(" %s, start 0x%016llx, end 0x%016llx", | |
1297 | lock->lf_type == F_RDLCK ? "shared" : | |
1298 | lock->lf_type == F_WRLCK ? "exclusive" : | |
1299 | lock->lf_type == F_UNLCK ? "unlock" : "unknown", | |
1300 | (intmax_t)lock->lf_start, (intmax_t)lock->lf_end); | |
1301 | if (!TAILQ_EMPTY(&lock->lf_blkhd)) | |
1302 | printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd)); | |
1303 | else | |
1304 | printf("\n"); | |
1305 | } | |
1306 | ||
1307 | ||
1308 | /* | |
1309 | * lf_printlist DEBUG | |
1310 | * | |
1311 | * Print out a lock list for the vnode associated with 'lock'; lock information | |
1312 | * is prefixed by the string in 'tag' | |
1313 | * | |
1314 | * Parameters: tag A string tag for debugging | |
1315 | * lock The lock whose vnode's lock list should | |
1316 | * be displayed | |
1317 | * | |
1318 | * Returns: <void> | |
1319 | */ | |
1320 | void | |
1321 | lf_printlist(const char *tag, struct lockf *lock) | |
1322 | { | |
1323 | struct lockf *lf, *blk; | |
1324 | ||
1325 | if (lock->lf_vnode == 0) | |
1326 | return; | |
1327 | ||
1328 | printf("%s: Lock list for vno %p:\n", | |
1329 | tag, lock->lf_vnode); | |
1330 | for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) { | |
1331 | printf("\tlock %p for ",(void *)lf); | |
1332 | if (lf->lf_flags & F_POSIX) | |
1333 | printf("proc %ld", | |
1334 | (long)((struct proc *)lf->lf_id)->p_pid); | |
1335 | else | |
1336 | printf("id %p", (void *)lf->lf_id); | |
1337 | printf(", %s, start 0x%016llx, end 0x%016llx", | |
1338 | lf->lf_type == F_RDLCK ? "shared" : | |
1339 | lf->lf_type == F_WRLCK ? "exclusive" : | |
1340 | lf->lf_type == F_UNLCK ? "unlock" : | |
1341 | "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); | |
1342 | TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { | |
1343 | printf("\n\t\tlock request %p for ", (void *)blk); | |
1344 | if (blk->lf_flags & F_POSIX) | |
1345 | printf("proc %ld", | |
1346 | (long)((struct proc *)blk->lf_id)->p_pid); | |
1347 | else | |
1348 | printf("id %p", (void *)blk->lf_id); | |
1349 | printf(", %s, start 0x%016llx, end 0x%016llx", | |
1350 | blk->lf_type == F_RDLCK ? "shared" : | |
1351 | blk->lf_type == F_WRLCK ? "exclusive" : | |
1352 | blk->lf_type == F_UNLCK ? "unlock" : | |
1353 | "unknown", (intmax_t)blk->lf_start, | |
1354 | (intmax_t)blk->lf_end); | |
1355 | if (!TAILQ_EMPTY(&blk->lf_blkhd)) | |
1356 | panic("lf_printlist: bad list"); | |
1357 | } | |
1358 | printf("\n"); | |
1359 | } | |
1360 | } | |
1361 | #endif /* LOCKF_DEBUGGING */ | |
1362 | ||
1363 | #if IMPORTANCE_INHERITANCE | |
1364 | ||
1365 | /* | |
1366 | * lf_hold_assertion | |
1367 | * | |
1368 | * Call task importance hold assertion on the owner of the lock. | |
1369 | * | |
1370 | * Parameters: block_task Owner of the lock blocking | |
1371 | * current thread. | |
1372 | * | |
1373 | * block lock on which the current thread | |
1374 | * is blocking on. | |
1375 | * | |
1376 | * Returns: <void> | |
1377 | * | |
1378 | * Notes: The task reference on block_task is not needed to be hold since | |
1379 | * the current thread has vnode lock and block_task has a file | |
1380 | * lock, thus removing file lock in exit requires block_task to | |
1381 | * grab the vnode lock. | |
1382 | */ | |
1383 | static void | |
1384 | lf_hold_assertion(task_t block_task, struct lockf *block) | |
1385 | { | |
1386 | task_importance_hold_internal_assertion(block_task, 1); | |
1387 | block->lf_boosted = LF_BOOSTED; | |
1388 | } | |
1389 | ||
1390 | ||
1391 | /* | |
1392 | * lf_jump_to_queue_head | |
1393 | * | |
1394 | * Jump the lock from the tail of the block queue to the head of | |
1395 | * the queue. | |
1396 | * | |
1397 | * Parameters: block lockf struct containing the | |
1398 | * block queue. | |
1399 | * lock lockf struct to be jumped to the | |
1400 | * front. | |
1401 | * | |
1402 | * Returns: <void> | |
1403 | */ | |
1404 | static void | |
1405 | lf_jump_to_queue_head(struct lockf *block, struct lockf *lock) | |
1406 | { | |
1407 | /* Move the lock to the head of the block queue. */ | |
1408 | TAILQ_REMOVE(&block->lf_blkhd, lock, lf_block); | |
1409 | TAILQ_INSERT_HEAD(&block->lf_blkhd, lock, lf_block); | |
1410 | } | |
1411 | ||
1412 | ||
1413 | /* | |
1414 | * lf_drop_assertion | |
1415 | * | |
1416 | * Drops the task hold assertion. | |
1417 | * | |
1418 | * Parameters: block lockf struct holding the assertion. | |
1419 | * | |
1420 | * Returns: <void> | |
1421 | */ | |
1422 | static void | |
1423 | lf_drop_assertion(struct lockf *block) | |
1424 | { | |
1425 | task_t current_task; | |
1426 | ||
1427 | current_task = proc_task((proc_t) block->lf_id); | |
1428 | task_importance_drop_internal_assertion(current_task, 1); | |
1429 | block->lf_boosted = LF_NOT_BOOSTED; | |
1430 | } | |
1431 | ||
1432 | #endif /* IMPORTANCE_INHERITANCE */ |