git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2015 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*
	29	* Copyright (c) 1982, 1986, 1989, 1993
	30	* The Regents of the University of California. All rights reserved.
	31	*
	32	* This code is derived from software contributed to Berkeley by
	33	* Scooter Morris at Genentech Inc.
	34	*
	35	* Redistribution and use in source and binary forms, with or without
	36	* modification, are permitted provided that the following conditions
	37	* are met:
	38	* 1. Redistributions of source code must retain the above copyright
	39	* notice, this list of conditions and the following disclaimer.
	40	* 2. Redistributions in binary form must reproduce the above copyright
	41	* notice, this list of conditions and the following disclaimer in the
	42	* documentation and/or other materials provided with the distribution.
	43	* 4. Neither the name of the University nor the names of its contributors
	44	* may be used to endorse or promote products derived from this software
	45	* without specific prior written permission.
	46	*
	47	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	48	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	49	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	50	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	51	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	52	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	53	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	54	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	55	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	56	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	57	* SUCH DAMAGE.
	58	*
	59	* @(#)ufs_lockf.c 8.3 (Berkeley) 1/6/94
	60	*/
	61
	62	#include <sys/cdefs.h>
	63	#include <sys/param.h>
	64	#include <sys/systm.h>
	65	#include <sys/kernel.h>
	66	#include <sys/lock.h>
	67	#include <sys/mount.h>
	68	#include <sys/proc.h>
	69	#include <sys/signalvar.h>
	70	#include <sys/unistd.h>
	71	#include <sys/user.h>
	72	#include <sys/vnode.h>
	73	#include <sys/vnode_internal.h>
	74	#include <sys/vnode_if.h>
	75	#include <sys/malloc.h>
	76	#include <sys/fcntl.h>
	77	#include <sys/lockf.h>
	78	#include <sys/sdt.h>
	79	#include <kern/task.h>
	80
	81	#include <sys/file_internal.h>
	82
	83	/*
	84	* This variable controls the maximum number of processes that will
	85	* be checked in doing deadlock detection.
	86	*/
	87	static int maxlockdepth = MAXDEPTH;
	88
	89	#if (DEVELOPMENT \|\| DEBUG)
	90	#define LOCKF_DEBUGGING 1
	91	#endif
	92
	93	#ifdef LOCKF_DEBUGGING
	94	#include <sys/sysctl.h>
	95	void lf_print(const char tag, struct lockf lock);
	96	void lf_printlist(const char tag, struct lockf lock);
	97
	98	#define LF_DBG_LOCKOP (1 << 0) /* setlk, getlk, clearlk */
	99	#define LF_DBG_LIST (1 << 1) /* split, coalesce */
	100	#define LF_DBG_IMPINH (1 << 2) /* importance inheritance */
	101	#define LF_DBG_TRACE (1 << 3) /* errors, exit */
	102
	103	static int lockf_debug = 0; /* was 2, could be 3 ;-) */
	104	SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW \| CTLFLAG_LOCKED, &lockf_debug, 0, "");
	105
	106	/*
	107	* If there is no mask bit selector, or there is one, and the selector is
	108	* set, then output the debugging diagnostic.
	109	*/
	110	#define LOCKF_DEBUG(mask, ...) \
	111	do { \
	112	if( !(mask) \|\| ((mask) & lockf_debug)) { \
	113	printf(__VA_ARGS__); \
	114	} \
	115	} while(0)
	116	#else /* !LOCKF_DEBUGGING */
	117	#define LOCKF_DEBUG(mask, ...) /* mask */
	118	#endif /* !LOCKF_DEBUGGING */
	119
	120	MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures");
	121
	122	#define NOLOCKF (struct lockf *)0
	123	#define SELF 0x1
	124	#define OTHERS 0x2
	125	#define OFF_MAX 0x7fffffffffffffffULL /* max off_t */
	126
	127	/*
	128	* Overlapping lock states
	129	*/
	130	typedef enum {
	131	OVERLAP_NONE = 0,
	132	OVERLAP_EQUALS_LOCK,
	133	OVERLAP_CONTAINS_LOCK,
	134	OVERLAP_CONTAINED_BY_LOCK,
	135	OVERLAP_STARTS_BEFORE_LOCK,
	136	OVERLAP_ENDS_AFTER_LOCK
	137	} overlap_t;
	138
	139	static int lf_clearlock(struct lockf *);
	140	static overlap_t lf_findoverlap(struct lockf *,
	141	struct lockf , int, struct lockf , struct lockf );
	142	static struct lockf lf_getblock(struct lockf , pid_t);
	143	static int lf_getlock(struct lockf , struct flock , pid_t);
	144	static int lf_setlock(struct lockf , struct timespec );
	145	static int lf_split(struct lockf , struct lockf );
	146	static void lf_wakelock(struct lockf *, boolean_t);
	147	#if IMPORTANCE_INHERITANCE
	148	static void lf_hold_assertion(task_t, struct lockf *);
	149	static void lf_jump_to_queue_head(struct lockf , struct lockf );
	150	static void lf_drop_assertion(struct lockf *);
	151	static void lf_boost_blocking_proc(struct lockf , struct lockf );
	152	#endif /* IMPORTANCE_INHERITANCE */
	153
	154	/*
	155	* lf_advlock
	156	*
	157	* Description: Advisory record locking support
	158	*
	159	* Parameters: ap Argument pointer to a vnop_advlock_args
	160	* argument descriptor structure for the
	161	* lock operation to be attempted.
	162	*
	163	* Returns: 0 Success
	164	* EOVERFLOW
	165	* EINVAL
	166	* ENOLCK Number of locked regions exceeds limit
	167	* lf_setlock:EAGAIN
	168	* lf_setlock:EDEADLK
	169	* lf_setlock:EINTR
	170	* lf_setlock:ENOLCK
	171	* lf_setlock:ETIMEDOUT
	172	* lf_clearlock:ENOLCK
	173	* vnode_size:???
	174	*
	175	* Notes: We return ENOLCK when we run out of memory to support locks; as
	176	* such, there is no specific expectation limit other than the
	177	* amount of available resources.
	178	*/
	179	int
	180	lf_advlock(struct vnop_advlock_args *ap)
	181	{
	182	struct vnode *vp = ap->a_vp;
	183	struct flock *fl = ap->a_fl;
	184	vfs_context_t context = ap->a_context;
	185	struct lockf *lock;
	186	off_t start, end, oadd;
	187	u_quad_t size;
	188	int error;
	189	struct lockf **head = &vp->v_lockf;
	190
	191	/* XXX HFS may need a !vnode_isreg(vp) EISDIR error here */
	192
	193	/*
	194	* Avoid the common case of unlocking when inode has no locks.
	195	*/
	196	if (head == (struct lockf )0) {
	197	if (ap->a_op != F_SETLK) {
	198	fl->l_type = F_UNLCK;
	199	LOCKF_DEBUG(LF_DBG_TRACE,
	200	"lf_advlock: '%s' unlock without lock\n",
	201	vfs_context_proc(context)->p_comm);
	202	return (0);
	203	}
	204	}
	205
	206	/*
	207	* Convert the flock structure into a start and end.
	208	*/
	209	switch (fl->l_whence) {
	210
	211	case SEEK_SET:
	212	case SEEK_CUR:
	213	/*
	214	* Caller is responsible for adding any necessary offset
	215	* when SEEK_CUR is used.
	216	*/
	217	start = fl->l_start;
	218	break;
	219
	220	case SEEK_END:
	221
	222	/*
	223	* It's OK to cast the u_quad_t to and off_t here, since they
	224	* are the same storage size, and the value of the returned
	225	* contents will never overflow into the sign bit. We need to
	226	* do this because we will use size to force range checks.
	227	*/
	228	if ((error = vnode_size(vp, (off_t *)&size, context))) {
	229	LOCKF_DEBUG(LF_DBG_TRACE,
	230	"lf_advlock: vnode_getattr failed: %d\n", error);
	231	return (error);
	232	}
	233
	234	if (size > OFF_MAX \|\|
	235	(fl->l_start > 0 &&
	236	size > (u_quad_t)(OFF_MAX - fl->l_start)))
	237	return (EOVERFLOW);
	238	start = size + fl->l_start;
	239	break;
	240
	241	default:
	242	LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: unknown whence %d\n",
	243	fl->l_whence);
	244	return (EINVAL);
	245	}
	246	if (start < 0) {
	247	LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: start < 0 (%qd)\n",
	248	start);
	249	return (EINVAL);
	250	}
	251	if (fl->l_len < 0) {
	252	if (start == 0) {
	253	LOCKF_DEBUG(LF_DBG_TRACE,
	254	"lf_advlock: len < 0 & start == 0\n");
	255	return (EINVAL);
	256	}
	257	end = start - 1;
	258	start += fl->l_len;
	259	if (start < 0) {
	260	LOCKF_DEBUG(LF_DBG_TRACE,
	261	"lf_advlock: start < 0 (%qd)\n", start);
	262	return (EINVAL);
	263	}
	264	} else if (fl->l_len == 0)
	265	end = -1;
	266	else {
	267	oadd = fl->l_len - 1;
	268	if (oadd > (off_t)(OFF_MAX - start)) {
	269	LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: overflow\n");
	270	return (EOVERFLOW);
	271	}
	272	end = start + oadd;
	273	}
	274	/*
	275	* Create the lockf structure
	276	*/
	277	MALLOC(lock, struct lockf , sizeof lock, M_LOCKF, M_WAITOK);
	278	if (lock == NULL)
	279	return (ENOLCK);
	280	lock->lf_start = start;
	281	lock->lf_end = end;
	282	lock->lf_id = ap->a_id;
	283	lock->lf_vnode = vp;
	284	lock->lf_type = fl->l_type;
	285	lock->lf_head = head;
	286	lock->lf_next = (struct lockf *)0;
	287	TAILQ_INIT(&lock->lf_blkhd);
	288	lock->lf_flags = ap->a_flags;
	289	#if IMPORTANCE_INHERITANCE
	290	lock->lf_boosted = LF_NOT_BOOSTED;
	291	#endif
	292	if (ap->a_flags & F_POSIX)
	293	lock->lf_owner = (struct proc *)lock->lf_id;
	294	else
	295	lock->lf_owner = NULL;
	296
	297	if (ap->a_flags & F_FLOCK)
	298	lock->lf_flags \|= F_WAKE1_SAFE;
	299
	300	lck_mtx_lock(&vp->v_lock); /* protect the lockf list */
	301	/*
	302	* Do the requested operation.
	303	*/
	304	switch(ap->a_op) {
	305	case F_SETLK:
	306	/*
	307	* For F_OFD_* locks, lf_id is the fileglob.
	308	* Record an "lf_owner" iff this is a confined fd
	309	* i.e. it cannot escape this process and will be
	310	* F_UNLCKed before the owner exits. (This is
	311	* the implicit guarantee needed to ensure lf_owner
	312	* remains a valid reference here.)
	313	*/
	314	if (ap->a_flags & F_OFD_LOCK) {
	315	struct fileglob fg = (void )lock->lf_id;
	316	if (fg->fg_lflags & FG_CONFINED)
	317	lock->lf_owner = current_proc();
	318	}
	319	error = lf_setlock(lock, ap->a_timeout);
	320	break;
	321
	322	case F_UNLCK:
	323	error = lf_clearlock(lock);
	324	FREE(lock, M_LOCKF);
	325	break;
	326
	327	case F_GETLK:
	328	error = lf_getlock(lock, fl, -1);
	329	FREE(lock, M_LOCKF);
	330	break;
	331
	332
	333	default:
	334	FREE(lock, M_LOCKF);
	335	error = EINVAL;
	336	break;
	337	}
	338	lck_mtx_unlock(&vp->v_lock); /* done manipulating the list */
	339
	340	LOCKF_DEBUG(LF_DBG_TRACE, "lf_advlock: normal exit: %d\n", error);
	341	return (error);
	342	}
	343
	344	/*
	345	* Empty the queue of msleeping requests for a lock on the given vnode.
	346	* Called with the vnode already locked. Used for forced unmount, where
	347	* a flock(2) invoker sleeping on a blocked lock holds an iocount reference
	348	* that prevents the vnode from ever being drained. Force unmounting wins.
	349	*/
	350	void
	351	lf_abort_advlocks(vnode_t vp)
	352	{
	353	struct lockf *lock;
	354
	355	if ((lock = vp->v_lockf) == NULL)
	356	return;
	357
	358	lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED);
	359
	360	if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
	361	struct lockf *tlock;
	362
	363	TAILQ_FOREACH(tlock, &lock->lf_blkhd, lf_block) {
	364	/*
	365	* Setting this flag should cause all
	366	* currently blocked F_SETLK request to
	367	* return to userland with an errno.
	368	*/
	369	tlock->lf_flags \|= F_ABORT;
	370	}
	371	lf_wakelock(lock, TRUE);
	372	}
	373	}
	374
	375	/*
	376	* Take any lock attempts which are currently blocked by a given lock ("from")
	377	* and mark them as blocked by a different lock ("to"). Used in the case
	378	* where a byte range currently occupied by "from" is to be occupied by "to."
	379	*/
	380	static void
	381	lf_move_blocked(struct lockf to, struct lockf from)
	382	{
	383	struct lockf *tlock;
	384
	385	TAILQ_FOREACH(tlock, &from->lf_blkhd, lf_block) {
	386	tlock->lf_next = to;
	387	}
	388
	389	TAILQ_CONCAT(&to->lf_blkhd, &from->lf_blkhd, lf_block);
	390	}
	391
	392	/*
	393	* lf_coalesce_adjacent
	394	*
	395	* Description: Helper function: when setting a lock, coalesce adjacent
	396	* locks. Needed because adjacent locks are not overlapping,
	397	* but POSIX requires that they be coalesced.
	398	*
	399	* Parameters: lock The new lock which may be adjacent
	400	* to already locked regions, and which
	401	* should therefore be coalesced with them
	402	*
	403	* Returns: <void>
	404	*/
	405	static void
	406	lf_coalesce_adjacent(struct lockf *lock)
	407	{
	408	struct lockf **lf = lock->lf_head;
	409
	410	while (*lf != NOLOCKF) {
	411	/* reject locks that obviously could not be coalesced */
	412	if ((*lf == lock) \|\|
	413	((*lf)->lf_id != lock->lf_id) \|\|
	414	((*lf)->lf_type != lock->lf_type)) {
	415	lf = &(*lf)->lf_next;
	416	continue;
	417	}
	418
	419	/*
	420	* NOTE: Assumes that if two locks are adjacent on the number line
	421	* and belong to the same owner, then they are adjacent on the list.
	422	*/
	423	if ((*lf)->lf_end != -1 &&
	424	((*lf)->lf_end + 1) == lock->lf_start) {
	425	struct lockf adjacent = lf;
	426
	427	LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent previous\n");
	428	lock->lf_start = (*lf)->lf_start;
	429	*lf = lock;
	430	lf = &(*lf)->lf_next;
	431
	432	lf_move_blocked(lock, adjacent);
	433
	434	FREE(adjacent, M_LOCKF);
	435	continue;
	436	}
	437	/* If the lock starts adjacent to us, we can coalesce it */
	438	if (lock->lf_end != -1 &&
	439	(lock->lf_end + 1) == (*lf)->lf_start) {
	440	struct lockf adjacent = lf;
	441
	442	LOCKF_DEBUG(LF_DBG_LIST, "lf_coalesce_adjacent: coalesce adjacent following\n");
	443	lock->lf_end = (*lf)->lf_end;
	444	lock->lf_next = (*lf)->lf_next;
	445	lf = &lock->lf_next;
	446
	447	lf_move_blocked(lock, adjacent);
	448
	449	FREE(adjacent, M_LOCKF);
	450	continue;
	451	}
	452
	453	/* no matching conditions; go on to next lock */
	454	lf = &(*lf)->lf_next;
	455	}
	456	}
	457
	458	/*
	459	* lf_setlock
	460	*
	461	* Description: Set a byte-range lock.
	462	*
	463	* Parameters: lock The lock structure describing the lock
	464	* to be set; allocated by the caller, it
	465	* will be linked into the lock list if
	466	* the set is successful, and freed if the
	467	* set is unsuccessful.
	468	*
	469	* timeout Timeout specified in the case of
	470	* SETLKWTIMEOUT.
	471	*
	472	* Returns: 0 Success
	473	* EAGAIN
	474	* EDEADLK
	475	* lf_split:ENOLCK
	476	* lf_clearlock:ENOLCK
	477	* msleep:EINTR
	478	* msleep:ETIMEDOUT
	479	*
	480	* Notes: We add the lock to the provisional lock list. We do not
	481	* coalesce at this time; this has implications for other lock
	482	* requestors in the blocker search mechanism.
	483	*/
	484	static int
	485	lf_setlock(struct lockf lock, struct timespec timeout)
	486	{
	487	struct lockf *block;
	488	struct lockf **head = lock->lf_head;
	489	struct lockf *prev, overlap, *ltmp;
	490	static char lockstr[] = "lockf";
	491	int priority, needtolink, error;
	492	struct vnode *vp = lock->lf_vnode;
	493	overlap_t ovcase;
	494
	495	#ifdef LOCKF_DEBUGGING
	496	if (lockf_debug & LF_DBG_LOCKOP) {
	497	lf_print("lf_setlock", lock);
	498	lf_printlist("lf_setlock(in)", lock);
	499	}
	500	#endif /* LOCKF_DEBUGGING */
	501
	502	/*
	503	* Set the priority
	504	*/
	505	priority = PLOCK;
	506	if (lock->lf_type == F_WRLCK)
	507	priority += 4;
	508	priority \|= PCATCH;
	509	/*
	510	* Scan lock list for this file looking for locks that would block us.
	511	*/
	512	while ((block = lf_getblock(lock, -1))) {
	513	/*
	514	* Free the structure and return if nonblocking.
	515	*/
	516	if ((lock->lf_flags & F_WAIT) == 0) {
	517	DTRACE_FSINFO(advlock__nowait, vnode_t, vp);
	518	FREE(lock, M_LOCKF);
	519	return (EAGAIN);
	520	}
	521
	522	/*
	523	* We are blocked. Since flock style locks cover
	524	* the whole file, there is no chance for deadlock.
	525	*
	526	* OFD byte-range locks currently do NOT support
	527	* deadlock detection.
	528	*
	529	* For POSIX byte-range locks we must check for deadlock.
	530	*
	531	* Deadlock detection is done by looking through the
	532	* wait channels to see if there are any cycles that
	533	* involve us. MAXDEPTH is set just to make sure we
	534	* do not go off into neverland.
	535	*/
	536	if ((lock->lf_flags & F_POSIX) &&
	537	(block->lf_flags & F_POSIX)) {
	538	struct proc wproc, bproc;
	539	struct uthread *ut;
	540	struct lockf *waitblock;
	541	int i = 0;
	542
	543	/* The block is waiting on something */
	544	wproc = block->lf_owner;
	545	proc_lock(wproc);
	546	TAILQ_FOREACH(ut, &wproc->p_uthlist, uu_list) {
	547	/*
	548	* While the thread is asleep (uu_wchan != 0)
	549	* in this code (uu_wmesg == lockstr)
	550	* and we have not exceeded the maximum cycle
	551	* depth (i < maxlockdepth), then check for a
	552	* cycle to see if the lock is blocked behind
	553	* someone blocked behind us.
	554	*/
	555	while (((waitblock = (struct lockf *)ut->uu_wchan) != NULL) &&
	556	ut->uu_wmesg == lockstr &&
	557	(i++ < maxlockdepth)) {
	558	waitblock = (struct lockf *)ut->uu_wchan;
	559	/*
	560	* Get the lock blocking the lock
	561	* which would block us, and make
	562	* certain it hasn't come unblocked
	563	* (been granted, e.g. between the time
	564	* we called lf_getblock, and the time
	565	* we successfully acquired the
	566	* proc_lock).
	567	*/
	568	waitblock = waitblock->lf_next;
	569	if (waitblock == NULL)
	570	break;
	571
	572	/*
	573	* Make sure it's an advisory range
	574	* lock and not any other kind of lock;
	575	* if we mix lock types, it's our own
	576	* fault.
	577	*/
	578	if ((waitblock->lf_flags & F_POSIX) == 0)
	579	break;
	580
	581	/*
	582	* If the owner of the lock that's
	583	* blocking a lock that's blocking us
	584	* getting the requested lock, then we
	585	* would deadlock, so error out.
	586	*/
	587	bproc = waitblock->lf_owner;
	588	if (bproc == lock->lf_owner) {
	589	proc_unlock(wproc);
	590	FREE(lock, M_LOCKF);
	591	return (EDEADLK);
	592	}
	593	}
	594	}
	595	proc_unlock(wproc);
	596	}
	597
	598	/*
	599	* For flock type locks, we must first remove
	600	* any shared locks that we hold before we sleep
	601	* waiting for an exclusive lock.
	602	*/
	603	if ((lock->lf_flags & F_FLOCK) &&
	604	lock->lf_type == F_WRLCK) {
	605	lock->lf_type = F_UNLCK;
	606	if ((error = lf_clearlock(lock)) != 0) {
	607	FREE(lock, M_LOCKF);
	608	return (error);
	609	}
	610	lock->lf_type = F_WRLCK;
	611	}
	612	/*
	613	* Add our lock to the blocked list and sleep until we're free.
	614	* Remember who blocked us (for deadlock detection).
	615	*/
	616	lock->lf_next = block;
	617	TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
	618
	619	if ( !(lock->lf_flags & F_FLOCK))
	620	block->lf_flags &= ~F_WAKE1_SAFE;
	621
	622	#if IMPORTANCE_INHERITANCE
	623	/*
	624	* Importance donation is done only for cases where the
	625	* owning task can be unambiguously determined.
	626	*
	627	* POSIX type locks are not inherited by child processes;
	628	* we maintain a 1:1 mapping between a lock and its owning
	629	* process.
	630	*
	631	* Flock type locks are inherited across fork() and there is
	632	* no 1:1 mapping in the general case. However, the fileglobs
	633	* used by OFD locks may be confined to the process that
	634	* created them, and thus have an "owner", in which case
	635	* we also attempt importance donation.
	636	*/
	637	if ((lock->lf_flags & block->lf_flags & F_POSIX) != 0)
	638	lf_boost_blocking_proc(lock, block);
	639	else if ((lock->lf_flags & block->lf_flags & F_OFD_LOCK) &&
	640	lock->lf_owner != block->lf_owner &&
	641	NULL != lock->lf_owner && NULL != block->lf_owner)
	642	lf_boost_blocking_proc(lock, block);
	643	#endif /* IMPORTANCE_INHERITANCE */
	644
	645	#ifdef LOCKF_DEBUGGING
	646	if (lockf_debug & LF_DBG_LOCKOP) {
	647	lf_print("lf_setlock: blocking on", block);
	648	lf_printlist("lf_setlock(block)", block);
	649	}
	650	#endif /* LOCKF_DEBUGGING */
	651	DTRACE_FSINFO(advlock__wait, vnode_t, vp);
	652
	653	error = msleep(lock, &vp->v_lock, priority, lockstr, timeout);
	654
	655	if (error == 0 && (lock->lf_flags & F_ABORT) != 0)
	656	error = EBADF;
	657
	658	if (lock->lf_next) {
	659	/*
	660	* lf_wakelock() always sets wakelock->lf_next to
	661	* NULL before a wakeup; so we've been woken early
	662	* - perhaps by a debugger, signal or other event.
	663	*
	664	* Remove 'lock' from the block list (avoids double-add
	665	* in the spurious case, which would create a cycle)
	666	*/
	667	TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
	668	lock->lf_next = NULL;
	669
	670	if (error == 0) {
	671	/*
	672	* If this was a spurious wakeup, retry
	673	*/
	674	printf("%s: spurious wakeup, retrying lock\n",
	675	__func__);
	676	continue;
	677	}
	678	}
	679
	680	if (!TAILQ_EMPTY(&lock->lf_blkhd)) {
	681	if ((block = lf_getblock(lock, -1)) != NULL)
	682	lf_move_blocked(block, lock);
	683	}
	684
	685	if (error) {
	686	if (!TAILQ_EMPTY(&lock->lf_blkhd))
	687	lf_wakelock(lock, TRUE);
	688	FREE(lock, M_LOCKF);
	689	/* Return ETIMEDOUT if timeout occoured. */
	690	if (error == EWOULDBLOCK) {
	691	error = ETIMEDOUT;
	692	}
	693	return (error);
	694	}
	695	}
	696
	697	/*
	698	* No blocks!! Add the lock. Note that we will
	699	* downgrade or upgrade any overlapping locks this
	700	* process already owns.
	701	*
	702	* Skip over locks owned by other processes.
	703	* Handle any locks that overlap and are owned by ourselves.
	704	*/
	705	prev = head;
	706	block = *head;
	707	needtolink = 1;
	708	for (;;) {
	709	ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
	710	if (ovcase)
	711	block = overlap->lf_next;
	712	/*
	713	* Six cases:
	714	* 0) no overlap
	715	* 1) overlap == lock
	716	* 2) overlap contains lock
	717	* 3) lock contains overlap
	718	* 4) overlap starts before lock
	719	* 5) overlap ends after lock
	720	*/
	721	switch (ovcase) {
	722	case OVERLAP_NONE:
	723	if (needtolink) {
	724	*prev = lock;
	725	lock->lf_next = overlap;
	726	}
	727	break;
	728
	729	case OVERLAP_EQUALS_LOCK:
	730	/*
	731	* If downgrading lock, others may be
	732	* able to acquire it.
	733	*/
	734	if (lock->lf_type == F_RDLCK &&
	735	overlap->lf_type == F_WRLCK)
	736	lf_wakelock(overlap, TRUE);
	737	overlap->lf_type = lock->lf_type;
	738	FREE(lock, M_LOCKF);
	739	lock = overlap; /* for lf_coalesce_adjacent() */
	740	break;
	741
	742	case OVERLAP_CONTAINS_LOCK:
	743	/*
	744	* Check for common starting point and different types.
	745	*/
	746	if (overlap->lf_type == lock->lf_type) {
	747	FREE(lock, M_LOCKF);
	748	lock = overlap; /* for lf_coalesce_adjacent() */
	749	break;
	750	}
	751	if (overlap->lf_start == lock->lf_start) {
	752	*prev = lock;
	753	lock->lf_next = overlap;
	754	overlap->lf_start = lock->lf_end + 1;
	755	} else {
	756	/*
	757	* If we can't split the lock, we can't
	758	* grant it. Claim a system limit for the
	759	* resource shortage.
	760	*/
	761	if (lf_split(overlap, lock)) {
	762	FREE(lock, M_LOCKF);
	763	return (ENOLCK);
	764	}
	765	}
	766	lf_wakelock(overlap, TRUE);
	767	break;
	768
	769	case OVERLAP_CONTAINED_BY_LOCK:
	770	/*
	771	* If downgrading lock, others may be able to
	772	* acquire it, otherwise take the list.
	773	*/
	774	if (lock->lf_type == F_RDLCK &&
	775	overlap->lf_type == F_WRLCK) {
	776	lf_wakelock(overlap, TRUE);
	777	} else {
	778	while (!TAILQ_EMPTY(&overlap->lf_blkhd)) {
	779	ltmp = TAILQ_FIRST(&overlap->lf_blkhd);
	780	TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
	781	lf_block);
	782	TAILQ_INSERT_TAIL(&lock->lf_blkhd,
	783	ltmp, lf_block);
	784	ltmp->lf_next = lock;
	785	}
	786	}
	787	/*
	788	* Add the new lock if necessary and delete the overlap.
	789	*/
	790	if (needtolink) {
	791	*prev = lock;
	792	lock->lf_next = overlap->lf_next;
	793	prev = &lock->lf_next;
	794	needtolink = 0;
	795	} else
	796	*prev = overlap->lf_next;
	797	FREE(overlap, M_LOCKF);
	798	continue;
	799
	800	case OVERLAP_STARTS_BEFORE_LOCK:
	801	/*
	802	* Add lock after overlap on the list.
	803	*/
	804	lock->lf_next = overlap->lf_next;
	805	overlap->lf_next = lock;
	806	overlap->lf_end = lock->lf_start - 1;
	807	prev = &lock->lf_next;
	808	lf_wakelock(overlap, TRUE);
	809	needtolink = 0;
	810	continue;
	811
	812	case OVERLAP_ENDS_AFTER_LOCK:
	813	/*
	814	* Add the new lock before overlap.
	815	*/
	816	if (needtolink) {
	817	*prev = lock;
	818	lock->lf_next = overlap;
	819	}
	820	overlap->lf_start = lock->lf_end + 1;
	821	lf_wakelock(overlap, TRUE);
	822	break;
	823	}
	824	break;
	825	}
	826	/* Coalesce adjacent locks with identical attributes */
	827	lf_coalesce_adjacent(lock);
	828	#ifdef LOCKF_DEBUGGING
	829	if (lockf_debug & LF_DBG_LOCKOP) {
	830	lf_print("lf_setlock: got the lock", lock);
	831	lf_printlist("lf_setlock(out)", lock);
	832	}
	833	#endif /* LOCKF_DEBUGGING */
	834	return (0);
	835	}
	836
	837
	838	/*
	839	* lf_clearlock
	840	*
	841	* Description: Remove a byte-range lock on an vnode. Generally, find the
	842	* lock (or an overlap to that lock) and remove it (or shrink
	843	* it), then wakeup anyone we can.
	844	*
	845	* Parameters: unlock The lock to clear
	846	*
	847	* Returns: 0 Success
	848	* lf_split:ENOLCK
	849	*
	850	* Notes: A caller may unlock all the locks owned by the caller by
	851	* specifying the entire file range; locks owned by other
	852	* callers are not effected by this operation.
	853	*/
	854	static int
	855	lf_clearlock(struct lockf *unlock)
	856	{
	857	struct lockf **head = unlock->lf_head;
	858	struct lockf lf = head;
	859	struct lockf overlap, *prev;
	860	overlap_t ovcase;
	861
	862	if (lf == NOLOCKF)
	863	return (0);
	864	#ifdef LOCKF_DEBUGGING
	865	if (unlock->lf_type != F_UNLCK)
	866	panic("lf_clearlock: bad type");
	867	if (lockf_debug & LF_DBG_LOCKOP)
	868	lf_print("lf_clearlock", unlock);
	869	#endif /* LOCKF_DEBUGGING */
	870	prev = head;
	871	while ((ovcase = lf_findoverlap(lf, unlock, SELF, &prev, &overlap)) != OVERLAP_NONE) {
	872	/*
	873	* Wakeup the list of locks to be retried.
	874	*/
	875	lf_wakelock(overlap, FALSE);
	876	#if IMPORTANCE_INHERITANCE
	877	if (overlap->lf_boosted == LF_BOOSTED) {
	878	lf_drop_assertion(overlap);
	879	}
	880	#endif /* IMPORTANCE_INHERITANCE */
	881
	882	switch (ovcase) {
	883	case OVERLAP_NONE: /* satisfy compiler enum/switch */
	884	break;
	885
	886	case OVERLAP_EQUALS_LOCK:
	887	*prev = overlap->lf_next;
	888	FREE(overlap, M_LOCKF);
	889	break;
	890
	891	case OVERLAP_CONTAINS_LOCK: /* split it */
	892	if (overlap->lf_start == unlock->lf_start) {
	893	overlap->lf_start = unlock->lf_end + 1;
	894	break;
	895	}
	896	/*
	897	* If we can't split the lock, we can't grant it.
	898	* Claim a system limit for the resource shortage.
	899	*/
	900	if (lf_split(overlap, unlock))
	901	return (ENOLCK);
	902	overlap->lf_next = unlock->lf_next;
	903	break;
	904
	905	case OVERLAP_CONTAINED_BY_LOCK:
	906	*prev = overlap->lf_next;
	907	lf = overlap->lf_next;
	908	FREE(overlap, M_LOCKF);
	909	continue;
	910
	911	case OVERLAP_STARTS_BEFORE_LOCK:
	912	overlap->lf_end = unlock->lf_start - 1;
	913	prev = &overlap->lf_next;
	914	lf = overlap->lf_next;
	915	continue;
	916
	917	case OVERLAP_ENDS_AFTER_LOCK:
	918	overlap->lf_start = unlock->lf_end + 1;
	919	break;
	920	}
	921	break;
	922	}
	923	#ifdef LOCKF_DEBUGGING
	924	if (lockf_debug & LF_DBG_LOCKOP)
	925	lf_printlist("lf_clearlock", unlock);
	926	#endif /* LOCKF_DEBUGGING */
	927	return (0);
	928	}
	929
	930
	931	/*
	932	* lf_getlock
	933	*
	934	* Description: Check whether there is a blocking lock, and if so return
	935	* its process identifier into the lock being requested.
	936	*
	937	* Parameters: lock Pointer to lock to test for blocks
	938	* fl Pointer to flock structure to receive
	939	* the blocking lock information, if a
	940	* blocking lock is found.
	941	* matchpid -1, or pid value to match in lookup.
	942	*
	943	* Returns: 0 Success
	944	*
	945	* Implicit Returns:
	946	* *fl Contents modified to reflect the
	947	* blocking lock, if one is found; not
	948	* modified otherwise
	949	*
	950	* Notes: fl->l_pid will be (-1) for file locks and will only be set to
	951	* the blocking process ID for advisory record locks.
	952	*/
	953	static int
	954	lf_getlock(struct lockf lock, struct flock fl, pid_t matchpid)
	955	{
	956	struct lockf *block;
	957
	958	#ifdef LOCKF_DEBUGGING
	959	if (lockf_debug & LF_DBG_LOCKOP)
	960	lf_print("lf_getlock", lock);
	961	#endif /* LOCKF_DEBUGGING */
	962
	963	if ((block = lf_getblock(lock, matchpid))) {
	964	fl->l_type = block->lf_type;
	965	fl->l_whence = SEEK_SET;
	966	fl->l_start = block->lf_start;
	967	if (block->lf_end == -1)
	968	fl->l_len = 0;
	969	else
	970	fl->l_len = block->lf_end - block->lf_start + 1;
	971	if (NULL != block->lf_owner) {
	972	/*
	973	* lf_owner is only non-NULL when the lock
	974	* "owner" can be unambiguously determined
	975	*/
	976	fl->l_pid = proc_pid(block->lf_owner);
	977	} else
	978	fl->l_pid = -1;
	979	} else {
	980	fl->l_type = F_UNLCK;
	981	}
	982	return (0);
	983	}
	984
	985	/*
	986	* lf_getblock
	987	*
	988	* Description: Walk the list of locks for an inode and return the first
	989	* blocking lock. A lock is considered blocking if we are not
	990	* the lock owner; otherwise, we are permitted to upgrade or
	991	* downgrade it, and it's not considered blocking.
	992	*
	993	* Parameters: lock The lock for which we are interested
	994	* in obtaining the blocking lock, if any
	995	* matchpid -1, or pid value to match in lookup.
	996	*
	997	* Returns: NOLOCKF No blocking lock exists
	998	* !NOLOCKF The address of the blocking lock's
	999	* struct lockf.
	1000	*/
	1001	static struct lockf *
	1002	lf_getblock(struct lockf *lock, pid_t matchpid)
	1003	{
	1004	struct lockf *prev, overlap, lf = (lock->lf_head);
	1005
	1006	for (prev = lock->lf_head;
	1007	lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != OVERLAP_NONE;
	1008	lf = overlap->lf_next) {
	1009	/*
	1010	* Found an overlap.
	1011	*
	1012	* If we're matching pids, and it's a record lock,
	1013	* or it's an OFD lock on a process-confined fd,
	1014	* but the pid doesn't match, then keep on looking ..
	1015	*/
	1016	if (matchpid != -1 &&
	1017	(overlap->lf_flags & (F_POSIX\|F_OFD_LOCK)) != 0 &&
	1018	proc_pid(overlap->lf_owner) != matchpid)
	1019	continue;
	1020
	1021	/*
	1022	* does it block us?
	1023	*/
	1024	if ((lock->lf_type == F_WRLCK \|\| overlap->lf_type == F_WRLCK))
	1025	return (overlap);
	1026	}
	1027	return (NOLOCKF);
	1028	}
	1029
	1030
	1031	/*
	1032	* lf_findoverlap
	1033	*
	1034	* Description: Walk the list of locks to find an overlapping lock (if any).
	1035	*
	1036	* Parameters: lf First lock on lock list
	1037	* lock The lock we are checking for an overlap
	1038	* check Check type
	1039	* prev pointer to pointer pointer to contain
	1040	* address of pointer to previous lock
	1041	* pointer to overlapping lock, if overlap
	1042	* overlap pointer to pointer to contain address
	1043	* of overlapping lock
	1044	*
	1045	* Returns: OVERLAP_NONE
	1046	* OVERLAP_EQUALS_LOCK
	1047	* OVERLAP_CONTAINS_LOCK
	1048	* OVERLAP_CONTAINED_BY_LOCK
	1049	* OVERLAP_STARTS_BEFORE_LOCK
	1050	* OVERLAP_ENDS_AFTER_LOCK
	1051	*
	1052	* Implicit Returns:
	1053	* *prev The address of the next pointer in the
	1054	* lock previous to the overlapping lock;
	1055	* this is generally used to relink the
	1056	* lock list, avoiding a second iteration.
	1057	* *overlap The pointer to the overlapping lock
	1058	* itself; this is used to return data in
	1059	* the check == OTHERS case, and for the
	1060	* caller to modify the overlapping lock,
	1061	* in the check == SELF case
	1062	*
	1063	* Note: This returns only the FIRST overlapping lock. There may be
	1064	* more than one. lf_getlock will return the first blocking lock,
	1065	* while lf_setlock will iterate over all overlapping locks to
	1066	*
	1067	* The check parameter can be SELF, meaning we are looking for
	1068	* overlapping locks owned by us, or it can be OTHERS, meaning
	1069	* we are looking for overlapping locks owned by someone else so
	1070	* we can report a blocking lock on an F_GETLK request.
	1071	*
	1072	* The value of overlap and prev are modified, even if there is
	1073	* no overlapping lock found; always check the return code.
	1074	*/
	1075	static overlap_t
	1076	lf_findoverlap(struct lockf lf, struct lockf lock, int type,
	1077	struct lockf *prev, struct lockf overlap)
	1078	{
	1079	off_t start, end;
	1080	int found_self = 0;
	1081
	1082	*overlap = lf;
	1083	if (lf == NOLOCKF)
	1084	return (0);
	1085	#ifdef LOCKF_DEBUGGING
	1086	if (lockf_debug & LF_DBG_LIST)
	1087	lf_print("lf_findoverlap: looking for overlap in", lock);
	1088	#endif /* LOCKF_DEBUGGING */
	1089	start = lock->lf_start;
	1090	end = lock->lf_end;
	1091	while (lf != NOLOCKF) {
	1092	if (((type & SELF) && lf->lf_id != lock->lf_id) \|\|
	1093	((type & OTHERS) && lf->lf_id == lock->lf_id)) {
	1094	/*
	1095	* Locks belonging to one process are adjacent on the
	1096	* list, so if we've found any locks belonging to us,
	1097	* and we're now seeing something else, then we've
	1098	* examined all "self" locks. Note that bailing out
	1099	* here is quite important; for coalescing, we assume
	1100	* numerically adjacent locks from the same owner to
	1101	* be adjacent on the list.
	1102	*/
	1103	if ((type & SELF) && found_self) {
	1104	return OVERLAP_NONE;
	1105	}
	1106
	1107	*prev = &lf->lf_next;
	1108	*overlap = lf = lf->lf_next;
	1109	continue;
	1110	}
	1111
	1112	if ((type & SELF)) {
	1113	found_self = 1;
	1114	}
	1115
	1116	#ifdef LOCKF_DEBUGGING
	1117	if (lockf_debug & LF_DBG_LIST)
	1118	lf_print("\tchecking", lf);
	1119	#endif /* LOCKF_DEBUGGING */
	1120	/*
	1121	* OK, check for overlap
	1122	*/
	1123	if ((lf->lf_end != -1 && start > lf->lf_end) \|\|
	1124	(end != -1 && lf->lf_start > end)) {
	1125	/* Case 0 */
	1126	LOCKF_DEBUG(LF_DBG_LIST, "no overlap\n");
	1127
	1128	/*
	1129	* NOTE: assumes that locks for the same process are
	1130	* nonintersecting and ordered.
	1131	*/
	1132	if ((type & SELF) && end != -1 && lf->lf_start > end)
	1133	return (OVERLAP_NONE);
	1134	*prev = &lf->lf_next;
	1135	*overlap = lf = lf->lf_next;
	1136	continue;
	1137	}
	1138	if ((lf->lf_start == start) && (lf->lf_end == end)) {
	1139	LOCKF_DEBUG(LF_DBG_LIST, "overlap == lock\n");
	1140	return (OVERLAP_EQUALS_LOCK);
	1141	}
	1142	if ((lf->lf_start <= start) &&
	1143	(end != -1) &&
	1144	((lf->lf_end >= end) \|\| (lf->lf_end == -1))) {
	1145	LOCKF_DEBUG(LF_DBG_LIST, "overlap contains lock\n");
	1146	return (OVERLAP_CONTAINS_LOCK);
	1147	}
	1148	if (start <= lf->lf_start &&
	1149	(end == -1 \|\|
	1150	(lf->lf_end != -1 && end >= lf->lf_end))) {
	1151	LOCKF_DEBUG(LF_DBG_LIST, "lock contains overlap\n");
	1152	return (OVERLAP_CONTAINED_BY_LOCK);
	1153	}
	1154	if ((lf->lf_start < start) &&
	1155	((lf->lf_end >= start) \|\| (lf->lf_end == -1))) {
	1156	LOCKF_DEBUG(LF_DBG_LIST, "overlap starts before lock\n");
	1157	return (OVERLAP_STARTS_BEFORE_LOCK);
	1158	}
	1159	if ((lf->lf_start > start) &&
	1160	(end != -1) &&
	1161	((lf->lf_end > end) \|\| (lf->lf_end == -1))) {
	1162	LOCKF_DEBUG(LF_DBG_LIST, "overlap ends after lock\n");
	1163	return (OVERLAP_ENDS_AFTER_LOCK);
	1164	}
	1165	panic("lf_findoverlap: default");
	1166	}
	1167	return (OVERLAP_NONE);
	1168	}
	1169
	1170
	1171	/*
	1172	* lf_split
	1173	*
	1174	* Description: Split a lock and a contained region into two or three locks
	1175	* as necessary.
	1176	*
	1177	* Parameters: lock1 Lock to split
	1178	* lock2 Overlapping lock region requiring the
	1179	* split (upgrade/downgrade/unlock)
	1180	*
	1181	* Returns: 0 Success
	1182	* ENOLCK No memory for new lock
	1183	*
	1184	* Implicit Returns:
	1185	* *lock1 Modified original lock
	1186	* *lock2 Overlapping lock (inserted into list)
	1187	* (new lock) Potential new lock inserted into list
	1188	* if split results in 3 locks
	1189	*
	1190	* Notes: This operation can only fail if the split would result in three
	1191	* locks, and there is insufficient memory to allocate the third
	1192	* lock; in that case, neither of the locks will be modified.
	1193	*/
	1194	static int
	1195	lf_split(struct lockf lock1, struct lockf lock2)
	1196	{
	1197	struct lockf *splitlock;
	1198
	1199	#ifdef LOCKF_DEBUGGING
	1200	if (lockf_debug & LF_DBG_LIST) {
	1201	lf_print("lf_split", lock1);
	1202	lf_print("splitting from", lock2);
	1203	}
	1204	#endif /* LOCKF_DEBUGGING */
	1205	/*
	1206	* Check to see if splitting into only two pieces.
	1207	*/
	1208	if (lock1->lf_start == lock2->lf_start) {
	1209	lock1->lf_start = lock2->lf_end + 1;
	1210	lock2->lf_next = lock1;
	1211	return (0);
	1212	}
	1213	if (lock1->lf_end == lock2->lf_end) {
	1214	lock1->lf_end = lock2->lf_start - 1;
	1215	lock2->lf_next = lock1->lf_next;
	1216	lock1->lf_next = lock2;
	1217	return (0);
	1218	}
	1219	/*
	1220	* Make a new lock consisting of the last part of
	1221	* the encompassing lock
	1222	*/
	1223	MALLOC(splitlock, struct lockf , sizeof splitlock, M_LOCKF, M_WAITOK);
	1224	if (splitlock == NULL)
	1225	return (ENOLCK);
	1226	bcopy(lock1, splitlock, sizeof *splitlock);
	1227	splitlock->lf_start = lock2->lf_end + 1;
	1228	TAILQ_INIT(&splitlock->lf_blkhd);
	1229	lock1->lf_end = lock2->lf_start - 1;
	1230	/*
	1231	* OK, now link it in
	1232	*/
	1233	splitlock->lf_next = lock1->lf_next;
	1234	lock2->lf_next = splitlock;
	1235	lock1->lf_next = lock2;
	1236
	1237	return (0);
	1238	}
	1239
	1240
	1241	/*
	1242	* lf_wakelock
	1243	*
	1244	* Wakeup a blocklist in the case of a downgrade or unlock, since others
	1245	* waiting on the lock may now be able to acquire it.
	1246	*
	1247	* Parameters: listhead Lock list head on which waiters may
	1248	* have pending locks
	1249	*
	1250	* Returns: <void>
	1251	*
	1252	* Notes: This function iterates a list of locks and wakes all waiters,
	1253	* rather than only waiters for the contended regions. Because
	1254	* of this, for heavily contended files, this can result in a
	1255	* "thundering herd" situation. Refactoring the code could make
	1256	* this operation more efficient, if heavy contention ever results
	1257	* in a real-world performance problem.
	1258	*/
	1259	static void
	1260	lf_wakelock(struct lockf *listhead, boolean_t force_all)
	1261	{
	1262	struct lockf *wakelock;
	1263	boolean_t wake_all = TRUE;
	1264
	1265	if (force_all == FALSE && (listhead->lf_flags & F_WAKE1_SAFE))
	1266	wake_all = FALSE;
	1267
	1268	while (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
	1269	wakelock = TAILQ_FIRST(&listhead->lf_blkhd);
	1270	TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
	1271
	1272	wakelock->lf_next = NOLOCKF;
	1273	#ifdef LOCKF_DEBUGGING
	1274	if (lockf_debug & LF_DBG_LOCKOP)
	1275	lf_print("lf_wakelock: awakening", wakelock);
	1276	#endif /* LOCKF_DEBUGGING */
	1277	if (wake_all == FALSE) {
	1278	/*
	1279	* If there are items on the list head block list,
	1280	* move them to the wakelock list instead, and then
	1281	* correct their lf_next pointers.
	1282	*/
	1283	if (!TAILQ_EMPTY(&listhead->lf_blkhd)) {
	1284	TAILQ_CONCAT(&wakelock->lf_blkhd, &listhead->lf_blkhd, lf_block);
	1285
	1286	struct lockf *tlock;
	1287
	1288	TAILQ_FOREACH(tlock, &wakelock->lf_blkhd, lf_block) {
	1289	if (TAILQ_NEXT(tlock, lf_block) == tlock) {
	1290	/* See rdar://10887303 */
	1291	panic("cycle in wakelock list");
	1292	}
	1293	tlock->lf_next = wakelock;
	1294	}
	1295	}
	1296	}
	1297	wakeup(wakelock);
	1298
	1299	if (wake_all == FALSE)
	1300	break;
	1301	}
	1302	}
	1303
	1304
	1305	#ifdef LOCKF_DEBUGGING
	1306	#define GET_LF_OWNER_PID(lf) (proc_pid((lf)->lf_owner))
	1307
	1308	/*
	1309	* lf_print DEBUG
	1310	*
	1311	* Print out a lock; lock information is prefixed by the string in 'tag'
	1312	*
	1313	* Parameters: tag A string tag for debugging
	1314	* lock The lock whose information should be
	1315	* displayed
	1316	*
	1317	* Returns: <void>
	1318	*/
	1319	void
	1320	lf_print(const char tag, struct lockf lock)
	1321	{
	1322	printf("%s: lock %p for ", tag, (void *)lock);
	1323	if (lock->lf_flags & F_POSIX)
	1324	printf("proc %p (owner %d)",
	1325	lock->lf_id, GET_LF_OWNER_PID(lock));
	1326	else if (lock->lf_flags & F_OFD_LOCK)
	1327	printf("fg %p (owner %d)",
	1328	lock->lf_id, GET_LF_OWNER_PID(lock));
	1329	else
	1330	printf("id %p", (void *)lock->lf_id);
	1331	if (lock->lf_vnode != 0)
	1332	printf(" in vno %p, %s, start 0x%016llx, end 0x%016llx",
	1333	lock->lf_vnode,
	1334	lock->lf_type == F_RDLCK ? "shared" :
	1335	lock->lf_type == F_WRLCK ? "exclusive" :
	1336	lock->lf_type == F_UNLCK ? "unlock" : "unknown",
	1337	(intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
	1338	else
	1339	printf(" %s, start 0x%016llx, end 0x%016llx",
	1340	lock->lf_type == F_RDLCK ? "shared" :
	1341	lock->lf_type == F_WRLCK ? "exclusive" :
	1342	lock->lf_type == F_UNLCK ? "unlock" : "unknown",
	1343	(intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
	1344	if (!TAILQ_EMPTY(&lock->lf_blkhd))
	1345	printf(" block %p\n", (void *)TAILQ_FIRST(&lock->lf_blkhd));
	1346	else
	1347	printf("\n");
	1348	}
	1349
	1350
	1351	/*
	1352	* lf_printlist DEBUG
	1353	*
	1354	* Print out a lock list for the vnode associated with 'lock'; lock information
	1355	* is prefixed by the string in 'tag'
	1356	*
	1357	* Parameters: tag A string tag for debugging
	1358	* lock The lock whose vnode's lock list should
	1359	* be displayed
	1360	*
	1361	* Returns: <void>
	1362	*/
	1363	void
	1364	lf_printlist(const char tag, struct lockf lock)
	1365	{
	1366	struct lockf lf, blk;
	1367
	1368	if (lock->lf_vnode == 0)
	1369	return;
	1370
	1371	printf("%s: Lock list for vno %p:\n",
	1372	tag, lock->lf_vnode);
	1373	for (lf = lock->lf_vnode->v_lockf; lf; lf = lf->lf_next) {
	1374	printf("\tlock %p for ",(void *)lf);
	1375	if (lf->lf_flags & F_POSIX)
	1376	printf("proc %p (owner %d)",
	1377	lf->lf_id, GET_LF_OWNER_PID(lf));
	1378	else if (lf->lf_flags & F_OFD_LOCK)
	1379	printf("fg %p (owner %d)",
	1380	lf->lf_id, GET_LF_OWNER_PID(lf));
	1381	else
	1382	printf("id %p", (void *)lf->lf_id);
	1383	printf(", %s, start 0x%016llx, end 0x%016llx",
	1384	lf->lf_type == F_RDLCK ? "shared" :
	1385	lf->lf_type == F_WRLCK ? "exclusive" :
	1386	lf->lf_type == F_UNLCK ? "unlock" :
	1387	"unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end);
	1388	TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
	1389	printf("\n\t\tlock request %p for ", (void *)blk);
	1390	if (blk->lf_flags & F_POSIX)
	1391	printf("proc %p (owner %d)",
	1392	blk->lf_id, GET_LF_OWNER_PID(blk));
	1393	else if (blk->lf_flags & F_OFD_LOCK)
	1394	printf("fg %p (owner %d)",
	1395	blk->lf_id, GET_LF_OWNER_PID(blk));
	1396	else
	1397	printf("id %p", (void *)blk->lf_id);
	1398	printf(", %s, start 0x%016llx, end 0x%016llx",
	1399	blk->lf_type == F_RDLCK ? "shared" :
	1400	blk->lf_type == F_WRLCK ? "exclusive" :
	1401	blk->lf_type == F_UNLCK ? "unlock" :
	1402	"unknown", (intmax_t)blk->lf_start,
	1403	(intmax_t)blk->lf_end);
	1404	if (!TAILQ_EMPTY(&blk->lf_blkhd))
	1405	panic("lf_printlist: bad list");
	1406	}
	1407	printf("\n");
	1408	}
	1409	}
	1410	#endif /* LOCKF_DEBUGGING */
	1411
	1412	#if IMPORTANCE_INHERITANCE
	1413
	1414	/*
	1415	* lf_hold_assertion
	1416	*
	1417	* Call task importance hold assertion on the owner of the lock.
	1418	*
	1419	* Parameters: block_task Owner of the lock blocking
	1420	* current thread.
	1421	*
	1422	* block lock on which the current thread
	1423	* is blocking on.
	1424	*
	1425	* Returns: <void>
	1426	*
	1427	* Notes: The task reference on block_task is not needed to be hold since
	1428	* the current thread has vnode lock and block_task has a file
	1429	* lock, thus removing file lock in exit requires block_task to
	1430	* grab the vnode lock.
	1431	*/
	1432	static void
	1433	lf_hold_assertion(task_t block_task, struct lockf *block)
	1434	{
	1435	if (task_importance_hold_file_lock_assertion(block_task, 1)) {
	1436	block->lf_boosted = LF_BOOSTED;
	1437	LOCKF_DEBUG(LF_DBG_IMPINH,
	1438	"lf: importance hold file lock assert on pid %d lock %p\n",
	1439	proc_pid(block->lf_owner), block);
	1440	}
	1441	}
	1442
	1443
	1444	/*
	1445	* lf_jump_to_queue_head
	1446	*
	1447	* Jump the lock from the tail of the block queue to the head of
	1448	* the queue.
	1449	*
	1450	* Parameters: block lockf struct containing the
	1451	* block queue.
	1452	* lock lockf struct to be jumped to the
	1453	* front.
	1454	*
	1455	* Returns: <void>
	1456	*/
	1457	static void
	1458	lf_jump_to_queue_head(struct lockf block, struct lockf lock)
	1459	{
	1460	/* Move the lock to the head of the block queue. */
	1461	TAILQ_REMOVE(&block->lf_blkhd, lock, lf_block);
	1462	TAILQ_INSERT_HEAD(&block->lf_blkhd, lock, lf_block);
	1463	}
	1464
	1465
	1466	/*
	1467	* lf_drop_assertion
	1468	*
	1469	* Drops the task hold assertion.
	1470	*
	1471	* Parameters: block lockf struct holding the assertion.
	1472	*
	1473	* Returns: <void>
	1474	*/
	1475	static void
	1476	lf_drop_assertion(struct lockf *block)
	1477	{
	1478	LOCKF_DEBUG(LF_DBG_IMPINH, "lf: %d: dropping assertion for lock %p\n",
	1479	proc_pid(block->lf_owner), block);
	1480
	1481	task_t current_task = proc_task(block->lf_owner);
	1482	task_importance_drop_file_lock_assertion(current_task, 1);
	1483	block->lf_boosted = LF_NOT_BOOSTED;
	1484	}
	1485
	1486	static void
	1487	lf_boost_blocking_proc(struct lockf lock, struct lockf block)
	1488	{
	1489	task_t ltask = proc_task(lock->lf_owner);
	1490	task_t btask = proc_task(block->lf_owner);
	1491
	1492	/*
	1493	* Check if ltask can donate importance. The
	1494	* check of imp_donor bit is done without holding
	1495	* any lock. The value may change after you read it,
	1496	* but it is ok to boost a task while someone else is
	1497	* unboosting you.
	1498	*
	1499	* TODO: Support live inheritance on file locks.
	1500	*/
	1501	if (task_is_importance_donor(ltask)) {
	1502	LOCKF_DEBUG(LF_DBG_IMPINH,
	1503	"lf: %d: attempt to boost pid %d that holds lock %p\n",
	1504	proc_pid(lock->lf_owner), proc_pid(block->lf_owner), block);
	1505
	1506	if (block->lf_boosted != LF_BOOSTED &&
	1507	task_is_importance_receiver_type(btask)) {
	1508	lf_hold_assertion(btask, block);
	1509	}
	1510	lf_jump_to_queue_head(block, lock);
	1511	}
	1512	}
	1513	#endif /* IMPORTANCE_INHERITANCE */