git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2002-2010 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*-
	29	* Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
	30	*
	31	* Redistribution and use in source and binary forms, with or without
	32	* modification, are permitted provided that the following conditions
	33	* are met:
	34	* 1. Redistributions of source code must retain the above copyright
	35	* notice, this list of conditions and the following disclaimer.
	36	* 2. Redistributions in binary form must reproduce the above copyright
	37	* notice, this list of conditions and the following disclaimer in the
	38	* documentation and/or other materials provided with the distribution.
	39	* 3. Berkeley Software Design Inc's name may not be used to endorse or
	40	* promote products derived from this software without specific prior
	41	* written permission.
	42	*
	43	* THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
	44	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	45	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	46	* ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
	47	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	48	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	49	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	50	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	51	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	52	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	53	* SUCH DAMAGE.
	54	*
	55	* from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
	56	*/
	57
	58	#include <sys/cdefs.h>
	59	#include <sys/param.h>
	60	#include <sys/systm.h>
	61	#include <sys/fcntl.h>
	62	#include <sys/kernel.h> /* for hz */
	63	#include <sys/file_internal.h>
	64	#include <sys/malloc.h>
	65	#include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */
	66	#include <sys/kpi_mbuf.h>
	67	#include <sys/mount_internal.h>
	68	#include <sys/proc_internal.h> /* for p_start */
	69	#include <sys/kauth.h>
	70	#include <sys/resourcevar.h>
	71	#include <sys/socket.h>
	72	#include <sys/unistd.h>
	73	#include <sys/user.h>
	74	#include <sys/vnode_internal.h>
	75
	76	#include <kern/thread.h>
	77	#include <kern/host.h>
	78
	79	#include <machine/limits.h>
	80
	81	#include <net/if.h>
	82
	83	#include <nfs/rpcv2.h>
	84	#include <nfs/nfsproto.h>
	85	#include <nfs/nfs.h>
	86	#include <nfs/nfs_gss.h>
	87	#include <nfs/nfsmount.h>
	88	#include <nfs/nfsnode.h>
	89	#include <nfs/nfs_lock.h>
	90
	91	#include <mach/host_priv.h>
	92	#include <mach/mig_errors.h>
	93	#include <mach/host_special_ports.h>
	94	#include <lockd/lockd_mach.h>
	95
	96	extern void ipc_port_release_send(ipc_port_t);
	97
	98	/*
	99	* pending lock request messages are kept in this queue which is
	100	* kept sorted by transaction ID (xid).
	101	*/
	102	static uint64_t nfs_lockxid = 0;
	103	static LOCKD_MSG_QUEUE nfs_pendlockq;
	104
	105	/* list of mounts that are (potentially) making lockd requests */
	106	TAILQ_HEAD(nfs_lockd_mount_list,nfsmount) nfs_lockd_mount_list;
	107
	108	static lck_grp_t *nfs_lock_lck_grp;
	109	static lck_mtx_t *nfs_lock_mutex;
	110
	111	void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
	112	void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
	113	int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST , struct lockd_ans );
	114	LOCKD_MSG_REQUEST nfs_lockdmsg_find_by_answer(struct lockd_ans );
	115	LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
	116	uint64_t nfs_lockxid_get(void);
	117	int nfs_lockd_send_request(LOCKD_MSG *, int);
	118
	119	/*
	120	* initialize global nfs lock state
	121	*/
	122	void
	123	nfs_lockinit(void)
	124	{
	125	TAILQ_INIT(&nfs_pendlockq);
	126	TAILQ_INIT(&nfs_lockd_mount_list);
	127
	128	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	129	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
	130	}
	131
	132	/*
	133	* Register a mount as (potentially) making lockd requests.
	134	*/
	135	void
	136	nfs_lockd_mount_register(struct nfsmount *nmp)
	137	{
	138	lck_mtx_lock(nfs_lock_mutex);
	139	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
	140	nfs_lockd_mounts++;
	141	lck_mtx_unlock(nfs_lock_mutex);
	142	}
	143
	144	/*
	145	* Unregister a mount as (potentially) making lockd requests.
	146	*
	147	* When the lockd mount count drops to zero, then send a shutdown request to
	148	* lockd if we've sent any requests to it.
	149	*/
	150	void
	151	nfs_lockd_mount_unregister(struct nfsmount *nmp)
	152	{
	153	int send_shutdown;
	154	mach_port_t lockd_port = IPC_PORT_NULL;
	155	kern_return_t kr;
	156
	157	lck_mtx_lock(nfs_lock_mutex);
	158	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
	159	nfs_lockd_mounts--;
	160
	161	/* send a shutdown request if there are no more lockd mounts */
	162	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	163	if (send_shutdown)
	164	nfs_lockd_request_sent = 0;
	165
	166	lck_mtx_unlock(nfs_lock_mutex);
	167
	168	if (!send_shutdown)
	169	return;
	170
	171	/*
	172	* Let lockd know that it is no longer needed for any NFS mounts
	173	*/
	174	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	175	if ((kr != KERN_SUCCESS) \|\| !IPC_PORT_VALID(lockd_port)) {
	176	printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n",
	177	kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
	178	(lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
	179	return;
	180	}
	181
	182	kr = lockd_shutdown(lockd_port);
	183	if (kr != KERN_SUCCESS)
	184	printf("nfs_lockd_mount_change: shutdown %d\n", kr);
	185
	186	ipc_port_release_send(lockd_port);
	187	}
	188
	189	/*
	190	* insert a lock request message into the pending queue
	191	* (nfs_lock_mutex must be held)
	192	*/
	193	void
	194	nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
	195	{
	196	LOCKD_MSG_REQUEST *mr;
	197
	198	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	199	if (!mr \|\| (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
	200	/* fast path: empty queue or new largest xid */
	201	TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
	202	return;
	203	}
	204	/* slow path: need to walk list to find insertion point */
	205	while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
	206	mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	207	}
	208	if (mr) {
	209	TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	210	} else {
	211	TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	212	}
	213	}
	214
	215	/*
	216	* remove a lock request message from the pending queue
	217	* (nfs_lock_mutex must be held)
	218	*/
	219	void
	220	nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
	221	{
	222	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
	223	}
	224
	225	/*
	226	* find a pending lock request message by xid
	227	*
	228	* We search from the head of the list assuming that the message we're
	229	* looking for is for an older request (because we have an answer to it).
	230	* This assumes that lock request will be answered primarily in FIFO order.
	231	* However, this may not be the case if there are blocked requests. We may
	232	* want to move blocked requests to a separate queue (but that'll complicate
	233	* duplicate xid checking).
	234	*
	235	* (nfs_lock_mutex must be held)
	236	*/
	237	LOCKD_MSG_REQUEST *
	238	nfs_lockdmsg_find_by_xid(uint64_t lockxid)
	239	{
	240	LOCKD_MSG_REQUEST *mr;
	241
	242	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
	243	if (mr->lmr_msg.lm_xid == lockxid)
	244	return mr;
	245	if (mr->lmr_msg.lm_xid > lockxid)
	246	return NULL;
	247	}
	248	return mr;
	249	}
	250
	251	/*
	252	* Because we can't depend on nlm_granted messages containing the same
	253	* cookie we sent with the original lock request, we need code to test
	254	* if an nlm_granted answer matches the lock request. We also need code
	255	* that can find a lockd message based solely on the nlm_granted answer.
	256	*/
	257
	258	/*
	259	* compare lockd message to answer
	260	*
	261	* returns 0 on equality and 1 if different
	262	*/
	263	int
	264	nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST msgreq, struct lockd_ans ansp)
	265	{
	266	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
	267	return 1;
	268	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
	269	return 1;
	270	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
	271	return 1;
	272	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
	273	return 1;
	274	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
	275	return 1;
	276	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
	277	return 1;
	278	return 0;
	279	}
	280
	281	/*
	282	* find a pending lock request message based on the lock info provided
	283	* in the lockd_ans/nlm_granted data. We need this because we can't
	284	* depend on nlm_granted messages containing the same cookie we sent
	285	* with the original lock request.
	286	*
	287	* We search from the head of the list assuming that the message we're
	288	* looking for is for an older request (because we have an answer to it).
	289	* This assumes that lock request will be answered primarily in FIFO order.
	290	* However, this may not be the case if there are blocked requests. We may
	291	* want to move blocked requests to a separate queue (but that'll complicate
	292	* duplicate xid checking).
	293	*
	294	* (nfs_lock_mutex must be held)
	295	*/
	296	LOCKD_MSG_REQUEST *
	297	nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
	298	{
	299	LOCKD_MSG_REQUEST *mr;
	300
	301	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
	302	return NULL;
	303	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
	304	if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
	305	break;
	306	}
	307	return mr;
	308	}
	309
	310	/*
	311	* return the next unique lock request transaction ID
	312	* (nfs_lock_mutex must be held)
	313	*/
	314	uint64_t
	315	nfs_lockxid_get(void)
	316	{
	317	LOCKD_MSG_REQUEST *mr;
	318
	319	/* derive initial lock xid from system time */
	320	if (!nfs_lockxid) {
	321	/*
	322	* Note: it's OK if this code inits nfs_lockxid to 0 (for example,
	323	* due to a broken clock) because we immediately increment it
	324	* and we guarantee to never use xid 0. So, nfs_lockxid should only
	325	* ever be 0 the first time this function is called.
	326	*/
	327	struct timeval tv;
	328	microtime(&tv);
	329	nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	330	}
	331
	332	/* make sure we get a unique xid */
	333	do {
	334	/* Skip zero xid if it should ever happen. */
	335	if (++nfs_lockxid == 0)
	336	nfs_lockxid++;
	337	if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) \|\|
	338	(mr->lmr_msg.lm_xid < nfs_lockxid)) {
	339	/* fast path: empty queue or new largest xid */
	340	break;
	341	}
	342	/* check if xid is already in use */
	343	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
	344
	345	return nfs_lockxid;
	346	}
	347
	348	#define MACH_MAX_TRIES 3
	349
	350	int
	351	nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
	352	{
	353	kern_return_t kr;
	354	int retries = 0;
	355	mach_port_t lockd_port = IPC_PORT_NULL;
	356
	357	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	358	if (kr != KERN_SUCCESS \|\| !IPC_PORT_VALID(lockd_port))
	359	return (ENOTSUP);
	360
	361	do {
	362	/* In the kernel all mach messaging is interruptable */
	363	do {
	364	kr = lockd_request(
	365	lockd_port,
	366	msg->lm_version,
	367	msg->lm_flags,
	368	msg->lm_xid,
	369	msg->lm_fl.l_start,
	370	msg->lm_fl.l_len,
	371	msg->lm_fl.l_pid,
	372	msg->lm_fl.l_type,
	373	msg->lm_fl.l_whence,
	374	(uint32_t *)&msg->lm_addr,
	375	(uint32_t *)&msg->lm_cred,
	376	msg->lm_fh_len,
	377	msg->lm_fh);
	378	if (kr != KERN_SUCCESS)
	379	printf("lockd_request received %d!\n", kr);
	380	} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	381	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);
	382
	383	ipc_port_release_send(lockd_port);
	384	switch (kr) {
	385	case MACH_SEND_INTERRUPTED:
	386	return (EINTR);
	387	default:
	388	/*
	389	* Other MACH or MIG errors we will retry. Eventually
	390	* we will call nfs_down and allow the user to disable
	391	* locking.
	392	*/
	393	return (EAGAIN);
	394	}
	395	return (kr);
	396	}
	397
	398
	399	/*
	400	* NFS advisory byte-level locks (client)
	401	*/
	402	int
	403	nfs3_lockd_request(
	404	nfsnode_t np,
	405	int type,
	406	LOCKD_MSG_REQUEST *msgreq,
	407	int flags,
	408	thread_t thd)
	409	{
	410	LOCKD_MSG *msg = &msgreq->lmr_msg;
	411	int error, error2;
	412	int interruptable, slpflag;
	413	struct nfsmount *nmp;
	414	struct timeval now;
	415	int timeo, starttime, endtime, lastmsg, wentdown = 0;
	416	struct timespec ts;
	417	struct sockaddr *saddr;
	418
	419	nmp = NFSTONMP(np);
	420	if (!nmp \|\| !nmp->nm_saddr)
	421	return (ENXIO);
	422
	423	lck_mtx_lock(&nmp->nm_lock);
	424	saddr = nmp->nm_saddr;
	425	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	426	if (nmp->nm_vers == NFS_VER3)
	427	msg->lm_flags \|= LOCKD_MSG_NFSV3;
	428
	429	if (nmp->nm_sotype != SOCK_DGRAM)
	430	msg->lm_flags \|= LOCKD_MSG_TCP;
	431
	432	microuptime(&now);
	433	starttime = now.tv_sec;
	434	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	435	interruptable = NMFLAG(nmp, INTR);
	436	lck_mtx_unlock(&nmp->nm_lock);
	437
	438	lck_mtx_lock(nfs_lock_mutex);
	439
	440	/* allocate unique xid */
	441	msg->lm_xid = nfs_lockxid_get();
	442	nfs_lockdmsg_enqueue(msgreq);
	443
	444	timeo = 4;
	445
	446	for (;;) {
	447	nfs_lockd_request_sent = 1;
	448
	449	/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
	450	lck_mtx_unlock(nfs_lock_mutex);
	451	error = nfs_lockd_send_request(msg, interruptable);
	452	lck_mtx_lock(nfs_lock_mutex);
	453	if (error && error != EAGAIN)
	454	break;
	455
	456	/*
	457	* Always wait for an answer. Not waiting for unlocks could
	458	* cause a lock to be left if the unlock request gets dropped.
	459	*/
	460
	461	/*
	462	* Retry if it takes too long to get a response.
	463	*
	464	* The timeout numbers were picked out of thin air... they start
	465	* at 4 and double each timeout with a max of 30 seconds.
	466	*
	467	* In order to maintain responsiveness, we pass a small timeout
	468	* to msleep and calculate the timeouts ourselves. This allows
	469	* us to pick up on mount changes quicker.
	470	*/
	471	wait_for_granted:
	472	error = EWOULDBLOCK;
	473	slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
	474	ts.tv_sec = 2;
	475	ts.tv_nsec = 0;
	476	microuptime(&now);
	477	endtime = now.tv_sec + timeo;
	478	while (now.tv_sec < endtime) {
	479	error = error2 = 0;
	480	if (!msgreq->lmr_answered) {
	481	error = msleep(msgreq, nfs_lock_mutex, slpflag \| PUSER, "lockd", &ts);
	482	slpflag = 0;
	483	}
	484	if (msgreq->lmr_answered) {
	485	/*
	486	* Note: it's possible to have a lock granted at
	487	* essentially the same time that we get interrupted.
	488	* Since the lock may be granted, we can't return an
	489	* error from this request or we might not unlock the
	490	* lock that's been granted.
	491	*/
	492	nmp = NFSTONMP(np);
	493	if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
	494	(nmp->nm_state & NFSSTA_LOCKSWORK)) {
	495	/*
	496	* We have evidence that locks work, yet lockd
	497	* returned ENOTSUP. This is probably because
	498	* it was unable to contact the server's lockd
	499	* to send it the request.
	500	*
	501	* Because we know locks work, we'll consider
	502	* this failure to be a timeout.
	503	*/
	504	error = EWOULDBLOCK;
	505	} else {
	506	error = 0;
	507	}
	508	break;
	509	}
	510	if (error != EWOULDBLOCK)
	511	break;
	512	/* check that we still have our mount... */
	513	/* ...and that we still support locks */
	514	/* ...and that there isn't a recovery pending */
	515	nmp = NFSTONMP(np);
	516	if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
	517	error = error2;
	518	if (type == F_UNLCK)
	519	printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
	520	break;
	521	}
	522	lck_mtx_lock(&nmp->nm_lock);
	523	if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
	524	lck_mtx_unlock(&nmp->nm_lock);
	525	break;
	526	}
	527	if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	528	/* recovery pending... return an error that'll get this operation restarted */
	529	error = NFSERR_GRACE;
	530	lck_mtx_unlock(&nmp->nm_lock);
	531	break;
	532	}
	533	interruptable = NMFLAG(nmp, INTR);
	534	lck_mtx_unlock(&nmp->nm_lock);
	535	microuptime(&now);
	536	}
	537	if (error) {
	538	/* check that we still have our mount... */
	539	nmp = NFSTONMP(np);
	540	if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
	541	error = error2;
	542	if (error2 != EINTR) {
	543	if (type == F_UNLCK)
	544	printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
	545	break;
	546	}
	547	}
	548	/* ...and that we still support locks */
	549	lck_mtx_lock(&nmp->nm_lock);
	550	if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
	551	if (error == EWOULDBLOCK)
	552	error = ENOTSUP;
	553	lck_mtx_unlock(&nmp->nm_lock);
	554	break;
	555	}
	556	/* ...and that there isn't a recovery pending */
	557	if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	558	/* recovery pending... return to allow recovery to occur */
	559	error = NFSERR_DENIED;
	560	lck_mtx_unlock(&nmp->nm_lock);
	561	break;
	562	}
	563	interruptable = NMFLAG(nmp, INTR);
	564	if ((error != EWOULDBLOCK) \|\|
	565	((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) \|\|
	566	((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
	567	if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
	568	/* give up if this is for recovery and taking too long */
	569	error = ETIMEDOUT;
	570	} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	571	/* recovery pending... return an error that'll get this operation restarted */
	572	error = NFSERR_GRACE;
	573	}
	574	lck_mtx_unlock(&nmp->nm_lock);
	575	/*
	576	* We're going to bail on this request.
	577	* If we were a blocked lock request, send a cancel.
	578	*/
	579	if ((msgreq->lmr_errno == EINPROGRESS) &&
	580	!(msg->lm_flags & LOCKD_MSG_CANCEL)) {
	581	/* set this request up as a cancel */
	582	msg->lm_flags \|= LOCKD_MSG_CANCEL;
	583	nfs_lockdmsg_dequeue(msgreq);
	584	msg->lm_xid = nfs_lockxid_get();
	585	nfs_lockdmsg_enqueue(msgreq);
	586	msgreq->lmr_saved_errno = error;
	587	msgreq->lmr_errno = 0;
	588	msgreq->lmr_answered = 0;
	589	/* reset timeout */
	590	timeo = 2;
	591	/* send cancel request */
	592	continue;
	593	}
	594	break;
	595	}
	596
	597	/* warn if we're not getting any response */
	598	microuptime(&now);
	599	if ((msgreq->lmr_errno != EINPROGRESS) &&
	600	!(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
	601	(nmp->nm_tprintf_initial_delay != 0) &&
	602	((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
	603	lck_mtx_unlock(&nmp->nm_lock);
	604	lastmsg = now.tv_sec;
	605	nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding");
	606	wentdown = 1;
	607	} else
	608	lck_mtx_unlock(&nmp->nm_lock);
	609
	610	if (msgreq->lmr_errno == EINPROGRESS) {
	611	/*
	612	* We've got a blocked lock request that we are
	613	* going to retry. First, we'll want to try to
	614	* send a cancel for the previous request.
	615	*
	616	* Clear errno so if we don't get a response
	617	* to the resend we'll call nfs_down().
	618	* Also reset timeout because we'll expect a
	619	* quick response to the cancel/resend (even if
	620	* it is NLM_BLOCKED).
	621	*/
	622	msg->lm_flags \|= LOCKD_MSG_CANCEL;
	623	nfs_lockdmsg_dequeue(msgreq);
	624	msg->lm_xid = nfs_lockxid_get();
	625	nfs_lockdmsg_enqueue(msgreq);
	626	msgreq->lmr_saved_errno = msgreq->lmr_errno;
	627	msgreq->lmr_errno = 0;
	628	msgreq->lmr_answered = 0;
	629	timeo = 2;
	630	/* send cancel then resend request */
	631	continue;
	632	}
	633
	634	/*
	635	* We timed out, so we will resend the request.
	636	*/
	637	if (!(flags & R_RECOVER))
	638	timeo *= 2;
	639	if (timeo > 30)
	640	timeo = 30;
	641	/* resend request */
	642	continue;
	643	}
	644
	645	/* we got a reponse, so the server's lockd is OK */
	646	nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
	647	wentdown ? "lockd alive again" : NULL);
	648	wentdown = 0;
	649
	650	if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
	651	/*
	652	* The lock request was denied because the server lockd is
	653	* still in its grace period. So, we need to try the
	654	* request again in a little bit. Return the GRACE error so
	655	* the higher levels can perform the retry.
	656	*/
	657	msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
	658	}
	659
	660	if (msgreq->lmr_errno == EINPROGRESS) {
	661	/* got NLM_BLOCKED response */
	662	/* need to wait for NLM_GRANTED */
	663	timeo = 30;
	664	msgreq->lmr_answered = 0;
	665	goto wait_for_granted;
	666	}
	667
	668	if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
	669	(msgreq->lmr_saved_errno == EINPROGRESS)) {
	670	/*
	671	* We just got a successful reply to the
	672	* cancel of the previous blocked lock request.
	673	* Now, go ahead and return a DENIED error so the
	674	* higher levels can resend the request.
	675	*/
	676	msg->lm_flags &= ~LOCKD_MSG_CANCEL;
	677	nfs_lockdmsg_dequeue(msgreq);
	678	error = NFSERR_DENIED;
	679	break;
	680	}
	681
	682	/*
	683	* If the blocked lock request was cancelled.
	684	* Restore the error condition from when we
	685	* originally bailed on the request.
	686	*/
	687	if (msg->lm_flags & LOCKD_MSG_CANCEL) {
	688	msg->lm_flags &= ~LOCKD_MSG_CANCEL;
	689	error = msgreq->lmr_saved_errno;
	690	} else {
	691	error = msgreq->lmr_errno;
	692	}
	693
	694	nmp = NFSTONMP(np);
	695	if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
	696	/*
	697	* We have NO evidence that locks work and lockd
	698	* returned ENOTSUP. Let's take this as a hint
	699	* that locks aren't supported and disable them
	700	* for this mount.
	701	*/
	702	nfs_lockdmsg_dequeue(msgreq);
	703	lck_mtx_unlock(nfs_lock_mutex);
	704	lck_mtx_lock(&nmp->nm_lock);
	705	if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
	706	nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
	707	nfs_lockd_mount_unregister(nmp);
	708	}
	709	nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	710	lck_mtx_unlock(&nmp->nm_lock);
	711	printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
	712	vfs_statfs(nmp->nm_mountp)->f_mntfromname);
	713	return (error);
	714	}
	715	if (!error) {
	716	/* record that NFS file locking has worked on this mount */
	717	if (nmp) {
	718	lck_mtx_lock(&nmp->nm_lock);
	719	if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
	720	nmp->nm_state \|= NFSSTA_LOCKSWORK;
	721	lck_mtx_unlock(&nmp->nm_lock);
	722	}
	723	}
	724	break;
	725	}
	726
	727	nfs_lockdmsg_dequeue(msgreq);
	728
	729	lck_mtx_unlock(nfs_lock_mutex);
	730
	731	return (error);
	732	}
	733
	734	/*
	735	* Send an NLM LOCK message to the server
	736	*/
	737	int
	738	nfs3_setlock_rpc(
	739	nfsnode_t np,
	740	struct nfs_open_file *nofp,
	741	struct nfs_file_lock *nflp,
	742	int reclaim,
	743	int flags,
	744	thread_t thd,
	745	kauth_cred_t cred)
	746	{
	747	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	748	struct nfsmount *nmp;
	749	int error;
	750	LOCKD_MSG_REQUEST msgreq;
	751	LOCKD_MSG *msg;
	752
	753	nmp = NFSTONMP(np);
	754	if (!nmp)
	755	return (ENXIO);
	756
	757	if (!nlop->nlo_open_owner) {
	758	nfs_open_owner_ref(nofp->nof_owner);
	759	nlop->nlo_open_owner = nofp->nof_owner;
	760	}
	761	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
	762	return (error);
	763
	764	/* set up lock message request structure */
	765	bzero(&msgreq, sizeof(msgreq));
	766	msg = &msgreq.lmr_msg;
	767	msg->lm_version = LOCKD_MSG_VERSION;
	768	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim)
	769	msg->lm_flags \|= LOCKD_MSG_BLOCK;
	770	if (reclaim)
	771	msg->lm_flags \|= LOCKD_MSG_RECLAIM;
	772	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	773	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	774	cru2x(cred, &msg->lm_cred);
	775
	776	msg->lm_fl.l_whence = SEEK_SET;
	777	msg->lm_fl.l_start = nflp->nfl_start;
	778	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
	779	msg->lm_fl.l_type = nflp->nfl_type;
	780	msg->lm_fl.l_pid = nlop->nlo_pid;
	781
	782	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);
	783
	784	nfs_lock_owner_clear_busy(nlop);
	785	return (error);
	786	}
	787
	788	/*
	789	* Send an NLM UNLOCK message to the server
	790	*/
	791	int
	792	nfs3_unlock_rpc(
	793	nfsnode_t np,
	794	struct nfs_lock_owner *nlop,
	795	__unused int type,
	796	uint64_t start,
	797	uint64_t end,
	798	int flags,
	799	thread_t thd,
	800	kauth_cred_t cred)
	801	{
	802	struct nfsmount *nmp;
	803	LOCKD_MSG_REQUEST msgreq;
	804	LOCKD_MSG *msg;
	805
	806	nmp = NFSTONMP(np);
	807	if (!nmp)
	808	return (ENXIO);
	809
	810	/* set up lock message request structure */
	811	bzero(&msgreq, sizeof(msgreq));
	812	msg = &msgreq.lmr_msg;
	813	msg->lm_version = LOCKD_MSG_VERSION;
	814	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	815	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	816	cru2x(cred, &msg->lm_cred);
	817
	818	msg->lm_fl.l_whence = SEEK_SET;
	819	msg->lm_fl.l_start = start;
	820	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	821	msg->lm_fl.l_type = F_UNLCK;
	822	msg->lm_fl.l_pid = nlop->nlo_pid;
	823
	824	return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd));
	825	}
	826
	827	/*
	828	* Send an NLM LOCK TEST message to the server
	829	*/
	830	int
	831	nfs3_getlock_rpc(
	832	nfsnode_t np,
	833	struct nfs_lock_owner *nlop,
	834	struct flock *fl,
	835	uint64_t start,
	836	uint64_t end,
	837	vfs_context_t ctx)
	838	{
	839	struct nfsmount *nmp;
	840	int error;
	841	LOCKD_MSG_REQUEST msgreq;
	842	LOCKD_MSG *msg;
	843
	844	nmp = NFSTONMP(np);
	845	if (!nmp)
	846	return (ENXIO);
	847
	848	/* set up lock message request structure */
	849	bzero(&msgreq, sizeof(msgreq));
	850	msg = &msgreq.lmr_msg;
	851	msg->lm_version = LOCKD_MSG_VERSION;
	852	msg->lm_flags \|= LOCKD_MSG_TEST;
	853	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	854	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	855	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
	856
	857	msg->lm_fl.l_whence = SEEK_SET;
	858	msg->lm_fl.l_start = start;
	859	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	860	msg->lm_fl.l_type = fl->l_type;
	861	msg->lm_fl.l_pid = nlop->nlo_pid;
	862
	863	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));
	864
	865	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
	866	if (msg->lm_fl.l_type != F_UNLCK) {
	867	fl->l_type = msg->lm_fl.l_type;
	868	fl->l_pid = msg->lm_fl.l_pid;
	869	fl->l_start = msg->lm_fl.l_start;
	870	fl->l_len = msg->lm_fl.l_len;
	871	fl->l_whence = SEEK_SET;
	872	} else
	873	fl->l_type = F_UNLCK;
	874	}
	875
	876	return (error);
	877	}
	878
	879	/*
	880	* nfslockdans --
	881	* NFS advisory byte-level locks answer from the lock daemon.
	882	*/
	883	int
	884	nfslockdans(proc_t p, struct lockd_ans *ansp)
	885	{
	886	LOCKD_MSG_REQUEST *msgreq;
	887	int error;
	888
	889	/* Let root make this call. */
	890	error = proc_suser(p);
	891	if (error)
	892	return (error);
	893
	894	/* the version should match, or we're out of sync */
	895	if (ansp->la_version != LOCKD_ANS_VERSION)
	896	return (EINVAL);
	897
	898	lck_mtx_lock(nfs_lock_mutex);
	899
	900	/* try to find the lockd message by transaction id (cookie) */
	901	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	902	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
	903	/*
	904	* We can't depend on the granted message having our cookie,
	905	* so we check the answer against the lockd message found.
	906	* If no message was found or it doesn't match the answer,
	907	* we look for the lockd message by the answer's lock info.
	908	*/
	909	if (!msgreq \|\| nfs_lockdmsg_compare_to_answer(msgreq, ansp))
	910	msgreq = nfs_lockdmsg_find_by_answer(ansp);
	911	/*
	912	* We need to make sure this request isn't being cancelled
	913	* If it is, we don't want to accept the granted message.
	914	*/
	915	if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
	916	msgreq = NULL;
	917	}
	918	if (!msgreq) {
	919	lck_mtx_unlock(nfs_lock_mutex);
	920	return (EPIPE);
	921	}
	922
	923	msgreq->lmr_errno = ansp->la_errno;
	924	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
	925	if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
	926	if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
	927	msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
	928	else
	929	msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
	930	msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
	931	msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
	932	msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
	933	} else {
	934	msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
	935	}
	936	}
	937	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE)
	938	msgreq->lmr_msg.lm_flags \|= LOCKD_MSG_DENIED_GRACE;
	939
	940	msgreq->lmr_answered = 1;
	941	lck_mtx_unlock(nfs_lock_mutex);
	942	wakeup(msgreq);
	943
	944	return (0);
	945	}
	946
	947	/*
	948	* nfslockdnotify --
	949	* NFS host restart notification from the lock daemon.
	950	*
	951	* Used to initiate reclaiming of held locks when a server we
	952	* have mounted reboots.
	953	*/
	954	int
	955	nfslockdnotify(proc_t p, user_addr_t argp)
	956	{
	957	int error, i, headsize;
	958	struct lockd_notify ln;
	959	struct nfsmount *nmp;
	960	struct sockaddr *saddr;
	961
	962	/* Let root make this call. */
	963	error = proc_suser(p);
	964	if (error)
	965	return (error);
	966
	967	headsize = (char)&ln.ln_addr[0] - (char)&ln.ln_version;
	968	error = copyin(argp, &ln, headsize);
	969	if (error)
	970	return (error);
	971	if (ln.ln_version != LOCKD_NOTIFY_VERSION)
	972	return (EINVAL);
	973	if ((ln.ln_addrcount < 1) \|\| (ln.ln_addrcount > 128))
	974	return (EINVAL);
	975	argp += headsize;
	976	saddr = (struct sockaddr *)&ln.ln_addr[0];
	977
	978	lck_mtx_lock(nfs_lock_mutex);
	979
	980	for (i=0; i < ln.ln_addrcount; i++) {
	981	error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
	982	if (error)
	983	break;
	984	argp += sizeof(ln.ln_addr[0]);
	985	/* scan lockd mount list for match to this address */
	986	TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
	987	/* check if address matches this mount's server address */
	988	if (!nmp->nm_saddr \|\| nfs_sockaddr_cmp(saddr, nmp->nm_saddr))
	989	continue;
	990	/* We have a match! Mark it as needing recovery. */
	991	lck_mtx_lock(&nmp->nm_lock);
	992	nfs_need_recover(nmp, 0);
	993	lck_mtx_unlock(&nmp->nm_lock);
	994	}
	995	}
	996
	997	lck_mtx_unlock(nfs_lock_mutex);
	998
	999	return (error);
	1000	}
	1001