git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2002-2014 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*-
	29	* Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
	30	*
	31	* Redistribution and use in source and binary forms, with or without
	32	* modification, are permitted provided that the following conditions
	33	* are met:
	34	* 1. Redistributions of source code must retain the above copyright
	35	* notice, this list of conditions and the following disclaimer.
	36	* 2. Redistributions in binary form must reproduce the above copyright
	37	* notice, this list of conditions and the following disclaimer in the
	38	* documentation and/or other materials provided with the distribution.
	39	* 3. Berkeley Software Design Inc's name may not be used to endorse or
	40	* promote products derived from this software without specific prior
	41	* written permission.
	42	*
	43	* THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
	44	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	45	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	46	* ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
	47	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	48	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	49	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	50	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	51	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	52	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	53	* SUCH DAMAGE.
	54	*
	55	* from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
	56	*/
	57
	58	#include <sys/cdefs.h>
	59	#include <sys/param.h>
	60	#include <sys/systm.h>
	61	#include <sys/fcntl.h>
	62	#include <sys/kernel.h> /* for hz */
	63	#include <sys/file_internal.h>
	64	#include <sys/malloc.h>
	65	#include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */
	66	#include <sys/kpi_mbuf.h>
	67	#include <sys/mount_internal.h>
	68	#include <sys/proc_internal.h> /* for p_start */
	69	#include <sys/kauth.h>
	70	#include <sys/resourcevar.h>
	71	#include <sys/socket.h>
	72	#include <sys/unistd.h>
	73	#include <sys/user.h>
	74	#include <sys/vnode_internal.h>
	75
	76	#include <kern/thread.h>
	77	#include <kern/host.h>
	78
	79	#include <machine/limits.h>
	80
	81	#include <net/if.h>
	82
	83	#include <nfs/rpcv2.h>
	84	#include <nfs/nfsproto.h>
	85	#include <nfs/nfs.h>
	86	#include <nfs/nfs_gss.h>
	87	#include <nfs/nfsmount.h>
	88	#include <nfs/nfsnode.h>
	89	#include <nfs/nfs_lock.h>
	90
	91	#include <mach/host_priv.h>
	92	#include <mach/mig_errors.h>
	93	#include <mach/host_special_ports.h>
	94	#include <lockd/lockd_mach.h>
	95
	96	extern void ipc_port_release_send(ipc_port_t);
	97
	98	/*
	99	* pending lock request messages are kept in this queue which is
	100	* kept sorted by transaction ID (xid).
	101	*/
	102	static uint64_t nfs_lockxid = 0;
	103	static LOCKD_MSG_QUEUE nfs_pendlockq;
	104
	105	/* list of mounts that are (potentially) making lockd requests */
	106	TAILQ_HEAD(nfs_lockd_mount_list,nfsmount) nfs_lockd_mount_list;
	107
	108	static lck_grp_t *nfs_lock_lck_grp;
	109	static lck_mtx_t *nfs_lock_mutex;
	110
	111	void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
	112	void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
	113	int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST , struct lockd_ans );
	114	LOCKD_MSG_REQUEST nfs_lockdmsg_find_by_answer(struct lockd_ans );
	115	LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
	116	uint64_t nfs_lockxid_get(void);
	117	int nfs_lockd_send_request(LOCKD_MSG *, int);
	118
	119	/*
	120	* initialize global nfs lock state
	121	*/
	122	void
	123	nfs_lockinit(void)
	124	{
	125	TAILQ_INIT(&nfs_pendlockq);
	126	TAILQ_INIT(&nfs_lockd_mount_list);
	127
	128	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	129	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
	130	}
	131
	132	/*
	133	* Register a mount as (potentially) making lockd requests.
	134	*/
	135	void
	136	nfs_lockd_mount_register(struct nfsmount *nmp)
	137	{
	138	lck_mtx_lock(nfs_lock_mutex);
	139	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
	140	nfs_lockd_mounts++;
	141	lck_mtx_unlock(nfs_lock_mutex);
	142	}
	143
	144	/*
	145	* Unregister a mount as (potentially) making lockd requests.
	146	*
	147	* When the lockd mount count drops to zero, then send a shutdown request to
	148	* lockd if we've sent any requests to it.
	149	*/
	150	void
	151	nfs_lockd_mount_unregister(struct nfsmount *nmp)
	152	{
	153	int send_shutdown;
	154	mach_port_t lockd_port = IPC_PORT_NULL;
	155	kern_return_t kr;
	156
	157	lck_mtx_lock(nfs_lock_mutex);
	158	if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
	159	lck_mtx_unlock(nfs_lock_mutex);
	160	return;
	161	}
	162
	163	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
	164	nmp->nm_ldlink.tqe_next = NFSNOLIST;
	165
	166	nfs_lockd_mounts--;
	167
	168	/* send a shutdown request if there are no more lockd mounts */
	169	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	170	if (send_shutdown)
	171	nfs_lockd_request_sent = 0;
	172
	173	lck_mtx_unlock(nfs_lock_mutex);
	174
	175	if (!send_shutdown)
	176	return;
	177
	178	/*
	179	* Let lockd know that it is no longer needed for any NFS mounts
	180	*/
	181	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	182	if ((kr != KERN_SUCCESS) \|\| !IPC_PORT_VALID(lockd_port)) {
	183	printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n",
	184	kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
	185	(lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
	186	return;
	187	}
	188
	189	kr = lockd_shutdown(lockd_port);
	190	if (kr != KERN_SUCCESS)
	191	printf("nfs_lockd_mount_change: shutdown %d\n", kr);
	192
	193	ipc_port_release_send(lockd_port);
	194	}
	195
	196	/*
	197	* insert a lock request message into the pending queue
	198	* (nfs_lock_mutex must be held)
	199	*/
	200	void
	201	nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
	202	{
	203	LOCKD_MSG_REQUEST *mr;
	204
	205	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	206	if (!mr \|\| (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
	207	/* fast path: empty queue or new largest xid */
	208	TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
	209	return;
	210	}
	211	/* slow path: need to walk list to find insertion point */
	212	while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
	213	mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	214	}
	215	if (mr) {
	216	TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	217	} else {
	218	TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	219	}
	220	}
	221
	222	/*
	223	* remove a lock request message from the pending queue
	224	* (nfs_lock_mutex must be held)
	225	*/
	226	void
	227	nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
	228	{
	229	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
	230	}
	231
	232	/*
	233	* find a pending lock request message by xid
	234	*
	235	* We search from the head of the list assuming that the message we're
	236	* looking for is for an older request (because we have an answer to it).
	237	* This assumes that lock request will be answered primarily in FIFO order.
	238	* However, this may not be the case if there are blocked requests. We may
	239	* want to move blocked requests to a separate queue (but that'll complicate
	240	* duplicate xid checking).
	241	*
	242	* (nfs_lock_mutex must be held)
	243	*/
	244	LOCKD_MSG_REQUEST *
	245	nfs_lockdmsg_find_by_xid(uint64_t lockxid)
	246	{
	247	LOCKD_MSG_REQUEST *mr;
	248
	249	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
	250	if (mr->lmr_msg.lm_xid == lockxid)
	251	return mr;
	252	if (mr->lmr_msg.lm_xid > lockxid)
	253	return NULL;
	254	}
	255	return mr;
	256	}
	257
	258	/*
	259	* Because we can't depend on nlm_granted messages containing the same
	260	* cookie we sent with the original lock request, we need code to test
	261	* if an nlm_granted answer matches the lock request. We also need code
	262	* that can find a lockd message based solely on the nlm_granted answer.
	263	*/
	264
	265	/*
	266	* compare lockd message to answer
	267	*
	268	* returns 0 on equality and 1 if different
	269	*/
	270	int
	271	nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST msgreq, struct lockd_ans ansp)
	272	{
	273	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
	274	return 1;
	275	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
	276	return 1;
	277	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
	278	return 1;
	279	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
	280	return 1;
	281	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
	282	return 1;
	283	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
	284	return 1;
	285	return 0;
	286	}
	287
	288	/*
	289	* find a pending lock request message based on the lock info provided
	290	* in the lockd_ans/nlm_granted data. We need this because we can't
	291	* depend on nlm_granted messages containing the same cookie we sent
	292	* with the original lock request.
	293	*
	294	* We search from the head of the list assuming that the message we're
	295	* looking for is for an older request (because we have an answer to it).
	296	* This assumes that lock request will be answered primarily in FIFO order.
	297	* However, this may not be the case if there are blocked requests. We may
	298	* want to move blocked requests to a separate queue (but that'll complicate
	299	* duplicate xid checking).
	300	*
	301	* (nfs_lock_mutex must be held)
	302	*/
	303	LOCKD_MSG_REQUEST *
	304	nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
	305	{
	306	LOCKD_MSG_REQUEST *mr;
	307
	308	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
	309	return NULL;
	310	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
	311	if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
	312	break;
	313	}
	314	return mr;
	315	}
	316
	317	/*
	318	* return the next unique lock request transaction ID
	319	* (nfs_lock_mutex must be held)
	320	*/
	321	uint64_t
	322	nfs_lockxid_get(void)
	323	{
	324	LOCKD_MSG_REQUEST *mr;
	325
	326	/* derive initial lock xid from system time */
	327	if (!nfs_lockxid) {
	328	/*
	329	* Note: it's OK if this code inits nfs_lockxid to 0 (for example,
	330	* due to a broken clock) because we immediately increment it
	331	* and we guarantee to never use xid 0. So, nfs_lockxid should only
	332	* ever be 0 the first time this function is called.
	333	*/
	334	struct timeval tv;
	335	microtime(&tv);
	336	nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	337	}
	338
	339	/* make sure we get a unique xid */
	340	do {
	341	/* Skip zero xid if it should ever happen. */
	342	if (++nfs_lockxid == 0)
	343	nfs_lockxid++;
	344	if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) \|\|
	345	(mr->lmr_msg.lm_xid < nfs_lockxid)) {
	346	/* fast path: empty queue or new largest xid */
	347	break;
	348	}
	349	/* check if xid is already in use */
	350	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
	351
	352	return nfs_lockxid;
	353	}
	354
	355	#define MACH_MAX_TRIES 3
	356
	357	int
	358	nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
	359	{
	360	kern_return_t kr;
	361	int retries = 0;
	362	mach_port_t lockd_port = IPC_PORT_NULL;
	363
	364	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	365	if (kr != KERN_SUCCESS \|\| !IPC_PORT_VALID(lockd_port))
	366	return (ENOTSUP);
	367
	368	do {
	369	/* In the kernel all mach messaging is interruptable */
	370	do {
	371	kr = lockd_request(
	372	lockd_port,
	373	msg->lm_version,
	374	msg->lm_flags,
	375	msg->lm_xid,
	376	msg->lm_fl.l_start,
	377	msg->lm_fl.l_len,
	378	msg->lm_fl.l_pid,
	379	msg->lm_fl.l_type,
	380	msg->lm_fl.l_whence,
	381	(uint32_t *)&msg->lm_addr,
	382	(uint32_t *)&msg->lm_cred,
	383	msg->lm_fh_len,
	384	msg->lm_fh);
	385	if (kr != KERN_SUCCESS)
	386	printf("lockd_request received %d!\n", kr);
	387	} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	388	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);
	389
	390	ipc_port_release_send(lockd_port);
	391	switch (kr) {
	392	case MACH_SEND_INTERRUPTED:
	393	return (EINTR);
	394	default:
	395	/*
	396	* Other MACH or MIG errors we will retry. Eventually
	397	* we will call nfs_down and allow the user to disable
	398	* locking.
	399	*/
	400	return (EAGAIN);
	401	}
	402	return (kr);
	403	}
	404
	405
	406	/*
	407	* NFS advisory byte-level locks (client)
	408	*/
	409	int
	410	nfs3_lockd_request(
	411	nfsnode_t np,
	412	int type,
	413	LOCKD_MSG_REQUEST *msgreq,
	414	int flags,
	415	thread_t thd)
	416	{
	417	LOCKD_MSG *msg = &msgreq->lmr_msg;
	418	int error, error2;
	419	int interruptable, slpflag;
	420	struct nfsmount *nmp;
	421	struct timeval now;
	422	int timeo, starttime, endtime, lastmsg, wentdown = 0;
	423	struct timespec ts;
	424	struct sockaddr *saddr;
	425
	426	nmp = NFSTONMP(np);
	427	if (!nmp \|\| !nmp->nm_saddr)
	428	return (ENXIO);
	429
	430	lck_mtx_lock(&nmp->nm_lock);
	431	saddr = nmp->nm_saddr;
	432	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	433	if (nmp->nm_vers == NFS_VER3)
	434	msg->lm_flags \|= LOCKD_MSG_NFSV3;
	435
	436	if (nmp->nm_sotype != SOCK_DGRAM)
	437	msg->lm_flags \|= LOCKD_MSG_TCP;
	438
	439	microuptime(&now);
	440	starttime = now.tv_sec;
	441	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	442	interruptable = NMFLAG(nmp, INTR);
	443	lck_mtx_unlock(&nmp->nm_lock);
	444
	445	lck_mtx_lock(nfs_lock_mutex);
	446
	447	/* allocate unique xid */
	448	msg->lm_xid = nfs_lockxid_get();
	449	nfs_lockdmsg_enqueue(msgreq);
	450
	451	timeo = 4;
	452
	453	for (;;) {
	454	nfs_lockd_request_sent = 1;
	455
	456	/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
	457	lck_mtx_unlock(nfs_lock_mutex);
	458	error = nfs_lockd_send_request(msg, interruptable);
	459	lck_mtx_lock(nfs_lock_mutex);
	460	if (error && error != EAGAIN)
	461	break;
	462
	463	/*
	464	* Always wait for an answer. Not waiting for unlocks could
	465	* cause a lock to be left if the unlock request gets dropped.
	466	*/
	467
	468	/*
	469	* Retry if it takes too long to get a response.
	470	*
	471	* The timeout numbers were picked out of thin air... they start
	472	* at 4 and double each timeout with a max of 30 seconds.
	473	*
	474	* In order to maintain responsiveness, we pass a small timeout
	475	* to msleep and calculate the timeouts ourselves. This allows
	476	* us to pick up on mount changes quicker.
	477	*/
	478	wait_for_granted:
	479	error = EWOULDBLOCK;
	480	slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
	481	ts.tv_sec = 2;
	482	ts.tv_nsec = 0;
	483	microuptime(&now);
	484	endtime = now.tv_sec + timeo;
	485	while (now.tv_sec < endtime) {
	486	error = error2 = 0;
	487	if (!msgreq->lmr_answered) {
	488	error = msleep(msgreq, nfs_lock_mutex, slpflag \| PUSER, "lockd", &ts);
	489	slpflag = 0;
	490	}
	491	if (msgreq->lmr_answered) {
	492	/*
	493	* Note: it's possible to have a lock granted at
	494	* essentially the same time that we get interrupted.
	495	* Since the lock may be granted, we can't return an
	496	* error from this request or we might not unlock the
	497	* lock that's been granted.
	498	*/
	499	nmp = NFSTONMP(np);
	500	if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
	501	(nmp->nm_state & NFSSTA_LOCKSWORK)) {
	502	/*
	503	* We have evidence that locks work, yet lockd
	504	* returned ENOTSUP. This is probably because
	505	* it was unable to contact the server's lockd
	506	* to send it the request.
	507	*
	508	* Because we know locks work, we'll consider
	509	* this failure to be a timeout.
	510	*/
	511	error = EWOULDBLOCK;
	512	} else {
	513	error = 0;
	514	}
	515	break;
	516	}
	517	if (error != EWOULDBLOCK)
	518	break;
	519	/* check that we still have our mount... */
	520	/* ...and that we still support locks */
	521	/* ...and that there isn't a recovery pending */
	522	nmp = NFSTONMP(np);
	523	if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
	524	error = error2;
	525	if (type == F_UNLCK)
	526	printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
	527	break;
	528	}
	529	lck_mtx_lock(&nmp->nm_lock);
	530	if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
	531	lck_mtx_unlock(&nmp->nm_lock);
	532	break;
	533	}
	534	if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	535	/* recovery pending... return an error that'll get this operation restarted */
	536	error = NFSERR_GRACE;
	537	lck_mtx_unlock(&nmp->nm_lock);
	538	break;
	539	}
	540	interruptable = NMFLAG(nmp, INTR);
	541	lck_mtx_unlock(&nmp->nm_lock);
	542	microuptime(&now);
	543	}
	544	if (error) {
	545	/* check that we still have our mount... */
	546	nmp = NFSTONMP(np);
	547	if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
	548	error = error2;
	549	if (error2 != EINTR) {
	550	if (type == F_UNLCK)
	551	printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
	552	break;
	553	}
	554	}
	555	/* ...and that we still support locks */
	556	lck_mtx_lock(&nmp->nm_lock);
	557	if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
	558	if (error == EWOULDBLOCK)
	559	error = ENOTSUP;
	560	lck_mtx_unlock(&nmp->nm_lock);
	561	break;
	562	}
	563	/* ...and that there isn't a recovery pending */
	564	if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	565	/* recovery pending... return to allow recovery to occur */
	566	error = NFSERR_DENIED;
	567	lck_mtx_unlock(&nmp->nm_lock);
	568	break;
	569	}
	570	interruptable = NMFLAG(nmp, INTR);
	571	if ((error != EWOULDBLOCK) \|\|
	572	((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) \|\|
	573	((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
	574	if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
	575	/* give up if this is for recovery and taking too long */
	576	error = ETIMEDOUT;
	577	} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	578	/* recovery pending... return an error that'll get this operation restarted */
	579	error = NFSERR_GRACE;
	580	}
	581	lck_mtx_unlock(&nmp->nm_lock);
	582	/*
	583	* We're going to bail on this request.
	584	* If we were a blocked lock request, send a cancel.
	585	*/
	586	if ((msgreq->lmr_errno == EINPROGRESS) &&
	587	!(msg->lm_flags & LOCKD_MSG_CANCEL)) {
	588	/* set this request up as a cancel */
	589	msg->lm_flags \|= LOCKD_MSG_CANCEL;
	590	nfs_lockdmsg_dequeue(msgreq);
	591	msg->lm_xid = nfs_lockxid_get();
	592	nfs_lockdmsg_enqueue(msgreq);
	593	msgreq->lmr_saved_errno = error;
	594	msgreq->lmr_errno = 0;
	595	msgreq->lmr_answered = 0;
	596	/* reset timeout */
	597	timeo = 2;
	598	/* send cancel request */
	599	continue;
	600	}
	601	break;
	602	}
	603
	604	/* warn if we're not getting any response */
	605	microuptime(&now);
	606	if ((msgreq->lmr_errno != EINPROGRESS) &&
	607	!(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
	608	(nmp->nm_tprintf_initial_delay != 0) &&
	609	((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
	610	lck_mtx_unlock(&nmp->nm_lock);
	611	lastmsg = now.tv_sec;
	612	nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 0);
	613	wentdown = 1;
	614	} else
	615	lck_mtx_unlock(&nmp->nm_lock);
	616
	617	if (msgreq->lmr_errno == EINPROGRESS) {
	618	/*
	619	* We've got a blocked lock request that we are
	620	* going to retry. First, we'll want to try to
	621	* send a cancel for the previous request.
	622	*
	623	* Clear errno so if we don't get a response
	624	* to the resend we'll call nfs_down().
	625	* Also reset timeout because we'll expect a
	626	* quick response to the cancel/resend (even if
	627	* it is NLM_BLOCKED).
	628	*/
	629	msg->lm_flags \|= LOCKD_MSG_CANCEL;
	630	nfs_lockdmsg_dequeue(msgreq);
	631	msg->lm_xid = nfs_lockxid_get();
	632	nfs_lockdmsg_enqueue(msgreq);
	633	msgreq->lmr_saved_errno = msgreq->lmr_errno;
	634	msgreq->lmr_errno = 0;
	635	msgreq->lmr_answered = 0;
	636	timeo = 2;
	637	/* send cancel then resend request */
	638	continue;
	639	}
	640
	641	/*
	642	* We timed out, so we will resend the request.
	643	*/
	644	if (!(flags & R_RECOVER))
	645	timeo *= 2;
	646	if (timeo > 30)
	647	timeo = 30;
	648	/* resend request */
	649	continue;
	650	}
	651
	652	/* we got a reponse, so the server's lockd is OK */
	653	nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
	654	wentdown ? "lockd alive again" : NULL);
	655	wentdown = 0;
	656
	657	if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
	658	/*
	659	* The lock request was denied because the server lockd is
	660	* still in its grace period. So, we need to try the
	661	* request again in a little bit. Return the GRACE error so
	662	* the higher levels can perform the retry.
	663	*/
	664	msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
	665	}
	666
	667	if (msgreq->lmr_errno == EINPROGRESS) {
	668	/* got NLM_BLOCKED response */
	669	/* need to wait for NLM_GRANTED */
	670	timeo = 30;
	671	msgreq->lmr_answered = 0;
	672	goto wait_for_granted;
	673	}
	674
	675	if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
	676	(msgreq->lmr_saved_errno == EINPROGRESS)) {
	677	/*
	678	* We just got a successful reply to the
	679	* cancel of the previous blocked lock request.
	680	* Now, go ahead and return a DENIED error so the
	681	* higher levels can resend the request.
	682	*/
	683	msg->lm_flags &= ~LOCKD_MSG_CANCEL;
	684	nfs_lockdmsg_dequeue(msgreq);
	685	error = NFSERR_DENIED;
	686	break;
	687	}
	688
	689	/*
	690	* If the blocked lock request was cancelled.
	691	* Restore the error condition from when we
	692	* originally bailed on the request.
	693	*/
	694	if (msg->lm_flags & LOCKD_MSG_CANCEL) {
	695	msg->lm_flags &= ~LOCKD_MSG_CANCEL;
	696	error = msgreq->lmr_saved_errno;
	697	} else {
	698	error = msgreq->lmr_errno;
	699	}
	700
	701	nmp = NFSTONMP(np);
	702	if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
	703	/*
	704	* We have NO evidence that locks work and lockd
	705	* returned ENOTSUP. Let's take this as a hint
	706	* that locks aren't supported and disable them
	707	* for this mount.
	708	*/
	709	nfs_lockdmsg_dequeue(msgreq);
	710	lck_mtx_unlock(nfs_lock_mutex);
	711	lck_mtx_lock(&nmp->nm_lock);
	712	if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
	713	nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
	714	nfs_lockd_mount_unregister(nmp);
	715	}
	716	nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	717	lck_mtx_unlock(&nmp->nm_lock);
	718	printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
	719	vfs_statfs(nmp->nm_mountp)->f_mntfromname);
	720	return (error);
	721	}
	722	if (!error) {
	723	/* record that NFS file locking has worked on this mount */
	724	if (nmp) {
	725	lck_mtx_lock(&nmp->nm_lock);
	726	if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
	727	nmp->nm_state \|= NFSSTA_LOCKSWORK;
	728	lck_mtx_unlock(&nmp->nm_lock);
	729	}
	730	}
	731	break;
	732	}
	733
	734	nfs_lockdmsg_dequeue(msgreq);
	735
	736	lck_mtx_unlock(nfs_lock_mutex);
	737
	738	return (error);
	739	}
	740
	741	/*
	742	* Send an NLM LOCK message to the server
	743	*/
	744	int
	745	nfs3_setlock_rpc(
	746	nfsnode_t np,
	747	struct nfs_open_file *nofp,
	748	struct nfs_file_lock *nflp,
	749	int reclaim,
	750	int flags,
	751	thread_t thd,
	752	kauth_cred_t cred)
	753	{
	754	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	755	struct nfsmount *nmp;
	756	int error;
	757	LOCKD_MSG_REQUEST msgreq;
	758	LOCKD_MSG *msg;
	759
	760	nmp = NFSTONMP(np);
	761	if (nfs_mount_gone(nmp))
	762	return (ENXIO);
	763
	764	if (!nlop->nlo_open_owner) {
	765	nfs_open_owner_ref(nofp->nof_owner);
	766	nlop->nlo_open_owner = nofp->nof_owner;
	767	}
	768	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
	769	return (error);
	770
	771	/* set up lock message request structure */
	772	bzero(&msgreq, sizeof(msgreq));
	773	msg = &msgreq.lmr_msg;
	774	msg->lm_version = LOCKD_MSG_VERSION;
	775	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim)
	776	msg->lm_flags \|= LOCKD_MSG_BLOCK;
	777	if (reclaim)
	778	msg->lm_flags \|= LOCKD_MSG_RECLAIM;
	779	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	780	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	781	cru2x(cred, &msg->lm_cred);
	782
	783	msg->lm_fl.l_whence = SEEK_SET;
	784	msg->lm_fl.l_start = nflp->nfl_start;
	785	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
	786	msg->lm_fl.l_type = nflp->nfl_type;
	787	msg->lm_fl.l_pid = nlop->nlo_pid;
	788
	789	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);
	790
	791	nfs_lock_owner_clear_busy(nlop);
	792	return (error);
	793	}
	794
	795	/*
	796	* Send an NLM UNLOCK message to the server
	797	*/
	798	int
	799	nfs3_unlock_rpc(
	800	nfsnode_t np,
	801	struct nfs_lock_owner *nlop,
	802	__unused int type,
	803	uint64_t start,
	804	uint64_t end,
	805	int flags,
	806	thread_t thd,
	807	kauth_cred_t cred)
	808	{
	809	struct nfsmount *nmp;
	810	LOCKD_MSG_REQUEST msgreq;
	811	LOCKD_MSG *msg;
	812
	813	nmp = NFSTONMP(np);
	814	if (!nmp)
	815	return (ENXIO);
	816
	817	/* set up lock message request structure */
	818	bzero(&msgreq, sizeof(msgreq));
	819	msg = &msgreq.lmr_msg;
	820	msg->lm_version = LOCKD_MSG_VERSION;
	821	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	822	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	823	cru2x(cred, &msg->lm_cred);
	824
	825	msg->lm_fl.l_whence = SEEK_SET;
	826	msg->lm_fl.l_start = start;
	827	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	828	msg->lm_fl.l_type = F_UNLCK;
	829	msg->lm_fl.l_pid = nlop->nlo_pid;
	830
	831	return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd));
	832	}
	833
	834	/*
	835	* Send an NLM LOCK TEST message to the server
	836	*/
	837	int
	838	nfs3_getlock_rpc(
	839	nfsnode_t np,
	840	struct nfs_lock_owner *nlop,
	841	struct flock *fl,
	842	uint64_t start,
	843	uint64_t end,
	844	vfs_context_t ctx)
	845	{
	846	struct nfsmount *nmp;
	847	int error;
	848	LOCKD_MSG_REQUEST msgreq;
	849	LOCKD_MSG *msg;
	850
	851	nmp = NFSTONMP(np);
	852	if (nfs_mount_gone(nmp))
	853	return (ENXIO);
	854
	855	/* set up lock message request structure */
	856	bzero(&msgreq, sizeof(msgreq));
	857	msg = &msgreq.lmr_msg;
	858	msg->lm_version = LOCKD_MSG_VERSION;
	859	msg->lm_flags \|= LOCKD_MSG_TEST;
	860	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	861	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	862	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
	863
	864	msg->lm_fl.l_whence = SEEK_SET;
	865	msg->lm_fl.l_start = start;
	866	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	867	msg->lm_fl.l_type = fl->l_type;
	868	msg->lm_fl.l_pid = nlop->nlo_pid;
	869
	870	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));
	871
	872	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
	873	if (msg->lm_fl.l_type != F_UNLCK) {
	874	fl->l_type = msg->lm_fl.l_type;
	875	fl->l_pid = msg->lm_fl.l_pid;
	876	fl->l_start = msg->lm_fl.l_start;
	877	fl->l_len = msg->lm_fl.l_len;
	878	fl->l_whence = SEEK_SET;
	879	} else
	880	fl->l_type = F_UNLCK;
	881	}
	882
	883	return (error);
	884	}
	885
	886	/*
	887	* nfslockdans --
	888	* NFS advisory byte-level locks answer from the lock daemon.
	889	*/
	890	int
	891	nfslockdans(proc_t p, struct lockd_ans *ansp)
	892	{
	893	LOCKD_MSG_REQUEST *msgreq;
	894	int error;
	895
	896	/* Let root make this call. */
	897	error = proc_suser(p);
	898	if (error)
	899	return (error);
	900
	901	/* the version should match, or we're out of sync */
	902	if (ansp->la_version != LOCKD_ANS_VERSION)
	903	return (EINVAL);
	904
	905	lck_mtx_lock(nfs_lock_mutex);
	906
	907	/* try to find the lockd message by transaction id (cookie) */
	908	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	909	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
	910	/*
	911	* We can't depend on the granted message having our cookie,
	912	* so we check the answer against the lockd message found.
	913	* If no message was found or it doesn't match the answer,
	914	* we look for the lockd message by the answer's lock info.
	915	*/
	916	if (!msgreq \|\| nfs_lockdmsg_compare_to_answer(msgreq, ansp))
	917	msgreq = nfs_lockdmsg_find_by_answer(ansp);
	918	/*
	919	* We need to make sure this request isn't being cancelled
	920	* If it is, we don't want to accept the granted message.
	921	*/
	922	if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
	923	msgreq = NULL;
	924	}
	925	if (!msgreq) {
	926	lck_mtx_unlock(nfs_lock_mutex);
	927	return (EPIPE);
	928	}
	929
	930	msgreq->lmr_errno = ansp->la_errno;
	931	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
	932	if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
	933	if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
	934	msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
	935	else
	936	msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
	937	msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
	938	msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
	939	msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
	940	} else {
	941	msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
	942	}
	943	}
	944	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE)
	945	msgreq->lmr_msg.lm_flags \|= LOCKD_MSG_DENIED_GRACE;
	946
	947	msgreq->lmr_answered = 1;
	948	lck_mtx_unlock(nfs_lock_mutex);
	949	wakeup(msgreq);
	950
	951	return (0);
	952	}
	953
	954	/*
	955	* nfslockdnotify --
	956	* NFS host restart notification from the lock daemon.
	957	*
	958	* Used to initiate reclaiming of held locks when a server we
	959	* have mounted reboots.
	960	*/
	961	int
	962	nfslockdnotify(proc_t p, user_addr_t argp)
	963	{
	964	int error, i, headsize;
	965	struct lockd_notify ln;
	966	struct nfsmount *nmp;
	967	struct sockaddr *saddr;
	968
	969	/* Let root make this call. */
	970	error = proc_suser(p);
	971	if (error)
	972	return (error);
	973
	974	headsize = (char)&ln.ln_addr[0] - (char)&ln.ln_version;
	975	error = copyin(argp, &ln, headsize);
	976	if (error)
	977	return (error);
	978	if (ln.ln_version != LOCKD_NOTIFY_VERSION)
	979	return (EINVAL);
	980	if ((ln.ln_addrcount < 1) \|\| (ln.ln_addrcount > 128))
	981	return (EINVAL);
	982	argp += headsize;
	983	saddr = (struct sockaddr *)&ln.ln_addr[0];
	984
	985	lck_mtx_lock(nfs_lock_mutex);
	986
	987	for (i=0; i < ln.ln_addrcount; i++) {
	988	error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
	989	if (error)
	990	break;
	991	argp += sizeof(ln.ln_addr[0]);
	992	/* scan lockd mount list for match to this address */
	993	TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
	994	/* check if address matches this mount's server address */
	995	if (!nmp->nm_saddr \|\| nfs_sockaddr_cmp(saddr, nmp->nm_saddr))
	996	continue;
	997	/* We have a match! Mark it as needing recovery. */
	998	lck_mtx_lock(&nmp->nm_lock);
	999	nfs_need_recover(nmp, 0);
	1000	lck_mtx_unlock(&nmp->nm_lock);
	1001	}
	1002	}
	1003
	1004	lck_mtx_unlock(nfs_lock_mutex);
	1005
	1006	return (error);
	1007	}
	1008