git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2002-2016 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*-
	29	* Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
	30	*
	31	* Redistribution and use in source and binary forms, with or without
	32	* modification, are permitted provided that the following conditions
	33	* are met:
	34	* 1. Redistributions of source code must retain the above copyright
	35	* notice, this list of conditions and the following disclaimer.
	36	* 2. Redistributions in binary form must reproduce the above copyright
	37	* notice, this list of conditions and the following disclaimer in the
	38	* documentation and/or other materials provided with the distribution.
	39	* 3. Berkeley Software Design Inc's name may not be used to endorse or
	40	* promote products derived from this software without specific prior
	41	* written permission.
	42	*
	43	* THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
	44	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	45	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	46	* ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
	47	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	48	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	49	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	50	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	51	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	52	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	53	* SUCH DAMAGE.
	54	*
	55	* from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
	56	*/
	57
	58	#include <sys/cdefs.h>
	59	#include <sys/param.h>
	60	#include <sys/systm.h>
	61	#include <sys/fcntl.h>
	62	#include <sys/kernel.h> /* for hz */
	63	#include <sys/file_internal.h>
	64	#include <sys/malloc.h>
	65	#include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */
	66	#include <sys/kpi_mbuf.h>
	67	#include <sys/mount_internal.h>
	68	#include <sys/proc_internal.h> /* for p_start */
	69	#include <sys/kauth.h>
	70	#include <sys/resourcevar.h>
	71	#include <sys/socket.h>
	72	#include <sys/unistd.h>
	73	#include <sys/user.h>
	74	#include <sys/vnode_internal.h>
	75
	76	#include <kern/thread.h>
	77	#include <kern/host.h>
	78
	79	#include <machine/limits.h>
	80
	81	#include <net/if.h>
	82
	83	#include <nfs/rpcv2.h>
	84	#include <nfs/nfsproto.h>
	85	#include <nfs/nfs.h>
	86	#include <nfs/nfs_gss.h>
	87	#include <nfs/nfsmount.h>
	88	#include <nfs/nfsnode.h>
	89	#include <nfs/nfs_lock.h>
	90
	91	#include <mach/host_priv.h>
	92	#include <mach/mig_errors.h>
	93	#include <mach/host_special_ports.h>
	94	#include <lockd/lockd_mach.h>
	95
	96	extern void ipc_port_release_send(ipc_port_t);
	97
	98	/*
	99	* pending lock request messages are kept in this queue which is
	100	* kept sorted by transaction ID (xid).
	101	*/
	102	static uint64_t nfs_lockxid = 0;
	103	static LOCKD_MSG_QUEUE nfs_pendlockq;
	104
	105	/* list of mounts that are (potentially) making lockd requests */
	106	TAILQ_HEAD(nfs_lockd_mount_list,nfsmount) nfs_lockd_mount_list;
	107
	108	static lck_grp_t *nfs_lock_lck_grp;
	109	static lck_mtx_t *nfs_lock_mutex;
	110
	111	void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
	112	void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
	113	int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST , struct lockd_ans );
	114	LOCKD_MSG_REQUEST nfs_lockdmsg_find_by_answer(struct lockd_ans );
	115	LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
	116	uint64_t nfs_lockxid_get(void);
	117	int nfs_lockd_send_request(LOCKD_MSG *, int);
	118
	119	/*
	120	* initialize global nfs lock state
	121	*/
	122	void
	123	nfs_lockinit(void)
	124	{
	125	TAILQ_INIT(&nfs_pendlockq);
	126	TAILQ_INIT(&nfs_lockd_mount_list);
	127
	128	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	129	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
	130	}
	131
	132	/*
	133	* Register a mount as (potentially) making lockd requests.
	134	*/
	135	void
	136	nfs_lockd_mount_register(struct nfsmount *nmp)
	137	{
	138	lck_mtx_lock(nfs_lock_mutex);
	139	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
	140	nfs_lockd_mounts++;
	141	lck_mtx_unlock(nfs_lock_mutex);
	142	}
	143
	144	/*
	145	* Unregister a mount as (potentially) making lockd requests.
	146	*
	147	* When the lockd mount count drops to zero, then send a shutdown request to
	148	* lockd if we've sent any requests to it.
	149	*/
	150	void
	151	nfs_lockd_mount_unregister(struct nfsmount *nmp)
	152	{
	153	int send_shutdown;
	154	mach_port_t lockd_port = IPC_PORT_NULL;
	155	kern_return_t kr;
	156
	157	lck_mtx_lock(nfs_lock_mutex);
	158	if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
	159	lck_mtx_unlock(nfs_lock_mutex);
	160	return;
	161	}
	162
	163	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
	164	nmp->nm_ldlink.tqe_next = NFSNOLIST;
	165
	166	nfs_lockd_mounts--;
	167
	168	/* send a shutdown request if there are no more lockd mounts */
	169	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	170	if (send_shutdown)
	171	nfs_lockd_request_sent = 0;
	172
	173	lck_mtx_unlock(nfs_lock_mutex);
	174
	175	if (!send_shutdown)
	176	return;
	177
	178	/*
	179	* Let lockd know that it is no longer needed for any NFS mounts
	180	*/
	181	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	182	if ((kr != KERN_SUCCESS) \|\| !IPC_PORT_VALID(lockd_port)) {
	183	printf("nfs_lockd_mount_change: shutdown couldn't get port, kr %d, port %s\n",
	184	kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
	185	(lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
	186	return;
	187	}
	188
	189	kr = lockd_shutdown(lockd_port);
	190	if (kr != KERN_SUCCESS)
	191	printf("nfs_lockd_mount_change: shutdown %d\n", kr);
	192
	193	ipc_port_release_send(lockd_port);
	194	}
	195
	196	/*
	197	* insert a lock request message into the pending queue
	198	* (nfs_lock_mutex must be held)
	199	*/
	200	void
	201	nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
	202	{
	203	LOCKD_MSG_REQUEST *mr;
	204
	205	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	206	if (!mr \|\| (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
	207	/* fast path: empty queue or new largest xid */
	208	TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
	209	return;
	210	}
	211	/* slow path: need to walk list to find insertion point */
	212	while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
	213	mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	214	}
	215	if (mr) {
	216	TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	217	} else {
	218	TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	219	}
	220	}
	221
	222	/*
	223	* remove a lock request message from the pending queue
	224	* (nfs_lock_mutex must be held)
	225	*/
	226	void
	227	nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
	228	{
	229	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
	230	}
	231
	232	/*
	233	* find a pending lock request message by xid
	234	*
	235	* We search from the head of the list assuming that the message we're
	236	* looking for is for an older request (because we have an answer to it).
	237	* This assumes that lock request will be answered primarily in FIFO order.
	238	* However, this may not be the case if there are blocked requests. We may
	239	* want to move blocked requests to a separate queue (but that'll complicate
	240	* duplicate xid checking).
	241	*
	242	* (nfs_lock_mutex must be held)
	243	*/
	244	LOCKD_MSG_REQUEST *
	245	nfs_lockdmsg_find_by_xid(uint64_t lockxid)
	246	{
	247	LOCKD_MSG_REQUEST *mr;
	248
	249	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
	250	if (mr->lmr_msg.lm_xid == lockxid)
	251	return mr;
	252	if (mr->lmr_msg.lm_xid > lockxid)
	253	return NULL;
	254	}
	255	return mr;
	256	}
	257
	258	/*
	259	* Because we can't depend on nlm_granted messages containing the same
	260	* cookie we sent with the original lock request, we need code to test
	261	* if an nlm_granted answer matches the lock request. We also need code
	262	* that can find a lockd message based solely on the nlm_granted answer.
	263	*/
	264
	265	/*
	266	* compare lockd message to answer
	267	*
	268	* returns 0 on equality and 1 if different
	269	*/
	270	int
	271	nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST msgreq, struct lockd_ans ansp)
	272	{
	273	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
	274	return 1;
	275	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
	276	return 1;
	277	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
	278	return 1;
	279	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
	280	return 1;
	281	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
	282	return 1;
	283	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
	284	return 1;
	285	return 0;
	286	}
	287
	288	/*
	289	* find a pending lock request message based on the lock info provided
	290	* in the lockd_ans/nlm_granted data. We need this because we can't
	291	* depend on nlm_granted messages containing the same cookie we sent
	292	* with the original lock request.
	293	*
	294	* We search from the head of the list assuming that the message we're
	295	* looking for is for an older request (because we have an answer to it).
	296	* This assumes that lock request will be answered primarily in FIFO order.
	297	* However, this may not be the case if there are blocked requests. We may
	298	* want to move blocked requests to a separate queue (but that'll complicate
	299	* duplicate xid checking).
	300	*
	301	* (nfs_lock_mutex must be held)
	302	*/
	303	LOCKD_MSG_REQUEST *
	304	nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
	305	{
	306	LOCKD_MSG_REQUEST *mr;
	307
	308	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
	309	return NULL;
	310	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
	311	if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
	312	break;
	313	}
	314	return mr;
	315	}
	316
	317	/*
	318	* return the next unique lock request transaction ID
	319	* (nfs_lock_mutex must be held)
	320	*/
	321	uint64_t
	322	nfs_lockxid_get(void)
	323	{
	324	LOCKD_MSG_REQUEST *mr;
	325
	326	/* derive initial lock xid from system time */
	327	if (!nfs_lockxid) {
	328	/*
	329	* Note: it's OK if this code inits nfs_lockxid to 0 (for example,
	330	* due to a broken clock) because we immediately increment it
	331	* and we guarantee to never use xid 0. So, nfs_lockxid should only
	332	* ever be 0 the first time this function is called.
	333	*/
	334	struct timeval tv;
	335	microtime(&tv);
	336	nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	337	}
	338
	339	/* make sure we get a unique xid */
	340	do {
	341	/* Skip zero xid if it should ever happen. */
	342	if (++nfs_lockxid == 0)
	343	nfs_lockxid++;
	344	if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) \|\|
	345	(mr->lmr_msg.lm_xid < nfs_lockxid)) {
	346	/* fast path: empty queue or new largest xid */
	347	break;
	348	}
	349	/* check if xid is already in use */
	350	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
	351
	352	return nfs_lockxid;
	353	}
	354
	355	#define MACH_MAX_TRIES 3
	356
	357	int
	358	nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
	359	{
	360	kern_return_t kr;
	361	int retries = 0;
	362	mach_port_t lockd_port = IPC_PORT_NULL;
	363
	364	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	365	if (kr != KERN_SUCCESS \|\| !IPC_PORT_VALID(lockd_port))
	366	return (ENOTSUP);
	367
	368	do {
	369	/* In the kernel all mach messaging is interruptable */
	370	do {
	371	kr = lockd_request(
	372	lockd_port,
	373	msg->lm_version,
	374	msg->lm_flags,
	375	msg->lm_xid,
	376	msg->lm_fl.l_start,
	377	msg->lm_fl.l_len,
	378	msg->lm_fl.l_pid,
	379	msg->lm_fl.l_type,
	380	msg->lm_fl.l_whence,
	381	(uint32_t *)&msg->lm_addr,
	382	(uint32_t *)&msg->lm_cred,
	383	msg->lm_fh_len,
	384	msg->lm_fh);
	385	if (kr != KERN_SUCCESS)
	386	printf("lockd_request received %d!\n", kr);
	387	} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	388	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);
	389
	390	ipc_port_release_send(lockd_port);
	391	switch (kr) {
	392	case MACH_SEND_INTERRUPTED:
	393	return (EINTR);
	394	default:
	395	/*
	396	* Other MACH or MIG errors we will retry. Eventually
	397	* we will call nfs_down and allow the user to disable
	398	* locking.
	399	*/
	400	return (EAGAIN);
	401	}
	402	}
	403
	404	/*
	405	* NFS advisory byte-level locks (client)
	*
	* Sends the lock message in msgreq to lockd and waits for its answer.
	*
	* The message is given a unique xid and placed on the pending queue.
	* It is then sent with nfs_lockd_send_request() (with nfs_lock_mutex
	* dropped) and this thread sleeps on msgreq until lmr_answered is set
	* (see nfslockdans()) or the current timeout expires, in which case the
	* request is resent.  The timeout starts at 4 seconds and doubles on
	* each resend up to 30 seconds (it is not doubled for R_RECOVER
	* requests).  While waiting, the mount is re-checked for going away,
	* for locking having been disabled, and for a pending recovery.
	*
	* A request answered with EINPROGRESS is a blocked lock: we keep
	* waiting (30 second timeout) for the grant.  When we give up on or
	* time out a blocked request, the same msgreq is turned into a cancel
	* (LOCKD_MSG_CANCEL, new xid) and sent before returning or resending.
	*
	* np      node the lock applies to (its mount is looked up each time)
	* type    F_UNLCK for unlock requests; only non-unlock waits are
	*         made signal-interruptible (PCATCH)
	* msgreq  caller-prepared request; lm_xid, lm_addr and some lm_flags
	*         bits are filled in here
	* flags   R_RECOVER marks a request made on behalf of recovery
	* thd     thread passed through to nfs_down()/nfs_up()
	*
	* Returns 0 or an errno/NFS error, including ENXIO (no mount or no
	* server address), ENOTSUP, ETIMEDOUT, NFSERR_GRACE, NFSERR_DENIED,
	* an error from nfs_sigintr(), or the lmr_errno reported by lockd.
	406	*/
	407	int
	408	nfs3_lockd_request(
	409	nfsnode_t np,
	410	int type,
	411	LOCKD_MSG_REQUEST *msgreq,
	412	int flags,
	413	thread_t thd)
	414	{
	415	LOCKD_MSG *msg = &msgreq->lmr_msg;
	416	int error, error2;
	417	int interruptable, slpflag;
	418	struct nfsmount *nmp;
	419	struct timeval now;
	420	int timeo, starttime, endtime, lastmsg, wentdown = 0;
	421	struct timespec ts;
	422	struct sockaddr *saddr;
	423
	424	nmp = NFSTONMP(np);
	425	if (!nmp \|\| !nmp->nm_saddr)
	426	return (ENXIO);
	427
	/* copy the server address and protocol details into the message under the mount lock */
	428	lck_mtx_lock(&nmp->nm_lock);
	429	saddr = nmp->nm_saddr;
	430	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	431	if (nmp->nm_vers == NFS_VER3)
	432	msg->lm_flags \|= LOCKD_MSG_NFSV3;
	433
	434	if (nmp->nm_sotype != SOCK_DGRAM)
	435	msg->lm_flags \|= LOCKD_MSG_TCP;
	436
	437	microuptime(&now);
	438	starttime = now.tv_sec;
	/* back-date lastmsg so the first "not responding" warning can fire nm_tprintf_initial_delay seconds from now */
	439	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	440	interruptable = NMFLAG(nmp, INTR);
	441	lck_mtx_unlock(&nmp->nm_lock);
	442
	443	lck_mtx_lock(nfs_lock_mutex);
	444
	445	/* allocate unique xid */
	446	msg->lm_xid = nfs_lockxid_get();
	447	nfs_lockdmsg_enqueue(msgreq);
	448
	449	timeo = 4;
	450
	/* each pass of this loop sends (or resends) the message and waits for an answer */
	451	for (;;) {
	/* remembered so nfs_lockd_mount_unregister() knows a shutdown request is warranted */
	452	nfs_lockd_request_sent = 1;
	453
	454	/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
	455	lck_mtx_unlock(nfs_lock_mutex);
	456	error = nfs_lockd_send_request(msg, interruptable);
	457	lck_mtx_lock(nfs_lock_mutex);
	/* EAGAIN from the send is not fatal: fall through and wait, resending on timeout */
	458	if (error && error != EAGAIN)
	459	break;
	460
	461	/*
	462	* Always wait for an answer. Not waiting for unlocks could
	463	* cause a lock to be left if the unlock request gets dropped.
	464	*/
	465
	466	/*
	467	* Retry if it takes too long to get a response.
	468	*
	469	* The timeout numbers were picked out of thin air... they start
	470	* at 4 and double each timeout with a max of 30 seconds.
	471	*
	472	* In order to maintain responsiveness, we pass a small timeout
	473	* to msleep and calculate the timeouts ourselves. This allows
	474	* us to pick up on mount changes quicker.
	475	*/
	476	wait_for_granted:
	477	error = EWOULDBLOCK;
	478	slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
	479	ts.tv_sec = 2;
	480	ts.tv_nsec = 0;
	481	microuptime(&now);
	482	endtime = now.tv_sec + timeo;
	483	while (now.tv_sec < endtime) {
	484	error = error2 = 0;
	485	if (!msgreq->lmr_answered) {
	486	error = msleep(msgreq, nfs_lock_mutex, slpflag \| PUSER, "lockd", &ts);
	/* only the first sleep of this wait uses the PCATCH setting */
	487	slpflag = 0;
	488	}
	489	if (msgreq->lmr_answered) {
	490	/*
	491	* Note: it's possible to have a lock granted at
	492	* essentially the same time that we get interrupted.
	493	* Since the lock may be granted, we can't return an
	494	* error from this request or we might not unlock the
	495	* lock that's been granted.
	496	*/
	497	nmp = NFSTONMP(np);
	498	if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
	499	(nmp->nm_state & NFSSTA_LOCKSWORK)) {
	500	/*
	501	* We have evidence that locks work, yet lockd
	502	* returned ENOTSUP. This is probably because
	503	* it was unable to contact the server's lockd
	504	* to send it the request.
	505	*
	506	* Because we know locks work, we'll consider
	507	* this failure to be a timeout.
	508	*/
	509	error = EWOULDBLOCK;
	510	} else {
	511	error = 0;
	512	}
	513	break;
	514	}
	/* anything other than a sleep timeout (e.g. a signal) ends the wait */
	515	if (error != EWOULDBLOCK)
	516	break;
	517	/* check that we still have our mount... */
	518	/* ...and that we still support locks */
	519	/* ...and that there isn't a recovery pending */
	520	nmp = NFSTONMP(np);
	521	if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
	522	error = error2;
	523	if (type == F_UNLCK)
	524	printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
	525	break;
	526	}
	527	lck_mtx_lock(&nmp->nm_lock);
	528	if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
	529	lck_mtx_unlock(&nmp->nm_lock);
	530	break;
	531	}
	532	if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	533	/* recovery pending... return an error that'll get this operation restarted */
	534	error = NFSERR_GRACE;
	535	lck_mtx_unlock(&nmp->nm_lock);
	536	break;
	537	}
	538	interruptable = NMFLAG(nmp, INTR);
	539	lck_mtx_unlock(&nmp->nm_lock);
	540	microuptime(&now);
	541	}
	/* error != 0 here means no usable answer: decide whether to bail, cancel, or resend */
	542	if (error) {
	543	/* check that we still have our mount... */
	544	nmp = NFSTONMP(np);
	545	if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
	546	error = error2;
	547	if (error2 != EINTR) {
	548	if (type == F_UNLCK)
	549	printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
	550	break;
	551	}
	552	}
	553	/* ...and that we still support locks */
	554	lck_mtx_lock(&nmp->nm_lock);
	555	if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
	556	if (error == EWOULDBLOCK)
	557	error = ENOTSUP;
	558	lck_mtx_unlock(&nmp->nm_lock);
	559	break;
	560	}
	561	/* ...and that there isn't a recovery pending */
	562	if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	563	/* recovery pending... return to allow recovery to occur */
	564	error = NFSERR_DENIED;
	565	lck_mtx_unlock(&nmp->nm_lock);
	566	break;
	567	}
	568	interruptable = NMFLAG(nmp, INTR);
	569	if ((error != EWOULDBLOCK) \|\|
	570	((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) \|\|
	571	((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
	572	if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
	573	/* give up if this is for recovery and taking too long */
	574	error = ETIMEDOUT;
	575	} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
	576	/* recovery pending... return an error that'll get this operation restarted */
	577	error = NFSERR_GRACE;
	578	}
	579	lck_mtx_unlock(&nmp->nm_lock);
	580	/*
	581	* We're going to bail on this request.
	582	* If we were a blocked lock request, send a cancel.
	583	*/
	584	if ((msgreq->lmr_errno == EINPROGRESS) &&
	585	!(msg->lm_flags & LOCKD_MSG_CANCEL)) {
	586	/* set this request up as a cancel */
	587	msg->lm_flags \|= LOCKD_MSG_CANCEL;
	588	nfs_lockdmsg_dequeue(msgreq);
	589	msg->lm_xid = nfs_lockxid_get();
	590	nfs_lockdmsg_enqueue(msgreq);
	/* the error we are bailing with is restored once the cancel is answered */
	591	msgreq->lmr_saved_errno = error;
	592	msgreq->lmr_errno = 0;
	593	msgreq->lmr_answered = 0;
	594	/* reset timeout */
	595	timeo = 2;
	596	/* send cancel request */
	597	continue;
	598	}
	599	break;
	600	}
	601
	602	/* warn if we're not getting any response */
	603	microuptime(&now);
	604	if ((msgreq->lmr_errno != EINPROGRESS) &&
	605	!(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
	606	(nmp->nm_tprintf_initial_delay != 0) &&
	607	((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
	608	lck_mtx_unlock(&nmp->nm_lock);
	609	lastmsg = now.tv_sec;
	610	nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 1);
	611	wentdown = 1;
	612	} else
	613	lck_mtx_unlock(&nmp->nm_lock);
	614
	615	if (msgreq->lmr_errno == EINPROGRESS) {
	616	/*
	617	* We've got a blocked lock request that we are
	618	* going to retry. First, we'll want to try to
	619	* send a cancel for the previous request.
	620	*
	621	* Clear errno so if we don't get a response
	622	* to the resend we'll call nfs_down().
	623	* Also reset timeout because we'll expect a
	624	* quick response to the cancel/resend (even if
	625	* it is NLM_BLOCKED).
	626	*/
	627	msg->lm_flags \|= LOCKD_MSG_CANCEL;
	628	nfs_lockdmsg_dequeue(msgreq);
	629	msg->lm_xid = nfs_lockxid_get();
	630	nfs_lockdmsg_enqueue(msgreq);
	631	msgreq->lmr_saved_errno = msgreq->lmr_errno;
	632	msgreq->lmr_errno = 0;
	633	msgreq->lmr_answered = 0;
	634	timeo = 2;
	635	/* send cancel then resend request */
	636	continue;
	637	}
	638
	639	/*
	640	* We timed out, so we will resend the request.
	641	*/
	642	if (!(flags & R_RECOVER))
	643	timeo *= 2;
	644	if (timeo > 30)
	645	timeo = 30;
	646	/* resend request */
	647	continue;
	648	}
	649
	650	/* we got a response, so the server's lockd is OK */
	651	nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
	652	wentdown ? "lockd alive again" : NULL);
	653	wentdown = 0;
	654
	655	if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
	656	/*
	657	* The lock request was denied because the server lockd is
	658	* still in its grace period. So, we need to try the
	659	* request again in a little bit. Return the GRACE error so
	660	* the higher levels can perform the retry.
	661	*/
	662	msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
	663	}
	664
	665	if (msgreq->lmr_errno == EINPROGRESS) {
	666	/* got NLM_BLOCKED response */
	667	/* need to wait for NLM_GRANTED */
	668	timeo = 30;
	669	msgreq->lmr_answered = 0;
	670	goto wait_for_granted;
	671	}
	672
	673	if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
	674	(msgreq->lmr_saved_errno == EINPROGRESS)) {
	675	/*
	676	* We just got a successful reply to the
	677	* cancel of the previous blocked lock request.
	678	* Now, go ahead and return a DENIED error so the
	679	* higher levels can resend the request.
	680	*/
	681	msg->lm_flags &= ~LOCKD_MSG_CANCEL;
	/*
	* NOTE(review): the request is dequeued here and again at the
	* common exit below after the break -- confirm that removing it
	* twice is safe/intended.
	*/
	682	nfs_lockdmsg_dequeue(msgreq);
	683	error = NFSERR_DENIED;
	684	break;
	685	}
	686
	687	/*
	688	* If the blocked lock request was cancelled.
	689	* Restore the error condition from when we
	690	* originally bailed on the request.
	691	*/
	692	if (msg->lm_flags & LOCKD_MSG_CANCEL) {
	693	msg->lm_flags &= ~LOCKD_MSG_CANCEL;
	694	error = msgreq->lmr_saved_errno;
	695	} else {
	696	error = msgreq->lmr_errno;
	697	}
	698
	699	nmp = NFSTONMP(np);
	700	if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
	701	/*
	702	* We have NO evidence that locks work and lockd
	703	* returned ENOTSUP. Let's take this as a hint
	704	* that locks aren't supported and disable them
	705	* for this mount.
	706	*/
	/* this path cleans up and returns on its own instead of using the common exit below */
	707	nfs_lockdmsg_dequeue(msgreq);
	708	lck_mtx_unlock(nfs_lock_mutex);
	709	lck_mtx_lock(&nmp->nm_lock);
	710	if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
	711	nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
	/*
	* NOTE(review): nfs_lockd_mount_unregister() takes
	* nfs_lock_mutex while nm_lock is held here; elsewhere in
	* this function nm_lock is taken while nfs_lock_mutex is
	* held.  Confirm this lock ordering cannot deadlock.
	*/
	712	nfs_lockd_mount_unregister(nmp);
	713	}
	714	nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
	715	lck_mtx_unlock(&nmp->nm_lock);
	716	printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
	717	vfs_statfs(nmp->nm_mountp)->f_mntfromname);
	718	return (error);
	719	}
	720	if (!error) {
	721	/* record that NFS file locking has worked on this mount */
	722	if (nmp) {
	723	lck_mtx_lock(&nmp->nm_lock);
	724	if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
	725	nmp->nm_state \|= NFSSTA_LOCKSWORK;
	726	lck_mtx_unlock(&nmp->nm_lock);
	727	}
	728	}
	729	break;
	730	}
	731
	/* common exit: take the request off the pending queue and drop the global lock */
	732	nfs_lockdmsg_dequeue(msgreq);
	733
	734	lck_mtx_unlock(nfs_lock_mutex);
	735
	736	return (error);
	737	}
	738
	739	/*
	740	* Send an NLM LOCK message to the server
	741	*/
	742	int
	743	nfs3_setlock_rpc(
	744	nfsnode_t np,
	745	struct nfs_open_file *nofp,
	746	struct nfs_file_lock *nflp,
	747	int reclaim,
	748	int flags,
	749	thread_t thd,
	750	kauth_cred_t cred)
	751	{
	752	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	753	struct nfsmount *nmp;
	754	int error;
	755	LOCKD_MSG_REQUEST msgreq;
	756	LOCKD_MSG *msg;
	757
	758	nmp = NFSTONMP(np);
	759	if (nfs_mount_gone(nmp))
	760	return (ENXIO);
	761
	762	if (!nlop->nlo_open_owner) {
	763	nfs_open_owner_ref(nofp->nof_owner);
	764	nlop->nlo_open_owner = nofp->nof_owner;
	765	}
	766	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
	767	return (error);
	768
	769	/* set up lock message request structure */
	770	bzero(&msgreq, sizeof(msgreq));
	771	msg = &msgreq.lmr_msg;
	772	msg->lm_version = LOCKD_MSG_VERSION;
	773	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim)
	774	msg->lm_flags \|= LOCKD_MSG_BLOCK;
	775	if (reclaim)
	776	msg->lm_flags \|= LOCKD_MSG_RECLAIM;
	777	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	778	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	779	cru2x(cred, &msg->lm_cred);
	780
	781	msg->lm_fl.l_whence = SEEK_SET;
	782	msg->lm_fl.l_start = nflp->nfl_start;
	783	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
	784	msg->lm_fl.l_type = nflp->nfl_type;
	785	msg->lm_fl.l_pid = nlop->nlo_pid;
	786
	787	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);
	788
	789	nfs_lock_owner_clear_busy(nlop);
	790	return (error);
	791	}
	792
	793	/*
	794	* Send an NLM UNLOCK message to the server
	795	*/
	796	int
	797	nfs3_unlock_rpc(
	798	nfsnode_t np,
	799	struct nfs_lock_owner *nlop,
	800	__unused int type,
	801	uint64_t start,
	802	uint64_t end,
	803	int flags,
	804	thread_t thd,
	805	kauth_cred_t cred)
	806	{
	807	struct nfsmount *nmp;
	808	LOCKD_MSG_REQUEST msgreq;
	809	LOCKD_MSG *msg;
	810
	811	nmp = NFSTONMP(np);
	812	if (!nmp)
	813	return (ENXIO);
	814
	815	/* set up lock message request structure */
	816	bzero(&msgreq, sizeof(msgreq));
	817	msg = &msgreq.lmr_msg;
	818	msg->lm_version = LOCKD_MSG_VERSION;
	819	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	820	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	821	cru2x(cred, &msg->lm_cred);
	822
	823	msg->lm_fl.l_whence = SEEK_SET;
	824	msg->lm_fl.l_start = start;
	825	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	826	msg->lm_fl.l_type = F_UNLCK;
	827	msg->lm_fl.l_pid = nlop->nlo_pid;
	828
	829	return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd));
	830	}
	831
	832	/*
	833	* Send an NLM LOCK TEST message to the server
	834	*/
	835	int
	836	nfs3_getlock_rpc(
	837	nfsnode_t np,
	838	struct nfs_lock_owner *nlop,
	839	struct flock *fl,
	840	uint64_t start,
	841	uint64_t end,
	842	vfs_context_t ctx)
	843	{
	844	struct nfsmount *nmp;
	845	int error;
	846	LOCKD_MSG_REQUEST msgreq;
	847	LOCKD_MSG *msg;
	848
	849	nmp = NFSTONMP(np);
	850	if (nfs_mount_gone(nmp))
	851	return (ENXIO);
	852
	853	/* set up lock message request structure */
	854	bzero(&msgreq, sizeof(msgreq));
	855	msg = &msgreq.lmr_msg;
	856	msg->lm_version = LOCKD_MSG_VERSION;
	857	msg->lm_flags \|= LOCKD_MSG_TEST;
	858	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	859	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	860	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);
	861
	862	msg->lm_fl.l_whence = SEEK_SET;
	863	msg->lm_fl.l_start = start;
	864	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	865	msg->lm_fl.l_type = fl->l_type;
	866	msg->lm_fl.l_pid = nlop->nlo_pid;
	867
	868	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));
	869
	870	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
	871	if (msg->lm_fl.l_type != F_UNLCK) {
	872	fl->l_type = msg->lm_fl.l_type;
	873	fl->l_pid = msg->lm_fl.l_pid;
	874	fl->l_start = msg->lm_fl.l_start;
	875	fl->l_len = msg->lm_fl.l_len;
	876	fl->l_whence = SEEK_SET;
	877	} else
	878	fl->l_type = F_UNLCK;
	879	}
	880
	881	return (error);
	882	}
	883
	884	/*
	885	* nfslockdans --
	886	* NFS advisory byte-level locks answer from the lock daemon.
	887	*/
	888	int
	889	nfslockdans(proc_t p, struct lockd_ans *ansp)
	890	{
	891	LOCKD_MSG_REQUEST *msgreq;
	892	int error;
	893
	894	/* Let root make this call. */
	895	error = proc_suser(p);
	896	if (error)
	897	return (error);
	898
	899	/* the version should match, or we're out of sync */
	900	if (ansp->la_version != LOCKD_ANS_VERSION)
	901	return (EINVAL);
	902
	903	lck_mtx_lock(nfs_lock_mutex);
	904
	905	/* try to find the lockd message by transaction id (cookie) */
	906	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	907	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
	908	/*
	909	* We can't depend on the granted message having our cookie,
	910	* so we check the answer against the lockd message found.
	911	* If no message was found or it doesn't match the answer,
	912	* we look for the lockd message by the answer's lock info.
	913	*/
	914	if (!msgreq \|\| nfs_lockdmsg_compare_to_answer(msgreq, ansp))
	915	msgreq = nfs_lockdmsg_find_by_answer(ansp);
	916	/*
	917	* We need to make sure this request isn't being cancelled
	918	* If it is, we don't want to accept the granted message.
	919	*/
	920	if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
	921	msgreq = NULL;
	922	}
	923	if (!msgreq) {
	924	lck_mtx_unlock(nfs_lock_mutex);
	925	return (EPIPE);
	926	}
	927
	928	msgreq->lmr_errno = ansp->la_errno;
	929	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
	930	if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
	931	if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
	932	msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
	933	else
	934	msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
	935	msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
	936	msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
	937	msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
	938	} else {
	939	msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
	940	}
	941	}
	942	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE)
	943	msgreq->lmr_msg.lm_flags \|= LOCKD_MSG_DENIED_GRACE;
	944
	945	msgreq->lmr_answered = 1;
	946	lck_mtx_unlock(nfs_lock_mutex);
	947	wakeup(msgreq);
	948
	949	return (0);
	950	}
	951
	952	/*
	953	* nfslockdnotify --
	954	* NFS host restart notification from the lock daemon.
	955	*
	956	* Used to initiate reclaiming of held locks when a server we
	957	* have mounted reboots.
	958	*/
	959	int
	960	nfslockdnotify(proc_t p, user_addr_t argp)
	961	{
	962	int error, i, headsize;
	963	struct lockd_notify ln;
	964	struct nfsmount *nmp;
	965	struct sockaddr *saddr;
	966
	967	/* Let root make this call. */
	968	error = proc_suser(p);
	969	if (error)
	970	return (error);
	971
	972	headsize = (char)&ln.ln_addr[0] - (char)&ln.ln_version;
	973	error = copyin(argp, &ln, headsize);
	974	if (error)
	975	return (error);
	976	if (ln.ln_version != LOCKD_NOTIFY_VERSION)
	977	return (EINVAL);
	978	if ((ln.ln_addrcount < 1) \|\| (ln.ln_addrcount > 128))
	979	return (EINVAL);
	980	argp += headsize;
	981	saddr = (struct sockaddr *)&ln.ln_addr[0];
	982
	983	lck_mtx_lock(nfs_lock_mutex);
	984
	985	for (i=0; i < ln.ln_addrcount; i++) {
	986	error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
	987	if (error)
	988	break;
	989	argp += sizeof(ln.ln_addr[0]);
	990	/* scan lockd mount list for match to this address */
	991	TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
	992	/* check if address matches this mount's server address */
	993	if (!nmp->nm_saddr \|\| nfs_sockaddr_cmp(saddr, nmp->nm_saddr))
	994	continue;
	995	/* We have a match! Mark it as needing recovery. */
	996	lck_mtx_lock(&nmp->nm_lock);
	997	nfs_need_recover(nmp, 0);
	998	lck_mtx_unlock(&nmp->nm_lock);
	999	}
	1000	}
	1001
	1002	lck_mtx_unlock(nfs_lock_mutex);
	1003
	1004	return (error);
	1005	}
	1006