/*
 * Copyright (c) 2002-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>		/* for hz */
#include <sys/file_internal.h>
#include <sys/malloc.h>
#include <sys/lockf.h>		/* Must come after sys/malloc.h */
#include <sys/kpi_mbuf.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>	/* for p_start */
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>

#include <kern/thread.h>
#include <kern/host.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfs_gss.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

#include <mach/host_priv.h>
#include <mach/mig_errors.h>
#include <mach/host_special_ports.h>
#include <lockd/lockd_mach.h>

extern void ipc_port_release_send(ipc_port_t);

/*
 * Pending lock request messages are kept in this queue,
 * which is sorted by transaction ID (xid).
 */
static uint64_t nfs_lockxid = 0;
static LOCKD_MSG_QUEUE nfs_pendlockq;

/* list of mounts that are (potentially) making lockd requests */
TAILQ_HEAD(nfs_lockd_mount_list, nfsmount) nfs_lockd_mount_list;

static lck_grp_t *nfs_lock_lck_grp;
static lck_mtx_t *nfs_lock_mutex;

void nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *);
void nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *);
int nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *, struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_answer(struct lockd_ans *);
LOCKD_MSG_REQUEST *nfs_lockdmsg_find_by_xid(uint64_t);
uint64_t nfs_lockxid_get(void);
int nfs_lockd_send_request(LOCKD_MSG *, int);

/*
 * initialize global nfs lock state
 */
void
nfs_lockinit(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	TAILQ_INIT(&nfs_lockd_mount_list);

	nfs_lock_lck_grp = lck_grp_alloc_init("nfs_lock", LCK_GRP_ATTR_NULL);
	nfs_lock_mutex = lck_mtx_alloc_init(nfs_lock_lck_grp, LCK_ATTR_NULL);
}

/*
 * Register a mount as (potentially) making lockd requests.
 */
void
nfs_lockd_mount_register(struct nfsmount *nmp)
{
	lck_mtx_lock(nfs_lock_mutex);
	TAILQ_INSERT_HEAD(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nfs_lockd_mounts++;
	lck_mtx_unlock(nfs_lock_mutex);
}

/*
 * Unregister a mount as (potentially) making lockd requests.
 *
 * When the lockd mount count drops to zero, send a shutdown request
 * to lockd if we've sent it any requests.
 */
void
nfs_lockd_mount_unregister(struct nfsmount *nmp)
{
	int send_shutdown;
	mach_port_t lockd_port = IPC_PORT_NULL;
	kern_return_t kr;

	lck_mtx_lock(nfs_lock_mutex);
	if (nmp->nm_ldlink.tqe_next == NFSNOLIST) {
		lck_mtx_unlock(nfs_lock_mutex);
		return;
	}

	TAILQ_REMOVE(&nfs_lockd_mount_list, nmp, nm_ldlink);
	nmp->nm_ldlink.tqe_next = NFSNOLIST;

	nfs_lockd_mounts--;

	/* send a shutdown request if there are no more lockd mounts */
	send_shutdown = ((nfs_lockd_mounts == 0) && nfs_lockd_request_sent);
	if (send_shutdown)
		nfs_lockd_request_sent = 0;

	lck_mtx_unlock(nfs_lock_mutex);

	if (!send_shutdown)
		return;
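
	/*
	 * The calls below are Mach IPC and can block, which is why they
	 * are made only after nfs_lock_mutex has been dropped.
	 */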

	/*
	 * Let lockd know that it is no longer needed for any NFS mounts
	 */
	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if ((kr != KERN_SUCCESS) || !IPC_PORT_VALID(lockd_port)) {
		printf("nfs_lockd_mount_unregister: shutdown couldn't get lockd port, kr %d, port %s\n",
		    kr, (lockd_port == IPC_PORT_NULL) ? "NULL" :
		    (lockd_port == IPC_PORT_DEAD) ? "DEAD" : "VALID");
		return;
	}

	kr = lockd_shutdown(lockd_port);
	if (kr != KERN_SUCCESS)
		printf("nfs_lockd_mount_unregister: lockd_shutdown failed, kr %d\n", kr);

	ipc_port_release_send(lockd_port);
}

/*
 * insert a lock request message into the pending queue
 * (nfs_lock_mutex must be held)
 */
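/*
 * The queue is kept sorted by xid. For example, with pending xids
 * { 5, 6, 9 }, enqueueing xid 8 walks back from the tail past 9 and
 * inserts after 6, leaving { 5, 6, 8, 9 }; enqueueing xid 10 takes
 * the fast path and is simply appended at the tail.
 */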
void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
{
	LOCKD_MSG_REQUEST *mr;

	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
		/* fast path: empty queue or new largest xid */
		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
		return;
	}
	/* slow path: need to walk list to find insertion point */
	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	}
	if (mr) {
		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	} else {
		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	}
}

/*
 * remove a lock request message from the pending queue
 * (nfs_lock_mutex must be held)
 */
void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}

/*
 * find a pending lock request message by xid
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
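/*
 * Since the queue is sorted by xid, the scan can stop as soon as it
 * passes the xid it is looking for; falling off the end of the loop
 * leaves mr NULL, which is what gets returned.
 */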
LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)
{
	LOCKD_MSG_REQUEST *mr;

	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (mr->lmr_msg.lm_xid == lockxid)
			return mr;
		if (mr->lmr_msg.lm_xid > lockxid)
			return NULL;
	}
	return mr;
}

/*
 * Because we can't depend on nlm_granted messages containing the same
 * cookie we sent with the original lock request, we need code to test
 * if an nlm_granted answer matches the lock request. We also need code
 * that can find a lockd message based solely on the nlm_granted answer.
 */

/*
 * compare lockd message to answer
 *
 * returns 0 on equality and 1 if different
 */
int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
{
	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
		return 1;
	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
		return 1;
	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
		return 1;
	return 0;
}

/*
 * find a pending lock request message based on the lock info provided
 * in the lockd_ans/nlm_granted data. We need this because we can't
 * depend on nlm_granted messages containing the same cookie we sent
 * with the original lock request.
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests. We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 *
 * (nfs_lock_mutex must be held)
 */
LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *mr;

	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return NULL;
	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
			break;
	}
	return mr;
}

/*
 * return the next unique lock request transaction ID
 * (nfs_lock_mutex must be held)
 */
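/*
 * The first xid is seeded from the wall clock with the low 12 bits
 * clear, presumably so that a restarted client picks up beyond any
 * xid it issued before (assuming well under 4096 requests per second);
 * after that the counter simply increments, skipping zero and any xid
 * still in the pending queue.
 */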
uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0. So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen. */
		if (++nfs_lockxid == 0)
			nfs_lockxid++;
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		    (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return nfs_lockxid;
}

#define MACH_MAX_TRIES 3

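/*
 * Pass a lock request to lockd via Mach IPC.
 *
 * Returns 0 if the request was handed off, EINTR if the send was
 * interrupted, ENOTSUP if no lockd port is available, and EAGAIN for
 * other Mach/MIG errors (so the caller will retry).
 */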
int
nfs_lockd_send_request(LOCKD_MSG *msg, int interruptable)
{
	kern_return_t kr;
	int retries = 0;
	mach_port_t lockd_port = IPC_PORT_NULL;

	kr = host_get_lockd_port(host_priv_self(), &lockd_port);
	if (kr != KERN_SUCCESS || !IPC_PORT_VALID(lockd_port))
		return (ENOTSUP);

	do {
		/* In the kernel all mach messaging is interruptible */
		do {
			kr = lockd_request(
				lockd_port,
				msg->lm_version,
				msg->lm_flags,
				msg->lm_xid,
				msg->lm_fl.l_start,
				msg->lm_fl.l_len,
				msg->lm_fl.l_pid,
				msg->lm_fl.l_type,
				msg->lm_fl.l_whence,
				(uint32_t *)&msg->lm_addr,
				(uint32_t *)&msg->lm_cred,
				msg->lm_fh_len,
				msg->lm_fh);
			if (kr != KERN_SUCCESS)
				printf("lockd_request received %d!\n", kr);
		} while (!interruptable && kr == MACH_SEND_INTERRUPTED);
	} while (kr == MIG_SERVER_DIED && retries++ < MACH_MAX_TRIES);

	ipc_port_release_send(lockd_port);
	switch (kr) {
	case KERN_SUCCESS:
		return (0);
	case MACH_SEND_INTERRUPTED:
		return (EINTR);
	default:
		/*
		 * Other MACH or MIG errors we will retry. Eventually
		 * we will call nfs_down and allow the user to disable
		 * locking.
		 */
		return (EAGAIN);
	}
}

/*
 * NFS advisory byte-level locks (client)
 */
int
nfs3_lockd_request(
	nfsnode_t np,
	int type,
	LOCKD_MSG_REQUEST *msgreq,
	int flags,
	thread_t thd)
{
	LOCKD_MSG *msg = &msgreq->lmr_msg;
	int error, error2;
	int interruptable, slpflag;
	struct nfsmount *nmp;
	struct timeval now;
	int timeo, starttime, endtime, lastmsg, wentdown = 0;
	struct timespec ts;
	struct sockaddr *saddr;

	nmp = NFSTONMP(np);
	if (!nmp || !nmp->nm_saddr)
		return (ENXIO);

	lck_mtx_lock(&nmp->nm_lock);
	saddr = nmp->nm_saddr;
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	if (nmp->nm_vers == NFS_VER3)
		msg->lm_flags |= LOCKD_MSG_NFSV3;

	if (nmp->nm_sotype != SOCK_DGRAM)
		msg->lm_flags |= LOCKD_MSG_TCP;

	microuptime(&now);
	starttime = now.tv_sec;
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
	interruptable = NMFLAG(nmp, INTR);
	lck_mtx_unlock(&nmp->nm_lock);

	lck_mtx_lock(nfs_lock_mutex);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(msgreq);

	timeo = 4;

	for (;;) {
		nfs_lockd_request_sent = 1;

		/* need to drop nfs_lock_mutex while calling nfs_lockd_send_request() */
		lck_mtx_unlock(nfs_lock_mutex);
		error = nfs_lockd_send_request(msg, interruptable);
		lck_mtx_lock(nfs_lock_mutex);
		if (error && error != EAGAIN)
			break;

		/*
		 * Always wait for an answer. Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 4 seconds and double after each timeout, capped at 30 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to msleep and calculate the timeouts ourselves. This allows
		 * us to pick up on mount changes more quickly.
		 */
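		/*
		 * (With these rules a normal request's waits run 4, 8, 16,
		 * 30, 30, ... seconds, each polled in 2-second msleep slices
		 * so mount-state changes are noticed quickly; cancel/resend
		 * attempts below restart at 2 seconds.)
		 */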
wait_for_granted:
		error = EWOULDBLOCK;
		slpflag = (interruptable && (type != F_UNLCK)) ? PCATCH : 0;
		ts.tv_sec = 2;
		ts.tv_nsec = 0;
		microuptime(&now);
		endtime = now.tv_sec + timeo;
		while (now.tv_sec < endtime) {
			error = error2 = 0;
			if (!msgreq->lmr_answered) {
				error = msleep(msgreq, nfs_lock_mutex, slpflag | PUSER, "lockd", &ts);
				slpflag = 0;
			}
			if (msgreq->lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				nmp = NFSTONMP(np);
				if ((msgreq->lmr_errno == ENOTSUP) && nmp &&
				    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
					/*
					 * We have evidence that locks work, yet lockd
					 * returned ENOTSUP. This is probably because
					 * it was unable to contact the server's lockd
					 * to send it the request.
					 *
					 * Because we know locks work, we'll consider
					 * this failure to be a timeout.
					 */
					error = EWOULDBLOCK;
				} else {
					error = 0;
				}
				break;
			}
			if (error != EWOULDBLOCK)
				break;
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			/* ...and that there isn't a recovery pending */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (type == F_UNLCK)
					printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
				break;
			}
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return an error that'll get this operation restarted */
				error = NFSERR_GRACE;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			lck_mtx_unlock(&nmp->nm_lock);
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = NFSTONMP(np);
			if ((error2 = nfs_sigintr(nmp, NULL, NULL, 0))) {
				error = error2;
				if (error2 != EINTR) {
					if (type == F_UNLCK)
						printf("nfs3_lockd_request: aborting unlock request, error %d\n", error);
					break;
				}
			}
			/* ...and that we still support locks */
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_DISABLED) {
				if (error == EWOULDBLOCK)
					error = ENOTSUP;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			/* ...and that there isn't a recovery pending */
			if ((error == EWOULDBLOCK) && (nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
				/* recovery pending... return to allow recovery to occur */
				error = NFSERR_DENIED;
				lck_mtx_unlock(&nmp->nm_lock);
				break;
			}
			interruptable = NMFLAG(nmp, INTR);
			if ((error != EWOULDBLOCK) ||
			    ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) ||
			    ((flags & R_RECOVER) && ((now.tv_sec - starttime) > 30))) {
				if ((error == EWOULDBLOCK) && (flags & R_RECOVER)) {
					/* give up if this is for recovery and taking too long */
					error = ETIMEDOUT;
				} else if ((nmp->nm_state & NFSSTA_RECOVER) && !(flags & R_RECOVER)) {
					/* recovery pending... return an error that'll get this operation restarted */
					error = NFSERR_GRACE;
				}
				lck_mtx_unlock(&nmp->nm_lock);
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq->lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					nfs_lockdmsg_dequeue(msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(msgreq);
					msgreq->lmr_saved_errno = error;
					msgreq->lmr_errno = 0;
					msgreq->lmr_answered = 0;
					/* reset timeout */
					timeo = 2;
					/* send cancel request */
					continue;
				}
				break;
			}

			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq->lmr_errno != EINPROGRESS) &&
			    !(msg->lm_flags & LOCKD_MSG_DENIED_GRACE) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lck_mtx_unlock(&nmp->nm_lock);
				lastmsg = now.tv_sec;
				nfs_down(nmp, thd, 0, NFSSTA_LOCKTIMEO, "lockd not responding", 1);
				wentdown = 1;
			} else
				lck_mtx_unlock(&nmp->nm_lock);

			if (msgreq->lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry. First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(msgreq);
				msgreq->lmr_saved_errno = msgreq->lmr_errno;
				msgreq->lmr_errno = 0;
				msgreq->lmr_answered = 0;
				timeo = 2;
				/* send cancel then resend request */
				continue;
			}

			/*
			 * We timed out, so we will resend the request.
			 */
			if (!(flags & R_RECOVER))
				timeo *= 2;
			if (timeo > 30)
				timeo = 30;
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(NFSTONMP(np), thd, NFSSTA_LOCKTIMEO,
			wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq->lmr_answered && (msg->lm_flags & LOCKD_MSG_DENIED_GRACE)) {
			/*
			 * The lock request was denied because the server lockd is
			 * still in its grace period. So, we need to try the
			 * request again in a little bit. Return the GRACE error so
			 * the higher levels can perform the retry.
			 */
			msgreq->lmr_saved_errno = msgreq->lmr_errno = error = NFSERR_GRACE;
		}

		if (msgreq->lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 30;
			msgreq->lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq->lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and return a DENIED error so the
			 * higher levels can resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = NFSERR_DENIED;
			/* Will dequeue msgreq after the following break at the end of this routine */
			break;
		}

		/*
		 * If the blocked lock request was cancelled,
		 * restore the error condition from when we
		 * originally bailed on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq->lmr_saved_errno;
		} else {
			error = msgreq->lmr_errno;
		}

		nmp = NFSTONMP(np);
		if ((error == ENOTSUP) && nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK)) {
			/*
			 * We have NO evidence that locks work and lockd
			 * returned ENOTSUP. Let's take this as a hint
			 * that locks aren't supported and disable them
			 * for this mount.
			 */
			nfs_lockdmsg_dequeue(msgreq);
			lck_mtx_unlock(nfs_lock_mutex);
			lck_mtx_lock(&nmp->nm_lock);
			if (nmp->nm_lockmode == NFS_LOCK_MODE_ENABLED) {
				nmp->nm_lockmode = NFS_LOCK_MODE_DISABLED;
				nfs_lockd_mount_unregister(nmp);
			}
			nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
			lck_mtx_unlock(&nmp->nm_lock);
			printf("lockd returned ENOTSUP, disabling locks for nfs server: %s\n",
				vfs_statfs(nmp->nm_mountp)->f_mntfromname);
			return (error);
		}
		if (!error) {
			/* record that NFS file locking has worked on this mount */
			if (nmp) {
				lck_mtx_lock(&nmp->nm_lock);
				if (!(nmp->nm_state & NFSSTA_LOCKSWORK))
					nmp->nm_state |= NFSSTA_LOCKSWORK;
				lck_mtx_unlock(&nmp->nm_lock);
			}
		}
		break;
	}

	nfs_lockdmsg_dequeue(msgreq);

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}

/*
 * Send an NLM LOCK message to the server
 */
int
nfs3_setlock_rpc(
	nfsnode_t np,
	struct nfs_open_file *nofp,
	struct nfs_file_lock *nflp,
	int reclaim,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfs_lock_owner *nlop = nflp->nfl_owner;
	struct nfsmount *nmp;
	int error;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);

	if (!nlop->nlo_open_owner) {
		nfs_open_owner_ref(nofp->nof_owner);
		nlop->nlo_open_owner = nofp->nof_owner;
	}
	if ((error = nfs_lock_owner_set_busy(nlop, thd)))
		return (error);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	if ((nflp->nfl_flags & NFS_FILE_LOCK_WAIT) && !reclaim)
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	if (reclaim)
		msg->lm_flags |= LOCKD_MSG_RECLAIM;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(cred, &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = nflp->nfl_start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(nflp->nfl_start, nflp->nfl_end);
	msg->lm_fl.l_type = nflp->nfl_type;
	msg->lm_fl.l_pid = nlop->nlo_pid;
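	/*
	 * Note: the lock is tracked internally as an inclusive
	 * [nfl_start, nfl_end] range; NFS_FLOCK_LENGTH converts that back
	 * to the POSIX-style l_len the lock daemon expects (presumably
	 * with an "end of file" end mapping to an l_len of 0).
	 */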

	error = nfs3_lockd_request(np, 0, &msgreq, flags, thd);

	nfs_lock_owner_clear_busy(nlop);
	return (error);
}

/*
 * Send an NLM UNLOCK message to the server
 */
int
nfs3_unlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	__unused int type,
	uint64_t start,
	uint64_t end,
	int flags,
	thread_t thd,
	kauth_cred_t cred)
{
	struct nfsmount *nmp;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (!nmp)
		return (ENXIO);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(cred, &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	msg->lm_fl.l_type = F_UNLCK;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	return (nfs3_lockd_request(np, F_UNLCK, &msgreq, flags, thd));
}

/*
 * Send an NLM LOCK TEST message to the server
 */
int
nfs3_getlock_rpc(
	nfsnode_t np,
	struct nfs_lock_owner *nlop,
	struct flock *fl,
	uint64_t start,
	uint64_t end,
	vfs_context_t ctx)
{
	struct nfsmount *nmp;
	int error;
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;

	nmp = NFSTONMP(np);
	if (nfs_mount_gone(nmp))
		return (ENXIO);

	/* set up lock message request structure */
	bzero(&msgreq, sizeof(msgreq));
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags |= LOCKD_MSG_TEST;
	msg->lm_fh_len = (nmp->nm_vers == NFS_VER2) ? NFSX_V2FH : np->n_fhsize;
	bcopy(np->n_fhp, msg->lm_fh, msg->lm_fh_len);
	cru2x(vfs_context_ucred(ctx), &msg->lm_cred);

	msg->lm_fl.l_whence = SEEK_SET;
	msg->lm_fl.l_start = start;
	msg->lm_fl.l_len = NFS_FLOCK_LENGTH(start, end);
	msg->lm_fl.l_type = fl->l_type;
	msg->lm_fl.l_pid = nlop->nlo_pid;

	error = nfs3_lockd_request(np, 0, &msgreq, 0, vfs_context_thread(ctx));

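	/*
	 * For a TEST request the daemon reports any conflicting lock back
	 * in lm_fl, so on success we copy that out to the caller's flock
	 * structure; an l_type of F_UNLCK means no conflicting lock was found.
	 */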
	if (!error && (msg->lm_flags & LOCKD_MSG_TEST) && !msgreq.lmr_errno) {
		if (msg->lm_fl.l_type != F_UNLCK) {
			fl->l_type = msg->lm_fl.l_type;
			fl->l_pid = msg->lm_fl.l_pid;
			fl->l_start = msg->lm_fl.l_start;
			fl->l_len = msg->lm_fl.l_len;
			fl->l_whence = SEEK_SET;
		} else
			fl->l_type = F_UNLCK;
	}

	return (error);
}

/*
 * nfslockdans --
 *	NFS advisory byte-level locks answer from the lock daemon.
 */
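/*
 * Called when lockd passes an answer back down to the kernel: the
 * matching pending request is marked answered and its waiter (asleep
 * in nfs3_lockd_request) is woken with wakeup(msgreq). EPIPE tells
 * lockd we have no record of the transaction.
 */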
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION)
		return (EINVAL);

	lck_mtx_lock(nfs_lock_mutex);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		/*
		 * We need to make sure this request isn't being cancelled.
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
			msgreq = NULL;
	}
	if (!msgreq) {
		lck_mtx_unlock(nfs_lock_mutex);
		return (EPIPE);
	}

	msgreq->lmr_errno = ansp->la_errno;
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			else
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}
	if (ansp->la_flags & LOCKD_ANS_DENIED_GRACE)
		msgreq->lmr_msg.lm_flags |= LOCKD_MSG_DENIED_GRACE;

	msgreq->lmr_answered = 1;
	lck_mtx_unlock(nfs_lock_mutex);
	wakeup(msgreq);

	return (0);
}

/*
 * nfslockdnotify --
 *	NFS host restart notification from the lock daemon.
 *
 * Used to initiate reclaiming of held locks when a server we
 * have mounted reboots.
 */
int
nfslockdnotify(proc_t p, user_addr_t argp)
{
	int error, i, headsize;
	struct lockd_notify ln;
	struct nfsmount *nmp;
	struct sockaddr *saddr;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	headsize = (char*)&ln.ln_addr[0] - (char*)&ln.ln_version;
	error = copyin(argp, &ln, headsize);
	if (error)
		return (error);
	if (ln.ln_version != LOCKD_NOTIFY_VERSION)
		return (EINVAL);
	if ((ln.ln_addrcount < 1) || (ln.ln_addrcount > 128))
		return (EINVAL);
	argp += headsize;
	saddr = (struct sockaddr *)&ln.ln_addr[0];
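	/*
	 * saddr points into ln.ln_addr[0]; each copyin below overwrites
	 * that slot, so saddr always refers to the address currently
	 * being checked against the mount list.
	 */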

	lck_mtx_lock(nfs_lock_mutex);

	for (i = 0; i < ln.ln_addrcount; i++) {
		error = copyin(argp, &ln.ln_addr[0], sizeof(ln.ln_addr[0]));
		if (error)
			break;
		argp += sizeof(ln.ln_addr[0]);
		/* scan lockd mount list for match to this address */
		TAILQ_FOREACH(nmp, &nfs_lockd_mount_list, nm_ldlink) {
			/* check if address matches this mount's server address */
			if (!nmp->nm_saddr || nfs_sockaddr_cmp(saddr, nmp->nm_saddr))
				continue;
			/* We have a match! Mark it as needing recovery. */
			lck_mtx_lock(&nmp->nm_lock);
			nfs_need_recover(nmp, 0);
			lck_mtx_unlock(&nmp->nm_lock);
		}
	}

	lck_mtx_unlock(nfs_lock_mutex);

	return (error);
}