/*
 * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>		/* for hz */
#include <sys/file_internal.h>
#include <sys/malloc.h>
#include <sys/lockf.h>		/* Must come after sys/malloc.h */
#include <sys/kpi_mbuf.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>	/* for p_start */
#include <sys/kauth.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>

#include <kern/thread.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

#define OFF_MAX QUAD_MAX

/*
 * globals for managing the lockd fifo
 */
vnode_t nfslockdvnode = 0;
int nfslockdwaiting = 0;
time_t nfslockdstarttimeout = 0;
int nfslockdfifolock = 0;
#define NFSLOCKDFIFOLOCK_LOCKED	1
#define NFSLOCKDFIFOLOCK_WANT	2
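/*
 * Note: nfslockdfifolock builds a simple sleep lock out of an int.
 * A holder sets the LOCKED bit; a contender sets the WANT bit and
 * tsleep()s on the flag's address; on release the holder clears
 * LOCKED and, if WANT was set, clears it and wakeup()s the sleepers.
 * (See the fifo write loop in nfs_dolock() below.)
 */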

/*
 * Pending lock request messages are kept on this queue, which is
 * sorted by transaction ID (xid).
 */
uint64_t nfs_lockxid = 0;
LOCKD_MSG_QUEUE nfs_pendlockq;

/*
 * This structure is used to identify processes which have acquired NFS locks.
 * Knowing which processes have ever acquired locks allows us to short-circuit
 * unlock requests for processes that have never had an NFS file lock, thus
 * avoiding a costly and unnecessary lockd request.
 */
struct nfs_lock_pid {
	TAILQ_ENTRY(nfs_lock_pid) lp_lru;	/* LRU list */
	LIST_ENTRY(nfs_lock_pid) lp_hash;	/* hash chain */
	int lp_valid;				/* valid entry? */
	int lp_time;				/* last time seen valid */
	pid_t lp_pid;				/* The process ID. */
	struct timeval lp_pid_start;		/* Start time of process id */
};

#define NFS_LOCK_PID_HASH_SIZE	64	// XXX tune me
#define NFS_LOCK_PID_HASH(pid)	\
	(&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;
TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;
u_long nfs_lock_pid_hash;
int nfs_lock_pid_lock;
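/*
 * Note: hashinit() (called from nfs_lockinit() below) allocates the
 * table and fills in nfs_lock_pid_hash with the mask that
 * NFS_LOCK_PID_HASH() applies.  nfs_lock_pid_lock guards both the
 * hash table and the LRU list; it is a tri-state flag: 0 = free,
 * 1 = held, -1 = held with waiters (see nfs_lock_pid_check()).
 */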


/*
 * initialize global nfs lock state
 */
void
nfs_lockinit(void)
{
	TAILQ_INIT(&nfs_pendlockq);
	nfs_lock_pid_lock = 0;
	nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
					 M_TEMP, &nfs_lock_pid_hash);
	TAILQ_INIT(&nfs_lock_pid_lru);
}

/*
 * insert a lock request message into the pending queue
 */
static inline void
nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
{
	LOCKD_MSG_REQUEST *mr;

	mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
	if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
		/* fast path: empty queue or new largest xid */
		TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
		return;
	}
	/* slow path: walk backwards through the list to find insertion point */
	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
		mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
	}
	if (mr) {
		TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
	} else {
		TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
	}
}

/*
 * remove a lock request message from the pending queue
 */
static inline void
nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
{
	TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
}

/*
 * find a pending lock request message by xid
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests.  We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 */
static inline LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_xid(uint64_t lockxid)
{
	LOCKD_MSG_REQUEST *mr;

	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (mr->lmr_msg.lm_xid == lockxid)
			return mr;
		if (mr->lmr_msg.lm_xid > lockxid)
			return NULL;
	}
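	/* not found: TAILQ_FOREACH leaves mr NULL when it runs off the end */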
	return mr;
}

/*
 * Because we can't depend on nlm_granted messages containing the same
 * cookie we sent with the original lock request, we need code to test if
 * an nlm_granted answer matches the lock request.  We also need code
 * that can find a lockd message based solely on the nlm_granted answer.
 */

/*
 * compare lockd message to answer
 *
 * returns 0 on equality and 1 if different
 */
static inline int
nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
{
	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
		return 1;
	if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
		return 1;
	if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
		return 1;
	if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
		return 1;
	return 0;
}

/*
 * find a pending lock request message based on the lock info provided
 * in the lockd_ans/nlm_granted data.  We need this because we can't
 * depend on nlm_granted messages containing the same cookie we sent
 * with the original lock request.
 *
 * We search from the head of the list assuming that the message we're
 * looking for is for an older request (because we have an answer to it).
 * This assumes that lock requests will be answered primarily in FIFO order.
 * However, this may not be the case if there are blocked requests.  We may
 * want to move blocked requests to a separate queue (but that'll complicate
 * duplicate xid checking).
 */
static inline LOCKD_MSG_REQUEST *
nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *mr;

	if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
		return NULL;
	TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
		if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
			break;
	}
	return mr;
}

/*
 * return the next unique lock request transaction ID
 */
static inline uint64_t
nfs_lockxid_get(void)
{
	LOCKD_MSG_REQUEST *mr;

	/* derive initial lock xid from system time */
	if (!nfs_lockxid) {
		/*
		 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
		 * due to a broken clock) because we immediately increment it
		 * and we guarantee to never use xid 0.  So, nfs_lockxid should only
		 * ever be 0 the first time this function is called.
		 */
		struct timeval tv;
		microtime(&tv);
		nfs_lockxid = (uint64_t)tv.tv_sec << 12;
	}
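		/*
		 * Note: the 12-bit shift gives each boot-second's starting
		 * point 4096 xids of headroom, presumably so that xids issued
		 * after a quick restart are unlikely to collide with xids
		 * issued before it.
		 */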
	}

	/* make sure we get a unique xid */
	do {
		/* Skip zero xid if it should ever happen. */
		if (++nfs_lockxid == 0)
			nfs_lockxid++;
		if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
		    (mr->lmr_msg.lm_xid < nfs_lockxid)) {
			/* fast path: empty queue or new largest xid */
			break;
		}
		/* check if xid is already in use */
	} while (nfs_lockdmsg_find_by_xid(nfs_lockxid));

	return nfs_lockxid;
}


/*
 * Check the nfs_lock_pid hash table for an entry and, if requested,
 * add the entry if it is not found.
 *
 * (Also, if adding, try to clean up some stale entries.)
 */
static int
nfs_lock_pid_check(proc_t p, int addflag, vnode_t vp)
{
	struct nfs_lock_pid *lp, *lplru, *lplru_next;
	proc_t plru;
	int error = 0;
	struct timeval now;

	/* lock hash */
loop:
	if (nfs_lock_pid_lock) {
		struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
		while (nfs_lock_pid_lock) {
			nfs_lock_pid_lock = -1;
			tsleep(&nfs_lock_pid_lock, PCATCH, "nfslockpid", 0);
			if ((error = nfs_sigintr(nmp, NULL, p)))
				return (error);
		}
		goto loop;
	}
	nfs_lock_pid_lock = 1;

	/* Search hash chain */
	error = ENOENT;
	lp = NFS_LOCK_PID_HASH(proc_pid(p))->lh_first;
	for (; lp != NULL; lp = lp->lp_hash.le_next)
		if (lp->lp_pid == proc_pid(p)) {
			/* found pid... */
			if (timevalcmp(&lp->lp_pid_start, &p->p_stats->p_start, ==)) {
				/* ...and it's valid */
				/* move to tail of LRU */
				TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
				microuptime(&now);
				lp->lp_time = now.tv_sec;
				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
				error = 0;
				break;
			}
			/* ...but it's no longer valid */
			/* remove from hash, invalidate, and move to lru head */
			LIST_REMOVE(lp, lp_hash);
			lp->lp_valid = 0;
			TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
			TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru);
			lp = NULL;
			break;
		}

	/* if we didn't find it (valid) and we've been asked to add it */
	if ((error == ENOENT) && addflag) {
		/* scan lru list for invalid, stale entries to reuse/free */
		int lrucnt = 0;
		microuptime(&now);
		for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) {
			lplru_next = TAILQ_NEXT(lplru, lp_lru);
			if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) {
				/*
				 * If the oldest LRU entry is relatively new, then don't
				 * bother scanning any further.
				 */
				break;
			}
			/* remove entry from LRU, and check if it's still in use */
			TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru);
			if (!lplru->lp_valid || !(plru = pfind(lplru->lp_pid)) ||
			    timevalcmp(&lplru->lp_pid_start, &plru->p_stats->p_start, !=)) {
				/* no longer in use */
				LIST_REMOVE(lplru, lp_hash);
				if (!lp) {
					/* we'll reuse this one */
					lp = lplru;
				} else {
					/* we can free this one */
					FREE(lplru, M_TEMP);
				}
			} else {
				/* still in use */
				lplru->lp_time = now.tv_sec;
				TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru);
			}
			/* don't check too many entries at once */
			if (++lrucnt > 8)
				break;
		}
		if (!lp) {
			/* we need to allocate a new one */
			MALLOC(lp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid),
				M_TEMP, M_WAITOK | M_ZERO);
		}
		if (!lp) {
			error = ENOMEM;
		} else {
			/* (re)initialize nfs_lock_pid info */
			lp->lp_pid = proc_pid(p);
			lp->lp_pid_start = p->p_stats->p_start;
			/* insert pid in hash */
			LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash);
			lp->lp_valid = 1;
			lp->lp_time = now.tv_sec;
			TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
			error = 0;
		}
	}

	/* unlock hash */
	if (nfs_lock_pid_lock < 0) {
		nfs_lock_pid_lock = 0;
		wakeup(&nfs_lock_pid_lock);
	} else
		nfs_lock_pid_lock = 0;

	return (error);
}


/*
 * nfs_dolock --
 *	NFS advisory byte-level locks (VNOP_ADVLOCK).
 */
int
nfs_dolock(struct vnop_advlock_args *ap)
/* struct vnop_advlock_args {
	struct vnodeop_desc *a_desc;
	vnode_t a_vp;
	caddr_t a_id;
	int a_op;
	struct flock *a_fl;
	int a_flags;
	vfs_context_t a_context;
}; */
{
	LOCKD_MSG_REQUEST msgreq;
	LOCKD_MSG *msg;
	vnode_t vp, wvp;
	struct nfsnode *np;
	int error, error1;
	struct flock *fl;
	int fmode, ioflg;
	struct nfsmount *nmp;
	struct nfs_vattr nvattr;
	off_t start, end;
	struct timeval now;
	int timeo, endtime, lastmsg, wentdown = 0;
	int lockpidcheck;
	kauth_cred_t cred;
	proc_t p;
	struct sockaddr *saddr;

	p = vfs_context_proc(ap->a_context);
	cred = vfs_context_ucred(ap->a_context);

	vp = ap->a_vp;
	fl = ap->a_fl;
	np = VTONFS(vp);

	nmp = VFSTONFS(vnode_mount(vp));
	if (!nmp)
		return (ENXIO);
	if (nmp->nm_flag & NFSMNT_NOLOCKS)
		return (ENOTSUP);

	/*
	 * The NLM protocol doesn't allow the server to return an error
	 * on ranges, so we do that checking here.  Pre LFS (Large File Summit)
	 * standards required EINVAL for the range errors.  More recent
	 * standards use EOVERFLOW, but their EINVAL wording still
	 * encompasses these errors.
	 * Any code sensitive to this is either:
	 *	1) written pre-LFS and so can handle only EINVAL, or
	 *	2) written post-LFS and thus ought to be tolerant of pre-LFS
	 *	   implementations.
	 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
	 */
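	/*
	 * For example: l_start == OFF_MAX with l_len == 2 would extend
	 * past OFF_MAX, so it draws EINVAL here.
	 */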
	if (fl->l_whence != SEEK_END) {
		if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
		    fl->l_start < 0 ||
		    (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
		    (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
			return (EINVAL);
	}
	/*
	 * If daemon is running take a ref on its fifo vnode
	 */
	if (!(wvp = nfslockdvnode)) {
		if (!nfslockdwaiting && !nfslockdstarttimeout)
			return (ENOTSUP);
		/*
		 * Don't wake lock daemon if it hasn't been started yet and
		 * this is an unlock request (since we couldn't possibly
		 * actually have a lock on the file).  This could be an
		 * uninformed unlock request due to closef()'s behavior of doing
		 * unlocks on all files if a process has had a lock on ANY file.
		 */
		if (!nfslockdvnode && (fl->l_type == F_UNLCK))
			return (EINVAL);
		microuptime(&now);
		if (nfslockdwaiting) {
			/* wake up lock daemon */
			nfslockdstarttimeout = now.tv_sec + 60;
			(void)wakeup((void *)&nfslockdwaiting);
		}
		/* wait on nfslockdvnode for a while to allow daemon to start */
		while (!nfslockdvnode && (now.tv_sec < nfslockdstarttimeout)) {
			error = tsleep((void *)&nfslockdvnode, PCATCH | PUSER, "lockdstart", 2*hz);
			if (error && (error != EWOULDBLOCK))
				return (error);
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			nmp = VFSTONFS(vnode_mount(vp));
			if (!nmp)
				return (ENXIO);
			if (nmp->nm_flag & NFSMNT_NOLOCKS)
				return (ENOTSUP);
			if (!error)
				break;
			microuptime(&now);
		}
		/*
		 * Check for nfslockdvnode one more time.
		 * If the daemon hasn't started by now, there's a problem.
		 */
		if (!(wvp = nfslockdvnode))
			return (ENOTSUP);
	}
	error = vnode_getwithref(wvp);
	if (error)
		return (ENOTSUP);
	error = vnode_ref(wvp);
	if (error) {
		vnode_put(wvp);
		return (ENOTSUP);
	}

	/*
	 * Need to check if this process has successfully acquired an NFS lock before.
	 * If not, and this is an unlock request, we can simply return success here.
	 */
	lockpidcheck = nfs_lock_pid_check(p, 0, vp);
	if (lockpidcheck) {
		if (lockpidcheck != ENOENT) {
			vnode_rele(wvp);
			vnode_put(wvp);
			return (lockpidcheck);
		}
		if (ap->a_op == F_UNLCK) {
			vnode_rele(wvp);
			vnode_put(wvp);
			return (0);
		}
	}

	/*
	 * The NFS Lock Manager protocol doesn't directly handle
	 * negative lengths or SEEK_END, so we need to normalize
	 * things here where we have all the info.
	 * (Note: SEEK_CUR is already adjusted for at this point)
	 */
	/* Convert the flock structure into a start and end. */
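	/*
	 * For example: l_whence == SEEK_SET, l_start == 100, l_len == -10
	 * locks bytes 90..99 (start = 90, end = 99), while l_len == 0
	 * locks from l_start to the end of file (end = -1).
	 */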
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * to fl->l_start when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;
	case SEEK_END:
		/*
		 * Need to flush, and refetch attributes to make
		 * sure we have the correct end of file offset.
		 */
		if (np->n_flag & NMODIFIED) {
			NATTRINVALIDATE(np);
			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
			if (error) {
				vnode_rele(wvp);
				vnode_put(wvp);
				return (error);
			}
		}
		NATTRINVALIDATE(np);

		error = nfs_getattr(vp, &nvattr, cred, p);
		if (error) {
			vnode_rele(wvp);
			vnode_put(wvp);
			return (error);
		}
		start = np->n_size + fl->l_start;
		break;
	default:
		vnode_rele(wvp);
		vnode_put(wvp);
		return (EINVAL);
	}
	if (fl->l_len == 0)
		end = -1;
	else if (fl->l_len > 0)
		end = start + fl->l_len - 1;
	else { /* l_len is negative */
		end = start - 1;
		start += fl->l_len;
	}
	if (start < 0) {
		vnode_rele(wvp);
		vnode_put(wvp);
		return (EINVAL);
	}
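	/*
	 * NFSv2 is served by NLM versions 1-3, which carry only 32-bit
	 * offsets, so reject offsets with the sign bit set (>= 2^31),
	 * presumably to avoid sign-extension trouble as well.
	 */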
	if (!NFS_ISV3(vp) &&
	    ((start >= 0x80000000) || (end >= 0x80000000))) {
		vnode_rele(wvp);
		vnode_put(wvp);
		return (EINVAL);
	}

	/*
	 * Fill in the information structure.
	 */
	msgreq.lmr_answered = 0;
	msgreq.lmr_errno = 0;
	msgreq.lmr_saved_errno = 0;
	msg = &msgreq.lmr_msg;
	msg->lm_version = LOCKD_MSG_VERSION;
	msg->lm_flags = 0;

	msg->lm_fl = *fl;
	msg->lm_fl.l_start = start;
	if (end != -1)
		msg->lm_fl.l_len = end - start + 1;
	msg->lm_fl.l_pid = proc_pid(p);

	if (ap->a_flags & F_WAIT)
		msg->lm_flags |= LOCKD_MSG_BLOCK;
	if (ap->a_op == F_GETLK)
		msg->lm_flags |= LOCKD_MSG_TEST;

	nmp = VFSTONFS(vnode_mount(vp));
	if (!nmp) {
		vnode_rele(wvp);
		vnode_put(wvp);
		return (ENXIO);
	}

	saddr = mbuf_data(nmp->nm_nam);
	bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
	msg->lm_fh_len = NFS_ISV3(vp) ? VTONFS(vp)->n_fhsize : NFSX_V2FH;
	bcopy(VTONFS(vp)->n_fhp, msg->lm_fh, msg->lm_fh_len);
	if (NFS_ISV3(vp))
		msg->lm_flags |= LOCKD_MSG_NFSV3;
	cru2x(cred, &msg->lm_cred);

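	/*
	 * Prime lastmsg so that the first "lockd not responding" complaint
	 * waits nm_tprintf_initial_delay seconds rather than the full
	 * nm_tprintf_delay repeat interval.
	 */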
	microuptime(&now);
	lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));

	fmode = FFLAGS(O_WRONLY);
	if ((error = VNOP_OPEN(wvp, fmode, ap->a_context))) {
		vnode_rele(wvp);
		vnode_put(wvp);
		return (error);
	}
	vnode_lock(wvp);
	++wvp->v_writecount;
	vnode_unlock(wvp);

	/* allocate unique xid */
	msg->lm_xid = nfs_lockxid_get();
	nfs_lockdmsg_enqueue(&msgreq);

	timeo = 2*hz;
#define IO_NOMACCHECK 0
	ioflg = IO_UNIT | IO_NOMACCHECK;
	for (;;) {
		error = 0;
		while (nfslockdfifolock & NFSLOCKDFIFOLOCK_LOCKED) {
			nfslockdfifolock |= NFSLOCKDFIFOLOCK_WANT;
			error = tsleep((void *)&nfslockdfifolock,
					PCATCH | PUSER, "lockdfifo", 20*hz);
			if (error)
				break;
		}
		if (error)
			break;
		nfslockdfifolock |= NFSLOCKDFIFOLOCK_LOCKED;

		error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)msg, sizeof(*msg), 0,
				UIO_SYSSPACE32, ioflg, proc_ucred(kernproc), NULL, p);

		nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_LOCKED;
		if (nfslockdfifolock & NFSLOCKDFIFOLOCK_WANT) {
			nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_WANT;
			wakeup((void *)&nfslockdfifolock);
		}

		if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) {
			break;
		}

		/*
		 * Always wait for an answer.  Not waiting for unlocks could
		 * cause a lock to be left if the unlock request gets dropped.
		 */

		/*
		 * Retry if it takes too long to get a response.
		 *
		 * The timeout numbers were picked out of thin air... they start
		 * at 2 and double each timeout with a max of 60 seconds.
		 *
		 * In order to maintain responsiveness, we pass a small timeout
		 * to tsleep and calculate the timeouts ourselves.  This allows
		 * us to pick up on mount changes quicker.
		 */
wait_for_granted:
		error = EWOULDBLOCK;
		microuptime(&now);
		if ((timeo/hz) > 0)
			endtime = now.tv_sec + timeo/hz;
		else
			endtime = now.tv_sec + 1;
		while (now.tv_sec < endtime) {
			error = tsleep((void *)&msgreq, PCATCH | PUSER, "lockd", 2*hz);
			if (msgreq.lmr_answered) {
				/*
				 * Note: it's possible to have a lock granted at
				 * essentially the same time that we get interrupted.
				 * Since the lock may be granted, we can't return an
				 * error from this request or we might not unlock the
				 * lock that's been granted.
				 */
				error = 0;
				break;
			}
			if (error != EWOULDBLOCK)
				break;
			/* check that we still have our mount... */
			/* ...and that we still support locks */
			nmp = VFSTONFS(vnode_mount(vp));
			if (!nmp || (nmp->nm_flag & NFSMNT_NOLOCKS))
				break;
			/*
			 * If the mount is hung and we've requested not to hang
			 * on remote filesystems, then bail now.
			 */
			if ((p != NULL) && ((proc_noremotehang(p)) != 0) &&
			    ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
				if (fl->l_type == F_UNLCK)
					printf("nfs_dolock: aborting unlock request "
					    "due to timeout (noremotehang)\n");
				error = EIO;
				break;
			}
			microuptime(&now);
		}
		if (error) {
			/* check that we still have our mount... */
			nmp = VFSTONFS(vnode_mount(vp));
			if (!nmp) {
				if (error == EWOULDBLOCK)
					error = ENXIO;
				break;
			}
			/* ...and that we still support locks */
			if (nmp->nm_flag & NFSMNT_NOLOCKS) {
				if (error == EWOULDBLOCK)
					error = ENOTSUP;
				break;
			}
			if ((error == ENOTSUP) &&
			    (nmp->nm_state & NFSSTA_LOCKSWORK)) {
				/*
				 * We have evidence that locks work, yet lockd
				 * returned ENOTSUP.  This is probably because
				 * it was unable to contact the server's lockd to
				 * send it the request.
				 *
				 * Because we know locks work, we'll consider
				 * this failure to be a timeout.
				 */
				error = EWOULDBLOCK;
			}
			if (error != EWOULDBLOCK) {
				/*
				 * We're going to bail on this request.
				 * If we were a blocked lock request, send a cancel.
				 */
				if ((msgreq.lmr_errno == EINPROGRESS) &&
				    !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
					/* set this request up as a cancel */
					msg->lm_flags |= LOCKD_MSG_CANCEL;
					nfs_lockdmsg_dequeue(&msgreq);
					msg->lm_xid = nfs_lockxid_get();
					nfs_lockdmsg_enqueue(&msgreq);
					msgreq.lmr_saved_errno = error;
					msgreq.lmr_errno = 0;
					msgreq.lmr_answered = 0;
					/* reset timeout */
					timeo = 2*hz;
					/* send cancel request */
					continue;
				}
				break;
			}

			/*
			 * If the mount is hung and we've requested not to hang
			 * on remote filesystems, then bail now.
			 */
			if ((p != NULL) && ((proc_noremotehang(p)) != 0) &&
			    ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
				if (fl->l_type == F_UNLCK)
					printf("nfs_dolock: aborting unlock request "
					    "due to timeout (noremotehang)\n");
				error = EIO;
				break;
			}
			/* warn if we're not getting any response */
			microuptime(&now);
			if ((msgreq.lmr_errno != EINPROGRESS) &&
			    (nmp->nm_tprintf_initial_delay != 0) &&
			    ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
				lastmsg = now.tv_sec;
				nfs_down(nmp, p, 0, NFSSTA_LOCKTIMEO, "lockd not responding");
				wentdown = 1;
			}
			if (msgreq.lmr_errno == EINPROGRESS) {
				/*
				 * We've got a blocked lock request that we are
				 * going to retry.  First, we'll want to try to
				 * send a cancel for the previous request.
				 *
				 * Clear errno so if we don't get a response
				 * to the resend we'll call nfs_down().
				 * Also reset timeout because we'll expect a
				 * quick response to the cancel/resend (even if
				 * it is NLM_BLOCKED).
				 */
				msg->lm_flags |= LOCKD_MSG_CANCEL;
				nfs_lockdmsg_dequeue(&msgreq);
				msg->lm_xid = nfs_lockxid_get();
				nfs_lockdmsg_enqueue(&msgreq);
				msgreq.lmr_saved_errno = msgreq.lmr_errno;
				msgreq.lmr_errno = 0;
				msgreq.lmr_answered = 0;
				timeo = 2*hz;
				/* send cancel then resend request */
				continue;
			}
			/*
			 * We timed out, so we will rewrite the request
			 * to the fifo, but only if it isn't already full.
			 */
			ioflg |= IO_NDELAY;
			timeo *= 2;
			if (timeo > 60*hz)
				timeo = 60*hz;
			/* resend request */
			continue;
		}

		/* we got a response, so the server's lockd is OK */
		nfs_up(VFSTONFS(vnode_mount(vp)), p, NFSSTA_LOCKTIMEO,
			wentdown ? "lockd alive again" : NULL);
		wentdown = 0;

		if (msgreq.lmr_errno == EINPROGRESS) {
			/* got NLM_BLOCKED response */
			/* need to wait for NLM_GRANTED */
			timeo = 60*hz;
			msgreq.lmr_answered = 0;
			goto wait_for_granted;
		}

		if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
		    (msgreq.lmr_saved_errno == EINPROGRESS)) {
			/*
			 * We just got a successful reply to the
			 * cancel of the previous blocked lock request.
			 * Now, go ahead and resend the request.
			 */
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			nfs_lockdmsg_dequeue(&msgreq);
			msg->lm_xid = nfs_lockxid_get();
			nfs_lockdmsg_enqueue(&msgreq);
			msgreq.lmr_saved_errno = 0;
			msgreq.lmr_errno = 0;
			msgreq.lmr_answered = 0;
			timeo = 2*hz;
			/* resend request */
			continue;
		}

		if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
			if (msg->lm_fl.l_type != F_UNLCK) {
				fl->l_type = msg->lm_fl.l_type;
				fl->l_pid = msg->lm_fl.l_pid;
				fl->l_start = msg->lm_fl.l_start;
				fl->l_len = msg->lm_fl.l_len;
				fl->l_whence = SEEK_SET;
			} else {
				fl->l_type = F_UNLCK;
			}
		}

		/*
		 * If the blocked lock request was cancelled,
		 * restore the error condition from when we
		 * originally bailed on the request.
		 */
		if (msg->lm_flags & LOCKD_MSG_CANCEL) {
			msg->lm_flags &= ~LOCKD_MSG_CANCEL;
			error = msgreq.lmr_saved_errno;
		} else
			error = msgreq.lmr_errno;

		if (!error) {
			/* record that NFS file locking has worked on this mount */
			nmp = VFSTONFS(vnode_mount(vp));
			if (nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK))
				nmp->nm_state |= NFSSTA_LOCKSWORK;
			/*
			 * If we successfully acquired a lock, make sure this pid
			 * is in the nfs_lock_pid hash table so we know we can't
			 * short-circuit unlock requests.
			 */
			if ((lockpidcheck == ENOENT) &&
			    ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW)))
				nfs_lock_pid_check(p, 1, vp);
		}
		break;
	}

	nfs_lockdmsg_dequeue(&msgreq);

	error1 = VNOP_CLOSE(wvp, FWRITE, ap->a_context);
	vnode_rele(wvp);
	vnode_put(wvp);
	/* prefer any previous 'error' to our VNOP_CLOSE 'error1'. */
	return (error != 0 ? error : error1);
}

/*
 * nfslockdans --
 *	NFS advisory byte-level locks answer from the lock daemon.
 */
int
nfslockdans(proc_t p, struct lockd_ans *ansp)
{
	LOCKD_MSG_REQUEST *msgreq;
	int error;

	/* Let root make this call. */
	error = proc_suser(p);
	if (error)
		return (error);

	/* the version should match, or we're out of sync */
	if (ansp->la_version != LOCKD_ANS_VERSION)
		return (EINVAL);

	/* try to find the lockd message by transaction id (cookie) */
	msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
	if (ansp->la_flags & LOCKD_ANS_GRANTED) {
		/*
		 * We can't depend on the granted message having our cookie,
		 * so we check the answer against the lockd message found.
		 * If no message was found or it doesn't match the answer,
		 * we look for the lockd message by the answer's lock info.
		 */
		if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
			msgreq = nfs_lockdmsg_find_by_answer(ansp);
		/*
		 * We need to make sure this request isn't being cancelled.
		 * If it is, we don't want to accept the granted message.
		 */
		if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
			msgreq = NULL;
	}
	if (!msgreq)
		return (EPIPE);

	msgreq->lmr_errno = ansp->la_errno;
	if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
		if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
			if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
				msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
			else
				msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
			msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
			msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
			msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
		} else {
			msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
		}
	}

	msgreq->lmr_answered = 1;
	(void)wakeup((void *)msgreq);

	return (0);
}

/*
 * nfslockdfd --
 *	NFS advisory byte-level locks: fifo file# from the lock daemon.
 */
int
nfslockdfd(proc_t p, int fd)
{
	int error;
	vnode_t vp, oldvp;

	error = proc_suser(p);
	if (error)
		return (error);
	if (fd < 0) {
		vp = NULL;
	} else {
		error = file_vnode(fd, &vp);
		if (error)
			return (error);
		error = vnode_getwithref(vp);
		if (error)
			return (error);
		error = vnode_ref(vp);
		if (error) {
			vnode_put(vp);
			return (error);
		}
	}
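	/*
	 * Swap in the new fifo vnode.  The long-term reference taken
	 * above with vnode_ref() is held for as long as the fifo is
	 * registered; the short-term iocount is dropped below, and the
	 * previously registered fifo's reference is released.
	 */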
	oldvp = nfslockdvnode;
	nfslockdvnode = vp;
	if (oldvp) {
		vnode_rele(oldvp);
	}
	(void)wakeup((void *)&nfslockdvnode);
	if (vp) {
		vnode_put(vp);
	}
	return (0);
}

/*
 * nfslockdwait --
 *	lock daemon waiting for lock request
 */
int
nfslockdwait(proc_t p)
{
	int error;

	error = proc_suser(p);
	if (error)
		return (error);
	if (nfslockdwaiting || nfslockdvnode)
		return (EBUSY);

	nfslockdstarttimeout = 0;
	nfslockdwaiting = 1;
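	/*
	 * Sleep here until nfs_dolock() has a request to service and
	 * wakes us with wakeup(&nfslockdwaiting).
	 */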
	tsleep((void *)&nfslockdwaiting, PCATCH | PUSER, "lockd", 0);
	nfslockdwaiting = 0;

	return (0);
}