/* bsd/nfs/nfs_lock.c — from Apple xnu-517.7.7 */
/*
 * Copyright (c) 2002-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*-
 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Berkeley Software Design Inc's name may not be used to endorse or
 *    promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>		/* for hz */
#include <sys/file.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/lockf.h>		/* Must come after sys/malloc.h */
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/unistd.h>
#include <sys/user.h>
#include <sys/vnode.h>

#include <kern/thread_act.h>

#include <machine/limits.h>

#include <net/if.h>

#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsnode.h>
#include <nfs/nfs_lock.h>

85 #define OFF_MAX QUAD_MAX
86
87 uint64_t nfsadvlocks = 0;
88 struct timeval nfsadvlock_longest = {0, 0};
89 struct timeval nfsadvlocks_time = {0, 0};
90
91 /*
92 * globals for managing the lockd fifo
93 */
94 pid_t nfslockdpid = 0;
95 struct file *nfslockdfp = 0;
96 int nfslockdwaiting = 0;
97 int nfslockdfifowritten = 0;
98 int nfslockdfifolock = 0;
99 #define NFSLOCKDFIFOLOCK_LOCKED 1
100 #define NFSLOCKDFIFOLOCK_WANT 2
101
102 /*
103 * pending lock request messages are kept in this queue which is
104 * kept sorted by transaction ID (xid).
105 */
106 uint64_t nfs_lockxid = 0;
107 LOCKD_MSG_QUEUE nfs_pendlockq;
108
109 /*
110 * This structure is used to identify processes which have acquired NFS locks.
111 * Knowing which processes have ever acquired locks allows us to short-circuit
112 * unlock requests for processes that have never had an NFS file lock. Thus
113 * avoiding a costly and unnecessary lockd request.
114 */
115 struct nfs_lock_pid {
116 TAILQ_ENTRY(nfs_lock_pid) lp_lru; /* LRU list */
117 LIST_ENTRY(nfs_lock_pid) lp_hash; /* hash chain */
118 int lp_valid; /* valid entry? */
119 int lp_time; /* last time seen valid */
120 pid_t lp_pid; /* The process ID. */
121 struct timeval lp_pid_start; /* Start time of process id */
122 };
123
124 #define NFS_LOCK_PID_HASH_SIZE 64 // XXX tune me
125 #define NFS_LOCK_PID_HASH(pid) \
126 (&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
127 LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;
128 TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;
129 u_long nfs_lock_pid_hash;
130 int nfs_lock_pid_lock;
131
132
133 /*
134 * initialize global nfs lock state
135 */
136 void
137 nfs_lockinit(void)
138 {
139 TAILQ_INIT(&nfs_pendlockq);
140 nfs_lock_pid_lock = 0;
141 nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
142 M_TEMP, &nfs_lock_pid_hash);
143 TAILQ_INIT(&nfs_lock_pid_lru);
144 }
145
146 /*
147 * insert a lock request message into the pending queue
148 */
149 static inline void
150 nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
151 {
152 LOCKD_MSG_REQUEST *mr;
153
154 mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
155 if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
156 /* fast path: empty queue or new largest xid */
157 TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
158 return;
159 }
160 /* slow path: need to walk list to find insertion point */
161 while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
162 mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
163 }
164 if (mr) {
165 TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
166 } else {
167 TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
168 }
169 }
170
171 /*
172 * remove a lock request message from the pending queue
173 */
174 static inline void
175 nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
176 {
177 TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
178 }
179
180 /*
181 * find a pending lock request message by xid
182 *
183 * We search from the head of the list assuming that the message we're
184 * looking for is for an older request (because we have an answer to it).
185 * This assumes that lock request will be answered primarily in FIFO order.
186 * However, this may not be the case if there are blocked requests. We may
187 * want to move blocked requests to a separate queue (but that'll complicate
188 * duplicate xid checking).
189 */
190 static inline LOCKD_MSG_REQUEST *
191 nfs_lockdmsg_find_by_xid(uint64_t lockxid)
192 {
193 LOCKD_MSG_REQUEST *mr;
194
195 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
196 if (mr->lmr_msg.lm_xid == lockxid)
197 return mr;
198 if (mr->lmr_msg.lm_xid > lockxid)
199 return NULL;
200 }
201 return mr;
202 }
203
204 /*
205 * Because we can't depend on nlm_granted messages containing the same
206 * cookie we sent with the original lock request, we need code test if
207 * an nlm_granted answer matches the lock request. We also need code
208 * that can find a lockd message based solely on the nlm_granted answer.
209 */
210
211 /*
212 * compare lockd message to answer
213 *
214 * returns 0 on equality and 1 if different
215 */
216 static inline int
217 nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
218 {
219 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
220 return 1;
221 if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
222 return 1;
223 if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
224 return 1;
225 if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
226 return 1;
227 if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
228 return 1;
229 if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
230 return 1;
231 return 0;
232 }
233
234 /*
235 * find a pending lock request message based on the lock info provided
236 * in the lockd_ans/nlm_granted data. We need this because we can't
237 * depend on nlm_granted messages containing the same cookie we sent
238 * with the original lock request.
239 *
240 * We search from the head of the list assuming that the message we're
241 * looking for is for an older request (because we have an answer to it).
242 * This assumes that lock request will be answered primarily in FIFO order.
243 * However, this may not be the case if there are blocked requests. We may
244 * want to move blocked requests to a separate queue (but that'll complicate
245 * duplicate xid checking).
246 */
247 static inline LOCKD_MSG_REQUEST *
248 nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
249 {
250 LOCKD_MSG_REQUEST *mr;
251
252 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
253 return NULL;
254 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
255 if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
256 break;
257 }
258 return mr;
259 }
260
261 /*
262 * return the next unique lock request transaction ID
263 */
264 static inline uint64_t
265 nfs_lockxid_get(void)
266 {
267 LOCKD_MSG_REQUEST *mr;
268
269 /* derive initial lock xid from system time */
270 if (!nfs_lockxid) {
271 /*
272 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
273 * due to a broken clock) because we immediately increment it
274 * and we guarantee to never use xid 0. So, nfs_lockxid should only
275 * ever be 0 the first time this function is called.
276 */
277 struct timeval tv;
278 microtime(&tv);
279 nfs_lockxid = (uint64_t)tv.tv_sec << 12;
280 }
281
282 /* make sure we get a unique xid */
283 do {
284 /* Skip zero xid if it should ever happen. */
285 if (++nfs_lockxid == 0)
286 nfs_lockxid++;
287 if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
288 (mr->lmr_msg.lm_xid < nfs_lockxid)) {
289 /* fast path: empty queue or new largest xid */
290 break;
291 }
292 /* check if xid is already in use */
293 } while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
294
295 return nfs_lockxid;
296 }
297
298
299 /*
300 * Check the nfs_lock_pid hash table for an entry and, if requested,
301 * add the entry if it is not found.
302 *
303 * (Also, if adding, try to clean up some stale entries.)
304 */
305 static int
306 nfs_lock_pid_check(struct proc *p, int addflag, struct vnode *vp)
307 {
308 struct nfs_lock_pid *lp, *lplru, *lplru_next;
309 struct proc *plru;
310 int error = 0;
311 struct timeval now;
312
313 /* lock hash */
314 loop:
315 if (nfs_lock_pid_lock) {
316 while (nfs_lock_pid_lock) {
317 nfs_lock_pid_lock = -1;
318 tsleep(&nfs_lock_pid_lock, PCATCH, "nfslockpid", 0);
319 if ((error = nfs_sigintr(VFSTONFS(vp->v_mount), NULL, p)))
320 return (error);
321 }
322 goto loop;
323 }
324 nfs_lock_pid_lock = 1;
325
326 /* Search hash chain */
327 error = ENOENT;
328 lp = NFS_LOCK_PID_HASH(p->p_pid)->lh_first;
329 for (; lp != NULL; lp = lp->lp_hash.le_next)
330 if (lp->lp_pid == p->p_pid) {
331 /* found pid... */
332 if (timevalcmp(&lp->lp_pid_start, &p->p_stats->p_start, ==)) {
333 /* ...and it's valid */
334 /* move to tail of LRU */
335 TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
336 microuptime(&now);
337 lp->lp_time = now.tv_sec;
338 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
339 error = 0;
340 break;
341 }
342 /* ...but it's no longer valid */
343 /* remove from hash, invalidate, and move to lru head */
344 LIST_REMOVE(lp, lp_hash);
345 lp->lp_valid = 0;
346 TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
347 TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru);
348 lp = NULL;
349 break;
350 }
351
352 /* if we didn't find it (valid) and we've been asked to add it */
353 if ((error == ENOENT) && addflag) {
354 /* scan lru list for invalid, stale entries to reuse/free */
355 int lrucnt = 0;
356 microuptime(&now);
357 for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) {
358 lplru_next = TAILQ_NEXT(lplru, lp_lru);
359 if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) {
360 /*
361 * If the oldest LRU entry is relatively new, then don't
362 * bother scanning any further.
363 */
364 break;
365 }
366 /* remove entry from LRU, and check if it's still in use */
367 TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru);
368 if (!lplru->lp_valid || !(plru = pfind(lplru->lp_pid)) ||
369 timevalcmp(&lplru->lp_pid_start, &plru->p_stats->p_start, !=)) {
370 /* no longer in use */
371 LIST_REMOVE(lplru, lp_hash);
372 if (!lp) {
373 /* we'll reuse this one */
374 lp = lplru;
375 } else {
376 /* we can free this one */
377 FREE(lplru, M_TEMP);
378 }
379 } else {
380 /* still in use */
381 lplru->lp_time = now.tv_sec;
382 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru);
383 }
384 /* don't check too many entries at once */
385 if (++lrucnt > 8)
386 break;
387 }
388 if (!lp) {
389 /* we need to allocate a new one */
390 MALLOC(lp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid),
391 M_TEMP, M_WAITOK | M_ZERO);
392 }
393 /* (re)initialize nfs_lock_pid info */
394 lp->lp_pid = p->p_pid;
395 lp->lp_pid_start = p->p_stats->p_start;
396 /* insert pid in hash */
397 LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash);
398 lp->lp_valid = 1;
399 lp->lp_time = now.tv_sec;
400 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
401 error = 0;
402 }
403
404 /* unlock hash */
405 if (nfs_lock_pid_lock < 0) {
406 nfs_lock_pid_lock = 0;
407 wakeup(&nfs_lock_pid_lock);
408 } else
409 nfs_lock_pid_lock = 0;
410
411 return (error);
412 }
413
414
415 /*
416 * nfs_advlock --
417 * NFS advisory byte-level locks.
418 */
419 int
420 nfs_dolock(struct vop_advlock_args *ap)
421 /* struct vop_advlock_args {
422 struct vnodeop_desc *a_desc;
423 struct vnode *a_vp;
424 caddr_t a_id;
425 int a_op;
426 struct flock *a_fl;
427 int a_flags;
428 }; */
429 {
430 LOCKD_MSG_REQUEST msgreq;
431 LOCKD_MSG *msg;
432 struct vnode *vp, *wvp;
433 struct nfsnode *np;
434 int error, error1;
435 struct flock *fl;
436 int fmode, ioflg;
437 struct proc *p;
438 struct nfsmount *nmp;
439 struct vattr vattr;
440 off_t start, end;
441 struct timeval now;
442 int timeo, endtime, lastmsg, wentdown = 0;
443 int lockpidcheck;
444
445 p = current_proc();
446
447 vp = ap->a_vp;
448 fl = ap->a_fl;
449 np = VTONFS(vp);
450
451 nmp = VFSTONFS(vp->v_mount);
452 if (!nmp)
453 return (ENXIO);
454 if (nmp->nm_flag & NFSMNT_NOLOCKS)
455 return (EOPNOTSUPP);
456
457 /*
458 * The NLM protocol doesn't allow the server to return an error
459 * on ranges, so we do it. Pre LFS (Large File Summit)
460 * standards required EINVAL for the range errors. More recent
461 * standards use EOVERFLOW, but their EINVAL wording still
462 * encompasses these errors.
463 * Any code sensitive to this is either:
464 * 1) written pre-LFS and so can handle only EINVAL, or
465 * 2) written post-LFS and thus ought to be tolerant of pre-LFS
466 * implementations.
467 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
468 */
469 if (fl->l_whence != SEEK_END) {
470 if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
471 fl->l_start < 0 ||
472 (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
473 (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
474 return (EINVAL);
475 }
476 /*
477 * If daemon is running take a ref on its fifo
478 */
479 if (!nfslockdfp || !(wvp = (struct vnode *)nfslockdfp->f_data)) {
480 if (!nfslockdwaiting)
481 return (EOPNOTSUPP);
482 /*
483 * Don't wake lock daemon if it hasn't been started yet and
484 * this is an unlock request (since we couldn't possibly
485 * actually have a lock on the file). This could be an
486 * uninformed unlock request due to closef()'s behavior of doing
487 * unlocks on all files if a process has had a lock on ANY file.
488 */
489 if (!nfslockdfp && (fl->l_type == F_UNLCK))
490 return (EINVAL);
491 /* wake up lock daemon */
492 (void)wakeup((void *)&nfslockdwaiting);
493 /* wait on nfslockdfp for a while to allow daemon to start */
494 tsleep((void *)&nfslockdfp, PCATCH | PUSER, "lockd", 60*hz);
495 /* check for nfslockdfp and f_data */
496 if (!nfslockdfp || !(wvp = (struct vnode *)nfslockdfp->f_data))
497 return (EOPNOTSUPP);
498 }
499 VREF(wvp);
500
501 /*
502 * Need to check if this process has successfully acquired an NFS lock before.
503 * If not, and this is an unlock request we can simply return success here.
504 */
505 lockpidcheck = nfs_lock_pid_check(p, 0, vp);
506 if (lockpidcheck) {
507 if (lockpidcheck != ENOENT)
508 return (lockpidcheck);
509 if (ap->a_op == F_UNLCK) {
510 vrele(wvp);
511 return (0);
512 }
513 }
514
515 /*
516 * The NFS Lock Manager protocol doesn't directly handle
517 * negative lengths or SEEK_END, so we need to normalize
518 * things here where we have all the info.
519 * (Note: SEEK_CUR is already adjusted for at this point)
520 */
521 /* Convert the flock structure into a start and end. */
522 switch (fl->l_whence) {
523 case SEEK_SET:
524 case SEEK_CUR:
525 /*
526 * Caller is responsible for adding any necessary offset
527 * to fl->l_start when SEEK_CUR is used.
528 */
529 start = fl->l_start;
530 break;
531 case SEEK_END:
532 /* need to flush, and refetch attributes to make */
533 /* sure we have the correct end of file offset */
534 if (np->n_flag & NMODIFIED) {
535 np->n_xid = 0;
536 error = nfs_vinvalbuf(vp, V_SAVE, p->p_ucred, p, 1);
537 if (error) {
538 vrele(wvp);
539 return (error);
540 }
541 }
542 np->n_xid = 0;
543 error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
544 if (error) {
545 vrele(wvp);
546 return (error);
547 }
548 start = np->n_size + fl->l_start;
549 break;
550 default:
551 vrele(wvp);
552 return (EINVAL);
553 }
554 if (fl->l_len == 0)
555 end = -1;
556 else if (fl->l_len > 0)
557 end = start + fl->l_len - 1;
558 else { /* l_len is negative */
559 end = start - 1;
560 start += fl->l_len;
561 }
562 if (start < 0) {
563 vrele(wvp);
564 return (EINVAL);
565 }
566 if (!NFS_ISV3(vp) &&
567 ((start >= 0x80000000) || (end >= 0x80000000))) {
568 vrele(wvp);
569 return (EINVAL);
570 }
571
572 /*
573 * Fill in the information structure.
574 */
575 msgreq.lmr_answered = 0;
576 msgreq.lmr_errno = 0;
577 msgreq.lmr_saved_errno = 0;
578 msg = &msgreq.lmr_msg;
579 msg->lm_version = LOCKD_MSG_VERSION;
580 msg->lm_flags = 0;
581
582 msg->lm_fl = *fl;
583 msg->lm_fl.l_start = start;
584 if (end != -1)
585 msg->lm_fl.l_len = end - start + 1;
586 msg->lm_fl.l_pid = p->p_pid;
587
588 if (ap->a_flags & F_WAIT)
589 msg->lm_flags |= LOCKD_MSG_BLOCK;
590 if (ap->a_op == F_GETLK)
591 msg->lm_flags |= LOCKD_MSG_TEST;
592
593 nmp = VFSTONFS(vp->v_mount);
594 if (!nmp) {
595 vrele(wvp);
596 return (ENXIO);
597 }
598
599 bcopy(mtod(nmp->nm_nam, struct sockaddr *), &msg->lm_addr,
600 min(sizeof msg->lm_addr,
601 mtod(nmp->nm_nam, struct sockaddr *)->sa_len));
602 msg->lm_fh_len = NFS_ISV3(vp) ? VTONFS(vp)->n_fhsize : NFSX_V2FH;
603 bcopy(VTONFS(vp)->n_fhp, msg->lm_fh, msg->lm_fh_len);
604 if (NFS_ISV3(vp))
605 msg->lm_flags |= LOCKD_MSG_NFSV3;
606 cru2x(p->p_ucred, &msg->lm_cred);
607
608 microuptime(&now);
609 lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
610
611 fmode = FFLAGS(O_WRONLY);
612 if ((error = VOP_OPEN(wvp, fmode, kernproc->p_ucred, p))) {
613 vrele(wvp);
614 return (error);
615 }
616 ++wvp->v_writecount;
617
618 /* allocate unique xid */
619 msg->lm_xid = nfs_lockxid_get();
620 nfs_lockdmsg_enqueue(&msgreq);
621
622 timeo = 2*hz;
623 #define IO_NOMACCHECK 0;
624 ioflg = IO_UNIT | IO_NOMACCHECK;
625 for (;;) {
626 VOP_LEASE(wvp, p, kernproc->p_ucred, LEASE_WRITE);
627
628 error = 0;
629 while (nfslockdfifolock & NFSLOCKDFIFOLOCK_LOCKED) {
630 nfslockdfifolock |= NFSLOCKDFIFOLOCK_WANT;
631 error = tsleep((void *)&nfslockdfifolock,
632 PCATCH | PUSER, "lockdfifo", 20*hz);
633 if (error)
634 break;
635 }
636 if (error)
637 break;
638 nfslockdfifolock |= NFSLOCKDFIFOLOCK_LOCKED;
639
640 error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)msg, sizeof(*msg), 0,
641 UIO_SYSSPACE, ioflg, kernproc->p_ucred, NULL, p);
642
643 nfslockdfifowritten = 1;
644
645 nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_LOCKED;
646 if (nfslockdfifolock & NFSLOCKDFIFOLOCK_WANT) {
647 nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_WANT;
648 wakeup((void *)&nfslockdfifolock);
649 }
650 /* wake up lock daemon */
651 if (nfslockdwaiting)
652 (void)wakeup((void *)&nfslockdwaiting);
653
654 if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) {
655 break;
656 }
657
658 /*
659 * Always wait for an answer. Not waiting for unlocks could
660 * cause a lock to be left if the unlock request gets dropped.
661 */
662
663 /*
664 * Retry if it takes too long to get a response.
665 *
666 * The timeout numbers were picked out of thin air... they start
667 * at 2 and double each timeout with a max of 60 seconds.
668 *
669 * In order to maintain responsiveness, we pass a small timeout
670 * to tsleep and calculate the timeouts ourselves. This allows
671 * us to pick up on mount changes quicker.
672 */
673 wait_for_granted:
674 error = EWOULDBLOCK;
675 microuptime(&now);
676 if ((timeo/hz) > 0)
677 endtime = now.tv_sec + timeo/hz;
678 else
679 endtime = now.tv_sec + 1;
680 while (now.tv_sec < endtime) {
681 error = tsleep((void *)&msgreq, PCATCH | PUSER, "lockd", 2*hz);
682 if (msgreq.lmr_answered) {
683 /*
684 * Note: it's possible to have a lock granted at
685 * essentially the same time that we get interrupted.
686 * Since the lock may be granted, we can't return an
687 * error from this request or we might not unlock the
688 * lock that's been granted.
689 */
690 error = 0;
691 break;
692 }
693 if (error != EWOULDBLOCK)
694 break;
695 /* check that we still have our mount... */
696 /* ...and that we still support locks */
697 nmp = VFSTONFS(vp->v_mount);
698 if (!nmp || (nmp->nm_flag & NFSMNT_NOLOCKS))
699 break;
700 /*
701 * If the mount is hung and we've requested not to hang
702 * on remote filesystems, then bail now.
703 */
704 if ((p != NULL) && ((p->p_flag & P_NOREMOTEHANG) != 0) &&
705 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
706 if (fl->l_type == F_UNLCK)
707 printf("nfs_dolock: aborting unlock request "
708 "due to timeout (noremotehang)\n");
709 error = EIO;
710 break;
711 }
712 microuptime(&now);
713 }
714 if (error) {
715 /* check that we still have our mount... */
716 nmp = VFSTONFS(vp->v_mount);
717 if (!nmp) {
718 if (error == EWOULDBLOCK)
719 error = ENXIO;
720 break;
721 }
722 /* ...and that we still support locks */
723 if (nmp->nm_flag & NFSMNT_NOLOCKS) {
724 if (error == EWOULDBLOCK)
725 error = EOPNOTSUPP;
726 break;
727 }
728 if ((error == EOPNOTSUPP) &&
729 (nmp->nm_state & NFSSTA_LOCKSWORK)) {
730 /*
731 * We have evidence that locks work, yet lockd
732 * returned EOPNOTSUPP. This is probably because
733 * it was unable to contact the server's lockd to
734 * send it the request.
735 *
736 * Because we know locks work, we'll consider
737 * this failure to be a timeout.
738 */
739 error = EWOULDBLOCK;
740 }
741 if (error != EWOULDBLOCK) {
742 /*
743 * We're going to bail on this request.
744 * If we were a blocked lock request, send a cancel.
745 */
746 if ((msgreq.lmr_errno == EINPROGRESS) &&
747 !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
748 /* set this request up as a cancel */
749 msg->lm_flags |= LOCKD_MSG_CANCEL;
750 nfs_lockdmsg_dequeue(&msgreq);
751 msg->lm_xid = nfs_lockxid_get();
752 nfs_lockdmsg_enqueue(&msgreq);
753 msgreq.lmr_saved_errno = error;
754 msgreq.lmr_errno = 0;
755 msgreq.lmr_answered = 0;
756 /* reset timeout */
757 timeo = 2*hz;
758 /* send cancel request */
759 continue;
760 }
761 break;
762 }
763
764 /*
765 * If the mount is hung and we've requested not to hang
766 * on remote filesystems, then bail now.
767 */
768 if ((p != NULL) && ((p->p_flag & P_NOREMOTEHANG) != 0) &&
769 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
770 if (fl->l_type == F_UNLCK)
771 printf("nfs_dolock: aborting unlock request "
772 "due to timeout (noremotehang)\n");
773 error = EIO;
774 break;
775 }
776 /* warn if we're not getting any response */
777 microuptime(&now);
778 if ((msgreq.lmr_errno != EINPROGRESS) &&
779 (nmp->nm_tprintf_initial_delay != 0) &&
780 ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
781 lastmsg = now.tv_sec;
782 nfs_down(NULL, nmp, p, "lockd not responding",
783 0, NFSSTA_LOCKTIMEO);
784 wentdown = 1;
785 }
786 if (msgreq.lmr_errno == EINPROGRESS) {
787 /*
788 * We've got a blocked lock request that we are
789 * going to retry. First, we'll want to try to
790 * send a cancel for the previous request.
791 *
792 * Clear errno so if we don't get a response
793 * to the resend we'll call nfs_down().
794 * Also reset timeout because we'll expect a
795 * quick response to the cancel/resend (even if
796 * it is NLM_BLOCKED).
797 */
798 msg->lm_flags |= LOCKD_MSG_CANCEL;
799 nfs_lockdmsg_dequeue(&msgreq);
800 msg->lm_xid = nfs_lockxid_get();
801 nfs_lockdmsg_enqueue(&msgreq);
802 msgreq.lmr_saved_errno = msgreq.lmr_errno;
803 msgreq.lmr_errno = 0;
804 msgreq.lmr_answered = 0;
805 timeo = 2*hz;
806 /* send cancel then resend request */
807 continue;
808 }
809 /*
810 * We timed out, so we will rewrite the request
811 * to the fifo, but only if it isn't already full.
812 */
813 ioflg |= IO_NDELAY;
814 timeo *= 2;
815 if (timeo > 60*hz)
816 timeo = 60*hz;
817 /* resend request */
818 continue;
819 }
820
821 if (wentdown) {
822 /* we got a reponse, so the server's lockd is OK */
823 nfs_up(NULL, VFSTONFS(vp->v_mount), p, "lockd alive again",
824 NFSSTA_LOCKTIMEO);
825 wentdown = 0;
826 }
827
828 if (msgreq.lmr_errno == EINPROGRESS) {
829 /* got NLM_BLOCKED response */
830 /* need to wait for NLM_GRANTED */
831 timeo = 60*hz;
832 msgreq.lmr_answered = 0;
833 goto wait_for_granted;
834 }
835
836 if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
837 (msgreq.lmr_saved_errno == EINPROGRESS)) {
838 /*
839 * We just got a successful reply to the
840 * cancel of the previous blocked lock request.
841 * Now, go ahead and resend the request.
842 */
843 msg->lm_flags &= ~LOCKD_MSG_CANCEL;
844 nfs_lockdmsg_dequeue(&msgreq);
845 msg->lm_xid = nfs_lockxid_get();
846 nfs_lockdmsg_enqueue(&msgreq);
847 msgreq.lmr_saved_errno = 0;
848 msgreq.lmr_errno = 0;
849 msgreq.lmr_answered = 0;
850 timeo = 2*hz;
851 /* resend request */
852 continue;
853 }
854
855 if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
856 if (msg->lm_fl.l_type != F_UNLCK) {
857 fl->l_type = msg->lm_fl.l_type;
858 fl->l_pid = msg->lm_fl.l_pid;
859 fl->l_start = msg->lm_fl.l_start;
860 fl->l_len = msg->lm_fl.l_len;
861 fl->l_whence = SEEK_SET;
862 } else {
863 fl->l_type = F_UNLCK;
864 }
865 }
866
867 /*
868 * If the blocked lock request was cancelled.
869 * Restore the error condition from when we
870 * originally bailed on the request.
871 */
872 if (msg->lm_flags & LOCKD_MSG_CANCEL) {
873 msg->lm_flags &= ~LOCKD_MSG_CANCEL;
874 error = msgreq.lmr_saved_errno;
875 } else
876 error = msgreq.lmr_errno;
877
878 if (!error) {
879 /* record that NFS file locking has worked on this mount */
880 nmp = VFSTONFS(vp->v_mount);
881 if (nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK))
882 nmp->nm_state |= NFSSTA_LOCKSWORK;
883 /*
884 * If we successfully acquired a lock, make sure this pid
885 * is in the nfs_lock_pid hash table so we know we can't
886 * short-circuit unlock requests.
887 */
888 if ((lockpidcheck == ENOENT) &&
889 ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW)))
890 nfs_lock_pid_check(p, 1, vp);
891
892 }
893 break;
894 }
895
896 nfs_lockdmsg_dequeue(&msgreq);
897
898 error1 = vn_close(wvp, FWRITE, kernproc->p_ucred, p);
899 /* prefer any previous 'error' to our vn_close 'error1'. */
900 return (error != 0 ? error : error1);
901 }
902
903 /*
904 * nfslockdans --
905 * NFS advisory byte-level locks answer from the lock daemon.
906 */
907 int
908 nfslockdans(struct proc *p, struct lockd_ans *ansp)
909 {
910 LOCKD_MSG_REQUEST *msgreq;
911 int error;
912
913 /*
914 * Let root, or someone who once was root (lockd generally
915 * switches to the daemon uid once it is done setting up) make
916 * this call.
917 *
918 * XXX This authorization check is probably not right.
919 */
920 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0 &&
921 p->p_cred->p_svuid != 0)
922 return (error);
923
924 /* the version should match, or we're out of sync */
925 if (ansp->la_version != LOCKD_ANS_VERSION)
926 return (EINVAL);
927
928 /* try to find the lockd message by transaction id (cookie) */
929 msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
930 if (ansp->la_flags & LOCKD_ANS_GRANTED) {
931 /*
932 * We can't depend on the granted message having our cookie,
933 * so we check the answer against the lockd message found.
934 * If no message was found or it doesn't match the answer,
935 * we look for the lockd message by the answer's lock info.
936 */
937 if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
938 msgreq = nfs_lockdmsg_find_by_answer(ansp);
939 /*
940 * We need to make sure this request isn't being cancelled
941 * If it is, we don't want to accept the granted message.
942 */
943 if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
944 msgreq = NULL;
945 }
946 if (!msgreq)
947 return (EPIPE);
948
949 msgreq->lmr_errno = ansp->la_errno;
950 if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
951 if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
952 if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
953 msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
954 else
955 msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
956 msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
957 msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
958 msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
959 } else {
960 msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
961 }
962 }
963
964 msgreq->lmr_answered = 1;
965 (void)wakeup((void *)msgreq);
966
967 return (0);
968 }
969
970 /*
971 * nfslockdfd --
972 * NFS advisory byte-level locks: fifo file# from the lock daemon.
973 */
974 int
975 nfslockdfd(struct proc *p, int fd)
976 {
977 int error;
978 struct file *fp, *ofp;
979
980 error = suser(p->p_ucred, &p->p_acflag);
981 if (error)
982 return (error);
983 if (fd < 0) {
984 fp = 0;
985 } else {
986 error = getvnode(p, fd, &fp);
987 if (error)
988 return (error);
989 (void)fref(fp);
990 }
991 ofp = nfslockdfp;
992 nfslockdfp = fp;
993 if (ofp)
994 (void)frele(ofp);
995 nfslockdpid = nfslockdfp ? p->p_pid : 0;
996 (void)wakeup((void *)&nfslockdfp);
997 return (0);
998 }
999
1000 /*
1001 * nfslockdwait --
1002 * lock daemon waiting for lock request
1003 */
1004 int
1005 nfslockdwait(struct proc *p)
1006 {
1007 int error;
1008 struct file *fp, *ofp;
1009
1010 if (p->p_pid != nfslockdpid) {
1011 error = suser(p->p_ucred, &p->p_acflag);
1012 if (error)
1013 return (error);
1014 }
1015 if (nfslockdwaiting)
1016 return (EBUSY);
1017 if (nfslockdfifowritten) {
1018 nfslockdfifowritten = 0;
1019 return (0);
1020 }
1021
1022 nfslockdwaiting = 1;
1023 tsleep((void *)&nfslockdwaiting, PCATCH | PUSER, "lockd", 0);
1024 nfslockdwaiting = 0;
1025
1026 return (0);
1027 }