[apple/xnu.git] / bsd / nfs / nfs_lock.c (xnu-792.6.56)
55e303ae 1/*
91447636 2 * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
55e303ae 12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23/*-
24 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
25 *
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
28 * are met:
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. Berkeley Software Design Inc's name may not be used to endorse or
35 * promote products derived from this software without specific prior
36 * written permission.
37 *
38 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
39 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
41 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
42 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
43 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
44 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
46 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
47 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 *
50 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
51 */
52
53#include <sys/cdefs.h>
54#include <sys/param.h>
55#include <sys/systm.h>
56#include <sys/fcntl.h>
57#include <sys/kernel.h> /* for hz */
91447636 58#include <sys/file_internal.h>
59#include <sys/malloc.h>
60#include <sys/lockf.h> /* Must come after sys/malloc.h */
61#include <sys/kpi_mbuf.h>
62#include <sys/mount_internal.h>
63#include <sys/proc_internal.h> /* for p_start */
64#include <sys/kauth.h>
65#include <sys/resourcevar.h>
66#include <sys/socket.h>
67#include <sys/unistd.h>
68#include <sys/user.h>
91447636 69#include <sys/vnode_internal.h>
55e303ae 70
91447636 71#include <kern/thread.h>
72
73#include <machine/limits.h>
74
75#include <net/if.h>
76
77#include <nfs/rpcv2.h>
78#include <nfs/nfsproto.h>
79#include <nfs/nfs.h>
80#include <nfs/nfsmount.h>
81#include <nfs/nfsnode.h>
82#include <nfs/nfs_lock.h>
83
84#define OFF_MAX QUAD_MAX
85
86/*
87 * globals for managing the lockd fifo
88 */
91447636 89vnode_t nfslockdvnode = 0;
55e303ae 90int nfslockdwaiting = 0;
91447636 91time_t nfslockdstarttimeout = 0;
92int nfslockdfifolock = 0;
93#define NFSLOCKDFIFOLOCK_LOCKED 1
94#define NFSLOCKDFIFOLOCK_WANT 2
95
96/*
97 * pending lock request messages are kept in this queue which is
98 * kept sorted by transaction ID (xid).
99 */
100uint64_t nfs_lockxid = 0;
101LOCKD_MSG_QUEUE nfs_pendlockq;
102
103/*
104 * This structure is used to identify processes which have acquired NFS locks.
105 * Knowing which processes have ever acquired locks allows us to short-circuit
106 * unlock requests for processes that have never had an NFS file lock, thus
107 * avoiding a costly and unnecessary lockd request.
108 */
109struct nfs_lock_pid {
110 TAILQ_ENTRY(nfs_lock_pid) lp_lru; /* LRU list */
111 LIST_ENTRY(nfs_lock_pid) lp_hash; /* hash chain */
112 int lp_valid; /* valid entry? */
113 int lp_time; /* last time seen valid */
114 pid_t lp_pid; /* The process ID. */
115 struct timeval lp_pid_start; /* Start time of process id */
116};
117
118#define NFS_LOCK_PID_HASH_SIZE 64 // XXX tune me
119#define NFS_LOCK_PID_HASH(pid) \
120 (&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
121LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;
122TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;
123u_long nfs_lock_pid_hash;
124int nfs_lock_pid_lock;
125
126
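A minimal user-space sketch of the short-circuit idea behind this table: remember every pid that has ever taken an NFS lock, so an unlock request from a pid that was never seen can be answered locally without a lockd round trip. The names here (pid_entry, pid_seen, pid_remember) and the fixed 64-bucket table are illustrative assumptions, not kernel interfaces; the real table below also adds the LRU and process-start-time checks.

#include <sys/types.h>
#include <sys/queue.h>
#include <stdlib.h>

struct pid_entry {
	LIST_ENTRY(pid_entry) hash_link;
	pid_t pid;
};
LIST_HEAD(pid_bucket, pid_entry);

#define NBUCKETS 64				/* power of two, like NFS_LOCK_PID_HASH_SIZE */
static struct pid_bucket buckets[NBUCKETS];
static const unsigned int mask = NBUCKETS - 1;	/* plays the role of nfs_lock_pid_hash */

/* has this pid ever taken a lock? */
static int
pid_seen(pid_t pid)
{
	struct pid_entry *e;

	LIST_FOREACH(e, &buckets[pid & mask], hash_link)
		if (e->pid == pid)
			return 1;
	return 0;
}

/* record that this pid now holds (or has held) a lock; -1 on allocation failure */
static int
pid_remember(pid_t pid)
{
	struct pid_entry *e = calloc(1, sizeof(*e));

	if (e == NULL)
		return -1;
	e->pid = pid;
	LIST_INSERT_HEAD(&buckets[pid & mask], e, hash_link);
	return 0;
}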
127/*
128 * initialize global nfs lock state
129 */
130void
131nfs_lockinit(void)
132{
133 TAILQ_INIT(&nfs_pendlockq);
134 nfs_lock_pid_lock = 0;
135 nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
136 M_TEMP, &nfs_lock_pid_hash);
137 TAILQ_INIT(&nfs_lock_pid_lru);
138}
139
140/*
141 * insert a lock request message into the pending queue
142 */
143static inline void
144nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
145{
146 LOCKD_MSG_REQUEST *mr;
147
148 mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
149 if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
150 /* fast path: empty queue or new largest xid */
151 TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
152 return;
153 }
154 /* slow path: need to walk list to find insertion point */
155	while (mr && (msgreq->lmr_msg.lm_xid < mr->lmr_msg.lm_xid)) {
156 mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
157 }
158 if (mr) {
159 TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
160 } else {
161 TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
162 }
163}
164
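Keeping the queue ordered by xid makes the common case (each new request carries the largest xid yet) an O(1) tail insert and lets lookups stop as soon as they pass the slot an xid would occupy. A stand-alone sketch of the same pattern using the usual <sys/queue.h> TAILQ macros; struct req, enqueue_sorted and find_by_xid are illustrative names, not the kernel's:

#include <sys/queue.h>
#include <stdint.h>
#include <stddef.h>

struct req {
	TAILQ_ENTRY(req) link;
	uint64_t xid;
};
TAILQ_HEAD(req_queue, req);

static void
enqueue_sorted(struct req_queue *q, struct req *r)
{
	struct req *mr = TAILQ_LAST(q, req_queue);

	if (mr == NULL || r->xid > mr->xid) {
		TAILQ_INSERT_TAIL(q, r, link);		/* common case: new largest xid */
		return;
	}
	while (mr != NULL && r->xid < mr->xid)		/* walk back past larger xids */
		mr = TAILQ_PREV(mr, req_queue, link);
	if (mr != NULL)
		TAILQ_INSERT_AFTER(q, mr, r, link);
	else
		TAILQ_INSERT_HEAD(q, r, link);
}

static struct req *
find_by_xid(struct req_queue *q, uint64_t xid)
{
	struct req *mr;

	TAILQ_FOREACH(mr, q, link) {
		if (mr->xid == xid)
			return mr;
		if (mr->xid > xid)			/* passed its slot: not queued */
			return NULL;
	}
	return NULL;
}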
165/*
166 * remove a lock request message from the pending queue
167 */
168static inline void
169nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
170{
171 TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
172}
173
174/*
175 * find a pending lock request message by xid
176 *
177 * We search from the head of the list assuming that the message we're
178 * looking for is for an older request (because we have an answer to it).
179 * This assumes that lock requests will be answered primarily in FIFO order.
180 * However, this may not be the case if there are blocked requests. We may
181 * want to move blocked requests to a separate queue (but that'll complicate
182 * duplicate xid checking).
183 */
184static inline LOCKD_MSG_REQUEST *
185nfs_lockdmsg_find_by_xid(uint64_t lockxid)
186{
187 LOCKD_MSG_REQUEST *mr;
188
189 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
190 if (mr->lmr_msg.lm_xid == lockxid)
191 return mr;
192 if (mr->lmr_msg.lm_xid > lockxid)
193 return NULL;
194 }
195 return mr;
196}
197
198/*
199 * Because we can't depend on nlm_granted messages containing the same
200 * cookie we sent with the original lock request, we need code to test whether
201 * an nlm_granted answer matches the lock request. We also need code
202 * that can find a lockd message based solely on the nlm_granted answer.
203 */
204
205/*
206 * compare lockd message to answer
207 *
208 * returns 0 on equality and 1 if different
209 */
210static inline int
211nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
212{
213 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
214 return 1;
215 if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
216 return 1;
217 if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
218 return 1;
219 if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
220 return 1;
221 if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
222 return 1;
223 if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
224 return 1;
225 return 0;
226}
227
228/*
229 * find a pending lock request message based on the lock info provided
230 * in the lockd_ans/nlm_granted data. We need this because we can't
231 * depend on nlm_granted messages containing the same cookie we sent
232 * with the original lock request.
233 *
234 * We search from the head of the list assuming that the message we're
235 * looking for is for an older request (because we have an answer to it).
236 * This assumes that lock requests will be answered primarily in FIFO order.
237 * However, this may not be the case if there are blocked requests. We may
238 * want to move blocked requests to a separate queue (but that'll complicate
239 * duplicate xid checking).
55e303ae 240 */
241static inline LOCKD_MSG_REQUEST *
242nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
243{
244 LOCKD_MSG_REQUEST *mr;
245
246 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
247 return NULL;
248 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
249 if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
250 break;
251 }
252 return mr;
253}
254
255/*
256 * return the next unique lock request transaction ID
257 */
258static inline uint64_t
259nfs_lockxid_get(void)
260{
261 LOCKD_MSG_REQUEST *mr;
262
263 /* derive initial lock xid from system time */
264 if (!nfs_lockxid) {
265 /*
266 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
267 * due to a broken clock) because we immediately increment it
268 * and we guarantee to never use xid 0. So, nfs_lockxid should only
269 * ever be 0 the first time this function is called.
270 */
271 struct timeval tv;
272 microtime(&tv);
273 nfs_lockxid = (uint64_t)tv.tv_sec << 12;
274 }
275
276 /* make sure we get a unique xid */
277 do {
278 /* Skip zero xid if it should ever happen. */
279 if (++nfs_lockxid == 0)
280 nfs_lockxid++;
281 if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
282 (mr->lmr_msg.lm_xid < nfs_lockxid)) {
283 /* fast path: empty queue or new largest xid */
284 break;
285 }
286 /* check if xid is already in use */
287 } while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
288
289 return nfs_lockxid;
290}
291
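A stand-alone sketch of the same generator (next_xid is an illustrative name): seed once from the clock so ids from before a reboot are unlikely to collide, then hand out strictly increasing, never-zero 64-bit ids. The kernel version additionally checks the pending queue in the unlikely event an id is still in use.

#include <stdint.h>
#include <stddef.h>
#include <sys/time.h>

static uint64_t
next_xid(void)
{
	static uint64_t xid = 0;

	if (xid == 0) {
		struct timeval tv;

		gettimeofday(&tv, NULL);
		xid = (uint64_t)tv.tv_sec << 12;	/* leaves room for 4096 ids per second */
	}
	if (++xid == 0)			/* never hand out 0, even after wraparound */
		xid++;
	return xid;
}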
292
293/*
294 * Check the nfs_lock_pid hash table for an entry and, if requested,
295 * add the entry if it is not found.
296 *
297 * (Also, if adding, try to clean up some stale entries.)
298 */
299static int
91447636 300nfs_lock_pid_check(proc_t p, int addflag, vnode_t vp)
301{
302 struct nfs_lock_pid *lp, *lplru, *lplru_next;
91447636 303 proc_t plru;
304 int error = 0;
305 struct timeval now;
306
307 /* lock hash */
308loop:
309 if (nfs_lock_pid_lock) {
91447636 310 struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
311 while (nfs_lock_pid_lock) {
312 nfs_lock_pid_lock = -1;
313 tsleep(&nfs_lock_pid_lock, PCATCH, "nfslockpid", 0);
91447636 314 if ((error = nfs_sigintr(nmp, NULL, p)))
315 return (error);
316 }
317 goto loop;
318 }
319 nfs_lock_pid_lock = 1;
320
321 /* Search hash chain */
322 error = ENOENT;
91447636 323 lp = NFS_LOCK_PID_HASH(proc_pid(p))->lh_first;
e5568f75 324 for (; lp != NULL; lp = lp->lp_hash.le_next)
91447636 325 if (lp->lp_pid == proc_pid(p)) {
326 /* found pid... */
327 if (timevalcmp(&lp->lp_pid_start, &p->p_stats->p_start, ==)) {
328 /* ...and it's valid */
329 /* move to tail of LRU */
330 TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
331 microuptime(&now);
332 lp->lp_time = now.tv_sec;
333 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
334 error = 0;
335 break;
336 }
337 /* ...but it's no longer valid */
338 /* remove from hash, invalidate, and move to lru head */
339 LIST_REMOVE(lp, lp_hash);
340 lp->lp_valid = 0;
341 TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
342 TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru);
343 lp = NULL;
344 break;
345 }
346
347 /* if we didn't find it (valid) and we've been asked to add it */
348 if ((error == ENOENT) && addflag) {
349 /* scan lru list for invalid, stale entries to reuse/free */
350 int lrucnt = 0;
351 microuptime(&now);
352 for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) {
353 lplru_next = TAILQ_NEXT(lplru, lp_lru);
354 if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) {
355 /*
356 * If the oldest LRU entry is relatively new, then don't
357 * bother scanning any further.
358 */
359 break;
360 }
361 /* remove entry from LRU, and check if it's still in use */
362 TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru);
363 if (!lplru->lp_valid || !(plru = pfind(lplru->lp_pid)) ||
364 timevalcmp(&lplru->lp_pid_start, &plru->p_stats->p_start, !=)) {
365 /* no longer in use */
366 LIST_REMOVE(lplru, lp_hash);
367 if (!lp) {
368 /* we'll reuse this one */
369 lp = lplru;
370 } else {
371 /* we can free this one */
372 FREE(lplru, M_TEMP);
373 }
374 } else {
375 /* still in use */
376 lplru->lp_time = now.tv_sec;
377 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru);
378 }
379 /* don't check too many entries at once */
380 if (++lrucnt > 8)
381 break;
382 }
383 if (!lp) {
384 /* we need to allocate a new one */
385 MALLOC(lp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid),
386 M_TEMP, M_WAITOK | M_ZERO);
387 }
388 if (!lp) {
389 error = ENOMEM;
390 } else {
391 /* (re)initialize nfs_lock_pid info */
392 lp->lp_pid = proc_pid(p);
393 lp->lp_pid_start = p->p_stats->p_start;
394 /* insert pid in hash */
395 LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash);
396 lp->lp_valid = 1;
397 lp->lp_time = now.tv_sec;
398 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
399 error = 0;
400 }
401 }
402
403 /* unlock hash */
404 if (nfs_lock_pid_lock < 0) {
405 nfs_lock_pid_lock = 0;
406 wakeup(&nfs_lock_pid_lock);
407 } else
408 nfs_lock_pid_lock = 0;
409
410 return (error);
411}
412
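The lp_pid_start comparison above is what makes the table safe against pid reuse: a pid only identifies a process together with its start time. A user-space sketch of the same liveness test, assuming the macOS sysctl(CTL_KERN, KERN_PROC, KERN_PROC_PID) interface and its kinfo_proc layout; pid_still_valid is an illustrative name:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/time.h>

/* Return 1 if `pid` still names the process that started at `start`,
 * 0 if that process exited (and the pid may have been reused). */
static int
pid_still_valid(pid_t pid, const struct timeval *start)
{
	struct kinfo_proc kp;
	size_t len = sizeof(kp);
	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, pid };

	if (sysctl(mib, 4, &kp, &len, NULL, 0) < 0 || len < sizeof(kp))
		return 0;		/* no such process */
	return timercmp(&kp.kp_proc.p_starttime, start, ==);
}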
413
414/*
415 * nfs_advlock --
416 * NFS advisory byte-level locks.
417 */
418int
419nfs_dolock(struct vnop_advlock_args *ap)
420/* struct vnop_advlock_args {
421 struct vnodeop_desc *a_desc;
422 vnode_t a_vp;
423 caddr_t a_id;
424 int a_op;
425 struct flock *a_fl;
426 int a_flags;
427 vfs_context_t a_context;
428}; */
429{
430 LOCKD_MSG_REQUEST msgreq;
431 LOCKD_MSG *msg;
91447636 432 vnode_t vp, wvp;
433 struct nfsnode *np;
434 int error, error1;
435 struct flock *fl;
436 int fmode, ioflg;
55e303ae 437 struct nfsmount *nmp;
91447636 438 struct nfs_vattr nvattr;
55e303ae 439 off_t start, end;
440 struct timeval now;
441 int timeo, endtime, lastmsg, wentdown = 0;
442 int lockpidcheck;
443 kauth_cred_t cred;
444 proc_t p;
445 struct sockaddr *saddr;
55e303ae 446
447 p = vfs_context_proc(ap->a_context);
448 cred = vfs_context_ucred(ap->a_context);
449
450 vp = ap->a_vp;
451 fl = ap->a_fl;
452 np = VTONFS(vp);
453
91447636 454 nmp = VFSTONFS(vnode_mount(vp));
455 if (!nmp)
456 return (ENXIO);
457 if (nmp->nm_flag & NFSMNT_NOLOCKS)
91447636 458 return (ENOTSUP);
459
460 /*
461 * The NLM protocol doesn't allow the server to return an error
462 * on ranges, so we do the checking here. Pre-LFS (Large File Summit)
463 * standards required EINVAL for the range errors. More recent
464 * standards use EOVERFLOW, but their EINVAL wording still
465 * encompasses these errors.
466 * Any code sensitive to this is either:
467 * 1) written pre-LFS and so can handle only EINVAL, or
468 * 2) written post-LFS and thus ought to be tolerant of pre-LFS
469 * implementations.
470 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
471 */
472 if (fl->l_whence != SEEK_END) {
473 if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
474 fl->l_start < 0 ||
475 (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
476 (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
477 return (EINVAL);
478 }
479 /*
91447636 480 * If the lock daemon is running, take a ref on its fifo vnode
55e303ae 481 */
482 if (!(wvp = nfslockdvnode)) {
483 if (!nfslockdwaiting && !nfslockdstarttimeout)
484 return (ENOTSUP);
485 /*
486 * Don't wake lock daemon if it hasn't been started yet and
487 * this is an unlock request (since we couldn't possibly
488 * actually have a lock on the file). This could be an
489 * uninformed unlock request due to closef()'s behavior of doing
490 * unlocks on all files if a process has had a lock on ANY file.
491 */
91447636 492 if (!nfslockdvnode && (fl->l_type == F_UNLCK))
55e303ae 493 return (EINVAL);
494 microuptime(&now);
495 if (nfslockdwaiting) {
496 /* wake up lock daemon */
497 nfslockdstarttimeout = now.tv_sec + 60;
498 (void)wakeup((void *)&nfslockdwaiting);
499 }
500 /* wait on nfslockdvnode for a while to allow daemon to start */
501 while (!nfslockdvnode && (now.tv_sec < nfslockdstarttimeout)) {
502 error = tsleep((void *)&nfslockdvnode, PCATCH | PUSER, "lockdstart", 2*hz);
503 if (error && (error != EWOULDBLOCK))
504 return (error);
505 /* check that we still have our mount... */
506 /* ...and that we still support locks */
507 nmp = VFSTONFS(vnode_mount(vp));
508 if (!nmp)
509 return (ENXIO);
510 if (nmp->nm_flag & NFSMNT_NOLOCKS)
511 return (ENOTSUP);
512 if (!error)
513 break;
514 microuptime(&now);
515 }
516 /*
517 * Check for nfslockdvnode one more time.
518 * If the daemon hasn't started by now, there's a problem.
519 */
520 if (!(wvp = nfslockdvnode))
521 return (ENOTSUP);
522 }
523 error = vnode_getwithref(wvp);
524 if (error)
525 return (ENOTSUP);
526 error = vnode_ref(wvp);
527 if (error) {
528 vnode_put(wvp);
529 return (ENOTSUP);
55e303ae 530 }
e5568f75 531
55e303ae 532 /*
533 * We need to check whether this process has ever successfully acquired an NFS lock.
534 * If not, and this is an unlock request, we can simply return success here.
55e303ae 535 */
536 lockpidcheck = nfs_lock_pid_check(p, 0, vp);
537 if (lockpidcheck) {
538 if (lockpidcheck != ENOENT) {
539 vnode_rele(wvp);
540 vnode_put(wvp);
e5568f75 541 return (lockpidcheck);
91447636 542 }
55e303ae 543 if (ap->a_op == F_UNLCK) {
544 vnode_rele(wvp);
545 vnode_put(wvp);
546 return (0);
547 }
55e303ae 548 }
549
550 /*
551 * The NFS Lock Manager protocol doesn't directly handle
552 * negative lengths or SEEK_END, so we need to normalize
553 * things here where we have all the info.
554 * (Note: SEEK_CUR is already adjusted for at this point)
555 */
556 /* Convert the flock structure into a start and end. */
557 switch (fl->l_whence) {
558 case SEEK_SET:
559 case SEEK_CUR:
560 /*
561 * Caller is responsible for adding any necessary offset
562 * to fl->l_start when SEEK_CUR is used.
563 */
564 start = fl->l_start;
565 break;
566 case SEEK_END:
567 /* need to flush, and refetch attributes to make */
568 /* sure we have the correct end of file offset */
569 if (np->n_flag & NMODIFIED) {
570 NATTRINVALIDATE(np);
571 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
55e303ae 572 if (error) {
573 vnode_rele(wvp);
574 vnode_put(wvp);
575 return (error);
576 }
577 }
578 NATTRINVALIDATE(np);
579
580 error = nfs_getattr(vp, &nvattr, cred, p);
55e303ae 581 if (error) {
582 vnode_rele(wvp);
583 vnode_put(wvp);
584 return (error);
585 }
586 start = np->n_size + fl->l_start;
587 break;
588 default:
589 vnode_rele(wvp);
590 vnode_put(wvp);
591 return (EINVAL);
592 }
593 if (fl->l_len == 0)
594 end = -1;
595 else if (fl->l_len > 0)
596 end = start + fl->l_len - 1;
597 else { /* l_len is negative */
598 end = start - 1;
599 start += fl->l_len;
600 }
601 if (start < 0) {
602 vnode_rele(wvp);
603 vnode_put(wvp);
604 return (EINVAL);
605 }
606 if (!NFS_ISV3(vp) &&
607 ((start >= 0x80000000) || (end >= 0x80000000))) {
608 vnode_rele(wvp);
609 vnode_put(wvp);
610 return (EINVAL);
611 }
55e303ae 612
613 /*
614 * Fill in the information structure.
615 */
616 msgreq.lmr_answered = 0;
617 msgreq.lmr_errno = 0;
618 msgreq.lmr_saved_errno = 0;
619 msg = &msgreq.lmr_msg;
620 msg->lm_version = LOCKD_MSG_VERSION;
621 msg->lm_flags = 0;
622
623 msg->lm_fl = *fl;
624 msg->lm_fl.l_start = start;
55e303ae 625 if (end != -1)
e5568f75 626 msg->lm_fl.l_len = end - start + 1;
91447636 627 msg->lm_fl.l_pid = proc_pid(p);
55e303ae 628
629 if (ap->a_flags & F_WAIT)
630 msg->lm_flags |= LOCKD_MSG_BLOCK;
631 if (ap->a_op == F_GETLK)
632 msg->lm_flags |= LOCKD_MSG_TEST;
55e303ae 633
91447636 634 nmp = VFSTONFS(vnode_mount(vp));
55e303ae 635 if (!nmp) {
636 vnode_rele(wvp);
637 vnode_put(wvp);
638 return (ENXIO);
639 }
640
641 saddr = mbuf_data(nmp->nm_nam);
642 bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
643 msg->lm_fh_len = NFS_ISV3(vp) ? VTONFS(vp)->n_fhsize : NFSX_V2FH;
644 bcopy(VTONFS(vp)->n_fhp, msg->lm_fh, msg->lm_fh_len);
645 if (NFS_ISV3(vp))
646 msg->lm_flags |= LOCKD_MSG_NFSV3;
91447636 647 cru2x(cred, &msg->lm_cred);
55e303ae 648
649 microuptime(&now);
650 lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
651
652 fmode = FFLAGS(O_WRONLY);
653 if ((error = VNOP_OPEN(wvp, fmode, ap->a_context))) {
654 vnode_rele(wvp);
655 vnode_put(wvp);
656 return (error);
657 }
91447636 658 vnode_lock(wvp);
55e303ae 659 ++wvp->v_writecount;
91447636 660 vnode_unlock(wvp);
55e303ae 661
662 /* allocate unique xid */
663 msg->lm_xid = nfs_lockxid_get();
664 nfs_lockdmsg_enqueue(&msgreq);
665
666 timeo = 2*hz;
667#define IO_NOMACCHECK 0
668 ioflg = IO_UNIT | IO_NOMACCHECK;
669 for (;;) {
e5568f75 670 error = 0;
671 while (nfslockdfifolock & NFSLOCKDFIFOLOCK_LOCKED) {
672 nfslockdfifolock |= NFSLOCKDFIFOLOCK_WANT;
673 error = tsleep((void *)&nfslockdfifolock,
674 PCATCH | PUSER, "lockdfifo", 20*hz);
675 if (error)
676 break;
677 }
678 if (error)
679 break;
680 nfslockdfifolock |= NFSLOCKDFIFOLOCK_LOCKED;
681
e5568f75 682 error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)msg, sizeof(*msg), 0,
91447636 683 UIO_SYSSPACE32, ioflg, proc_ucred(kernproc), NULL, p);
684
685 nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_LOCKED;
686 if (nfslockdfifolock & NFSLOCKDFIFOLOCK_WANT) {
687 nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_WANT;
688 wakeup((void *)&nfslockdfifolock);
689 }
690
691 if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) {
692 break;
693 }
e5568f75 694
55e303ae 695 /*
696 * Always wait for an answer. Not waiting for unlocks could
697 * cause a lock to be left if the unlock request gets dropped.
55e303ae 698 */
699
700 /*
701 * Retry if it takes too long to get a response.
702 *
703 * The timeout numbers were picked out of thin air... they start
704 * at 2 seconds and double on each timeout, up to a max of 60 seconds.
705 *
706 * In order to maintain responsiveness, we pass a small timeout
707 * to tsleep and calculate the timeouts ourselves. This allows
708 * us to pick up on mount changes quicker.
55e303ae 709 */
710wait_for_granted:
711 error = EWOULDBLOCK;
712 microuptime(&now);
713 if ((timeo/hz) > 0)
714 endtime = now.tv_sec + timeo/hz;
715 else
716 endtime = now.tv_sec + 1;
717 while (now.tv_sec < endtime) {
718 error = tsleep((void *)&msgreq, PCATCH | PUSER, "lockd", 2*hz);
719 if (msgreq.lmr_answered) {
55e303ae 720 /*
721 * Note: it's possible to have a lock granted at
722 * essentially the same time that we get interrupted.
723 * Since the lock may be granted, we can't return an
724 * error from this request or we might not unlock the
725 * lock that's been granted.
55e303ae 726 */
727 error = 0;
728 break;
729 }
730 if (error != EWOULDBLOCK)
731 break;
732 /* check that we still have our mount... */
733 /* ...and that we still support locks */
91447636 734 nmp = VFSTONFS(vnode_mount(vp));
735 if (!nmp || (nmp->nm_flag & NFSMNT_NOLOCKS))
736 break;
737 /*
738 * If the mount is hung and we've requested not to hang
739 * on remote filesystems, then bail now.
740 */
91447636 741 if ((p != NULL) && ((proc_noremotehang(p)) != 0) &&
742 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
743 if (fl->l_type == F_UNLCK)
744 printf("nfs_dolock: aborting unlock request "
745 "due to timeout (noremotehang)\n");
746 error = EIO;
747 break;
748 }
749 microuptime(&now);
750 }
751 if (error) {
752 /* check that we still have our mount... */
91447636 753 nmp = VFSTONFS(vnode_mount(vp));
754 if (!nmp) {
755 if (error == EWOULDBLOCK)
756 error = ENXIO;
757 break;
758 }
759 /* ...and that we still support locks */
760 if (nmp->nm_flag & NFSMNT_NOLOCKS) {
761 if (error == EWOULDBLOCK)
91447636 762 error = ENOTSUP;
763 break;
764 }
91447636 765 if ((error == ENOTSUP) &&
766 (nmp->nm_state & NFSSTA_LOCKSWORK)) {
767 /*
768 * We have evidence that locks work, yet lockd
91447636 769 * returned ENOTSUP. This is probably because
770 * it was unable to contact the server's lockd to
771 * send it the request.
772 *
773 * Because we know locks work, we'll consider
774 * this failure to be a timeout.
775 */
776 error = EWOULDBLOCK;
777 }
778 if (error != EWOULDBLOCK) {
779 /*
780 * We're going to bail on this request.
781 * If we were a blocked lock request, send a cancel.
782 */
783 if ((msgreq.lmr_errno == EINPROGRESS) &&
784 !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
785 /* set this request up as a cancel */
786 msg->lm_flags |= LOCKD_MSG_CANCEL;
787 nfs_lockdmsg_dequeue(&msgreq);
788 msg->lm_xid = nfs_lockxid_get();
789 nfs_lockdmsg_enqueue(&msgreq);
790 msgreq.lmr_saved_errno = error;
791 msgreq.lmr_errno = 0;
792 msgreq.lmr_answered = 0;
793 /* reset timeout */
794 timeo = 2*hz;
795 /* send cancel request */
796 continue;
797 }
798 break;
799 }
800
801 /*
802 * If the mount is hung and we've requested not to hang
803 * on remote filesystems, then bail now.
804 */
91447636 805 if ((p != NULL) && ((proc_noremotehang(p)) != 0) &&
806 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
807 if (fl->l_type == F_UNLCK)
808 printf("nfs_dolock: aborting unlock request "
809 "due to timeout (noremotehang)\n");
810 error = EIO;
811 break;
812 }
813 /* warn if we're not getting any response */
814 microuptime(&now);
815 if ((msgreq.lmr_errno != EINPROGRESS) &&
816 (nmp->nm_tprintf_initial_delay != 0) &&
817 ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
818 lastmsg = now.tv_sec;
91447636 819 nfs_down(nmp, p, 0, NFSSTA_LOCKTIMEO, "lockd not responding");
820 wentdown = 1;
821 }
822 if (msgreq.lmr_errno == EINPROGRESS) {
823 /*
824 * We've got a blocked lock request that we are
825 * going to retry. First, we'll want to try to
826 * send a cancel for the previous request.
827 *
828 * Clear errno so if we don't get a response
829 * to the resend we'll call nfs_down().
830 * Also reset timeout because we'll expect a
831 * quick response to the cancel/resend (even if
832 * it is NLM_BLOCKED).
833 */
834 msg->lm_flags |= LOCKD_MSG_CANCEL;
835 nfs_lockdmsg_dequeue(&msgreq);
836 msg->lm_xid = nfs_lockxid_get();
837 nfs_lockdmsg_enqueue(&msgreq);
838 msgreq.lmr_saved_errno = msgreq.lmr_errno;
839 msgreq.lmr_errno = 0;
840 msgreq.lmr_answered = 0;
841 timeo = 2*hz;
842 /* send cancel then resend request */
843 continue;
844 }
845 /*
846 * We timed out, so we will rewrite the request
847 * to the fifo, but only if it isn't already full.
848 */
849 ioflg |= IO_NDELAY;
850 timeo *= 2;
851 if (timeo > 60*hz)
852 timeo = 60*hz;
853 /* resend request */
854 continue;
855 }
55e303ae 856
857 /* we got a response, so the server's lockd is OK */
858 nfs_up(VFSTONFS(vnode_mount(vp)), p, NFSSTA_LOCKTIMEO,
859 wentdown ? "lockd alive again" : NULL);
860 wentdown = 0;
55e303ae 861
862 if (msgreq.lmr_errno == EINPROGRESS) {
863 /* got NLM_BLOCKED response */
864 /* need to wait for NLM_GRANTED */
865 timeo = 60*hz;
866 msgreq.lmr_answered = 0;
867 goto wait_for_granted;
868 }
869
870 if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
871 (msgreq.lmr_saved_errno == EINPROGRESS)) {
872 /*
873 * We just got a successful reply to the
874 * cancel of the previous blocked lock request.
875 * Now, go ahead and resend the request.
876 */
877 msg->lm_flags &= ~LOCKD_MSG_CANCEL;
878 nfs_lockdmsg_dequeue(&msgreq);
879 msg->lm_xid = nfs_lockxid_get();
880 nfs_lockdmsg_enqueue(&msgreq);
881 msgreq.lmr_saved_errno = 0;
882 msgreq.lmr_errno = 0;
883 msgreq.lmr_answered = 0;
884 timeo = 2*hz;
885 /* resend request */
886 continue;
887 }
888
889 if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
890 if (msg->lm_fl.l_type != F_UNLCK) {
891 fl->l_type = msg->lm_fl.l_type;
892 fl->l_pid = msg->lm_fl.l_pid;
893 fl->l_start = msg->lm_fl.l_start;
894 fl->l_len = msg->lm_fl.l_len;
895 fl->l_whence = SEEK_SET;
896 } else {
897 fl->l_type = F_UNLCK;
898 }
899 }
900
901 /*
902 * If the blocked lock request was cancelled,
903 * restore the error condition from when we
904 * originally bailed on the request.
905 */
906 if (msg->lm_flags & LOCKD_MSG_CANCEL) {
907 msg->lm_flags &= ~LOCKD_MSG_CANCEL;
908 error = msgreq.lmr_saved_errno;
909 } else
910 error = msgreq.lmr_errno;
911
912 if (!error) {
913 /* record that NFS file locking has worked on this mount */
91447636 914 nmp = VFSTONFS(vnode_mount(vp));
915 if (nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK))
916 nmp->nm_state |= NFSSTA_LOCKSWORK;
917 /*
918 * If we successfully acquired a lock, make sure this pid
919 * is in the nfs_lock_pid hash table so we know we can't
920 * short-circuit unlock requests.
921 */
922 if ((lockpidcheck == ENOENT) &&
923 ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW)))
924 nfs_lock_pid_check(p, 1, vp);
925
926 }
927 break;
928 }
91447636 929
e5568f75 930 nfs_lockdmsg_dequeue(&msgreq);
55e303ae 931
932 error1 = VNOP_CLOSE(wvp, FWRITE, ap->a_context);
933 vnode_rele(wvp);
934 vnode_put(wvp);
935 /* prefer any previous 'error' to our VNOP_CLOSE 'error1'. */
936 return (error != 0 ? error : error1);
937}
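The trickiest part of nfs_dolock() is turning the caller's struct flock into the inclusive byte range that the NLM request carries. Below is a stand-alone sketch of just that normalization (flock_to_range is an illustrative name, not something the kernel calls; the OFF_MAX overflow checks done earlier in nfs_dolock are omitted): l_len == 0 means "through end of file" (end = -1), and a negative l_len places the range just below l_start, exactly as in the switch above.

#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>

static int
flock_to_range(const struct flock *fl, off_t filesize, off_t *startp, off_t *endp)
{
	off_t start;

	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:	/* caller has already folded the current offset into l_start */
		start = fl->l_start;
		break;
	case SEEK_END:
		start = filesize + fl->l_start;
		break;
	default:
		return EINVAL;
	}
	if (fl->l_len == 0) {
		*endp = -1;			/* lock through end of file */
	} else if (fl->l_len > 0) {
		*endp = start + fl->l_len - 1;
	} else {				/* negative length: range ends just below l_start */
		*endp = start - 1;
		start += fl->l_len;
	}
	if (start < 0)
		return EINVAL;
	*startp = start;
	return 0;
}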
938
939/*
940 * nfslockdans --
941 * NFS advisory byte-level locks answer from the lock daemon.
942 */
943int
91447636 944nfslockdans(proc_t p, struct lockd_ans *ansp)
55e303ae 945{
e5568f75 946 LOCKD_MSG_REQUEST *msgreq;
947 int error;
948
949 /* Let root make this call. */
950 error = proc_suser(p);
951 if (error)
952 return (error);
953
954 /* the version should match, or we're out of sync */
e5568f75 955 if (ansp->la_version != LOCKD_ANS_VERSION)
956 return (EINVAL);
957
958 /* try to find the lockd message by transaction id (cookie) */
959 msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
960 if (ansp->la_flags & LOCKD_ANS_GRANTED) {
961 /*
962 * We can't depend on the granted message having our cookie,
963 * so we check the answer against the lockd message found.
964 * If no message was found or it doesn't match the answer,
965 * we look for the lockd message by the answer's lock info.
966 */
967 if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
968 msgreq = nfs_lockdmsg_find_by_answer(ansp);
969 /*
970 * We need to make sure this request isn't being cancelled.
971 * If it is, we don't want to accept the granted message.
972 */
973 if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
974 msgreq = NULL;
55e303ae 975 }
e5568f75 976 if (!msgreq)
55e303ae 977 return (EPIPE);
55e303ae 978
979 msgreq->lmr_errno = ansp->la_errno;
980 if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
981 if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
982 if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
983 msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
984 else
985 msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
986 msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
987 msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
988 msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
989 } else {
990 msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
991 }
992 }
55e303ae 993
994 msgreq->lmr_answered = 1;
995 (void)wakeup((void *)msgreq);
996
997 return (0);
998}
999
1000/*
1001 * nfslockdfd --
1002 * NFS advisory byte-level locks: fifo file# from the lock daemon.
1003 */
1004int
91447636 1005nfslockdfd(proc_t p, int fd)
1006{
1007 int error;
91447636 1008 vnode_t vp, oldvp;
55e303ae 1009
91447636 1010 error = proc_suser(p);
1011 if (error)
1012 return (error);
1013 if (fd < 0) {
91447636 1014 vp = NULL;
55e303ae 1015 } else {
91447636 1016 error = file_vnode(fd, &vp);
1017 if (error)
1018 return (error);
1019 error = vnode_getwithref(vp);
1020 if (error)
1021 return (error);
1022 error = vnode_ref(vp);
1023 if (error) {
1024 vnode_put(vp);
1025 return (error);
1026 }
1027 }
1028 oldvp = nfslockdvnode;
1029 nfslockdvnode = vp;
1030 if (oldvp) {
1031 vnode_rele(oldvp);
1032 }
1033 (void)wakeup((void *)&nfslockdvnode);
1034 if (vp) {
1035 vnode_put(vp);
55e303ae 1036 }
1037 return (0);
1038}
1039
1040/*
1041 * nfslockdwait --
1042 * lock daemon waiting for lock request
1043 */
1044int
91447636 1045nfslockdwait(proc_t p)
1046{
1047 int error;
55e303ae 1048
1049 error = proc_suser(p);
1050 if (error)
1051 return (error);
1052 if (nfslockdwaiting || nfslockdvnode)
55e303ae 1053 return (EBUSY);
55e303ae 1054
91447636 1055 nfslockdstarttimeout = 0;
1056 nfslockdwaiting = 1;
1057 tsleep((void *)&nfslockdwaiting, PCATCH | PUSER, "lockd", 0);
1058 nfslockdwaiting = 0;
1059
1060 return (0);
1061}
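Everything in this file is driven by ordinary advisory locking on an NFS vnode: fcntl() reaches nfs_dolock() through the advlock VNOP, which writes a LOCKD_MSG to the fifo registered via nfslockdfd() and sleeps until nfslockdans() posts the daemon's answer. A minimal user-space illustration of that entry point; the path /nfs/server/file is an assumed NFS mount, not something this file defines.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd = open("/nfs/server/file", O_RDWR);	/* assumed NFS path */
	struct flock fl = {
		.l_type   = F_WRLCK,	/* exclusive lock */
		.l_whence = SEEK_SET,
		.l_start  = 0,
		.l_len    = 0,		/* 0 = through end of file */
	};

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (fcntl(fd, F_SETLKW, &fl) == -1)	/* blocks until granted or interrupted */
		perror("fcntl(F_SETLKW)");
	fl.l_type = F_UNLCK;
	(void)fcntl(fd, F_SETLK, &fl);		/* release */
	close(fd);
	return 0;
}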