]> git.saurik.com Git - apple/xnu.git/blame - bsd/nfs/nfs_lock.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / bsd / nfs / nfs_lock.c
CommitLineData
55e303ae 1/*
91447636 2 * Copyright (c) 2002-2005 Apple Computer, Inc. All rights reserved.
55e303ae 3 *
8ad349bb 4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
55e303ae 5 *
8ad349bb
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
55e303ae
A
29 */
30/*-
31 * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved.
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 * 3. Berkeley Software Design Inc's name may not be used to endorse or
42 * promote products derived from this software without specific prior
43 * written permission.
44 *
45 * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
46 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48 * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
49 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55 * SUCH DAMAGE.
56 *
57 * from BSDI nfs_lock.c,v 2.4 1998/12/14 23:49:56 jch Exp
58 */
59
60#include <sys/cdefs.h>
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/fcntl.h>
64#include <sys/kernel.h> /* for hz */
91447636 65#include <sys/file_internal.h>
55e303ae
A
66#include <sys/malloc.h>
67#include <sys/lockf.h> /* for hz */ /* Must come after sys/malloc.h */
91447636
A
68#include <sys/kpi_mbuf.h>
69#include <sys/mount_internal.h>
70#include <sys/proc_internal.h> /* for p_start */
71#include <sys/kauth.h>
55e303ae
A
72#include <sys/resourcevar.h>
73#include <sys/socket.h>
55e303ae
A
74#include <sys/unistd.h>
75#include <sys/user.h>
91447636 76#include <sys/vnode_internal.h>
55e303ae 77
91447636 78#include <kern/thread.h>
55e303ae
A
79
80#include <machine/limits.h>
81
82#include <net/if.h>
83
84#include <nfs/rpcv2.h>
85#include <nfs/nfsproto.h>
86#include <nfs/nfs.h>
87#include <nfs/nfsmount.h>
88#include <nfs/nfsnode.h>
89#include <nfs/nfs_lock.h>
55e303ae
A
90
91#define OFF_MAX QUAD_MAX
92
e5568f75
A
93/*
94 * globals for managing the lockd fifo
95 */
91447636 96vnode_t nfslockdvnode = 0;
55e303ae 97int nfslockdwaiting = 0;
91447636 98time_t nfslockdstarttimeout = 0;
55e303ae
A
99int nfslockdfifolock = 0;
100#define NFSLOCKDFIFOLOCK_LOCKED 1
101#define NFSLOCKDFIFOLOCK_WANT 2
102
103/*
e5568f75
A
104 * pending lock request messages are kept in this queue which is
105 * kept sorted by transaction ID (xid).
106 */
107uint64_t nfs_lockxid = 0;
108LOCKD_MSG_QUEUE nfs_pendlockq;
109
110/*
111 * This structure is used to identify processes which have acquired NFS locks.
112 * Knowing which processes have ever acquired locks allows us to short-circuit
113 * unlock requests for processes that have never had an NFS file lock. Thus
114 * avoiding a costly and unnecessary lockd request.
115 */
116struct nfs_lock_pid {
117 TAILQ_ENTRY(nfs_lock_pid) lp_lru; /* LRU list */
118 LIST_ENTRY(nfs_lock_pid) lp_hash; /* hash chain */
119 int lp_valid; /* valid entry? */
120 int lp_time; /* last time seen valid */
121 pid_t lp_pid; /* The process ID. */
122 struct timeval lp_pid_start; /* Start time of process id */
123};
124
125#define NFS_LOCK_PID_HASH_SIZE 64 // XXX tune me
126#define NFS_LOCK_PID_HASH(pid) \
127 (&nfs_lock_pid_hash_tbl[(pid) & nfs_lock_pid_hash])
128LIST_HEAD(, nfs_lock_pid) *nfs_lock_pid_hash_tbl;
129TAILQ_HEAD(, nfs_lock_pid) nfs_lock_pid_lru;
130u_long nfs_lock_pid_hash;
131int nfs_lock_pid_lock;
132
133
134/*
135 * initialize global nfs lock state
136 */
137void
138nfs_lockinit(void)
139{
140 TAILQ_INIT(&nfs_pendlockq);
141 nfs_lock_pid_lock = 0;
142 nfs_lock_pid_hash_tbl = hashinit(NFS_LOCK_PID_HASH_SIZE,
143 M_TEMP, &nfs_lock_pid_hash);
144 TAILQ_INIT(&nfs_lock_pid_lru);
145}
146
147/*
148 * insert a lock request message into the pending queue
149 */
150static inline void
151nfs_lockdmsg_enqueue(LOCKD_MSG_REQUEST *msgreq)
152{
153 LOCKD_MSG_REQUEST *mr;
154
155 mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue);
156 if (!mr || (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
157 /* fast path: empty queue or new largest xid */
158 TAILQ_INSERT_TAIL(&nfs_pendlockq, msgreq, lmr_next);
159 return;
160 }
161 /* slow path: need to walk list to find insertion point */
162 while (mr && (msgreq->lmr_msg.lm_xid > mr->lmr_msg.lm_xid)) {
163 mr = TAILQ_PREV(mr, nfs_lock_msg_queue, lmr_next);
164 }
165 if (mr) {
166 TAILQ_INSERT_AFTER(&nfs_pendlockq, mr, msgreq, lmr_next);
167 } else {
168 TAILQ_INSERT_HEAD(&nfs_pendlockq, msgreq, lmr_next);
169 }
170}
171
172/*
173 * remove a lock request message from the pending queue
174 */
175static inline void
176nfs_lockdmsg_dequeue(LOCKD_MSG_REQUEST *msgreq)
177{
178 TAILQ_REMOVE(&nfs_pendlockq, msgreq, lmr_next);
179}
180
181/*
182 * find a pending lock request message by xid
183 *
184 * We search from the head of the list assuming that the message we're
185 * looking for is for an older request (because we have an answer to it).
186 * This assumes that lock request will be answered primarily in FIFO order.
187 * However, this may not be the case if there are blocked requests. We may
188 * want to move blocked requests to a separate queue (but that'll complicate
189 * duplicate xid checking).
190 */
191static inline LOCKD_MSG_REQUEST *
192nfs_lockdmsg_find_by_xid(uint64_t lockxid)
193{
194 LOCKD_MSG_REQUEST *mr;
195
196 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
197 if (mr->lmr_msg.lm_xid == lockxid)
198 return mr;
199 if (mr->lmr_msg.lm_xid > lockxid)
200 return NULL;
201 }
202 return mr;
203}
204
205/*
206 * Because we can't depend on nlm_granted messages containing the same
207 * cookie we sent with the original lock request, we need code to test if
208 * an nlm_granted answer matches the lock request. We also need code
209 * that can find a lockd message based solely on the nlm_granted answer.
210 */
211
212/*
213 * compare lockd message to answer
214 *
215 * returns 0 on equality and 1 if different
216 */
217static inline int
218nfs_lockdmsg_compare_to_answer(LOCKD_MSG_REQUEST *msgreq, struct lockd_ans *ansp)
219{
220 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
221 return 1;
222 if (msgreq->lmr_msg.lm_fl.l_pid != ansp->la_pid)
223 return 1;
224 if (msgreq->lmr_msg.lm_fl.l_start != ansp->la_start)
225 return 1;
226 if (msgreq->lmr_msg.lm_fl.l_len != ansp->la_len)
227 return 1;
228 if (msgreq->lmr_msg.lm_fh_len != ansp->la_fh_len)
229 return 1;
230 if (bcmp(msgreq->lmr_msg.lm_fh, ansp->la_fh, ansp->la_fh_len))
231 return 1;
232 return 0;
233}
234
235/*
236 * find a pending lock request message based on the lock info provided
237 * in the lockd_ans/nlm_granted data. We need this because we can't
238 * depend on nlm_granted messages containing the same cookie we sent
239 * with the original lock request.
240 *
241 * We search from the head of the list assuming that the message we're
242 * looking for is for an older request (because we have an answer to it).
243 * This assumes that lock request will be answered primarily in FIFO order.
244 * However, this may not be the case if there are blocked requests. We may
245 * want to move blocked requests to a separate queue (but that'll complicate
246 * duplicate xid checking).
55e303ae 247 */
e5568f75
A
248static inline LOCKD_MSG_REQUEST *
249nfs_lockdmsg_find_by_answer(struct lockd_ans *ansp)
250{
251 LOCKD_MSG_REQUEST *mr;
252
253 if (!(ansp->la_flags & LOCKD_ANS_LOCK_INFO))
254 return NULL;
255 TAILQ_FOREACH(mr, &nfs_pendlockq, lmr_next) {
256 if (!nfs_lockdmsg_compare_to_answer(mr, ansp))
257 break;
258 }
259 return mr;
260}
261
262/*
263 * return the next unique lock request transaction ID
264 */
265static inline uint64_t
266nfs_lockxid_get(void)
267{
268 LOCKD_MSG_REQUEST *mr;
269
270 /* derive initial lock xid from system time */
271 if (!nfs_lockxid) {
272 /*
273 * Note: it's OK if this code inits nfs_lockxid to 0 (for example,
274 * due to a broken clock) because we immediately increment it
275 * and we guarantee to never use xid 0. So, nfs_lockxid should only
276 * ever be 0 the first time this function is called.
277 */
278 struct timeval tv;
279 microtime(&tv);
280 nfs_lockxid = (uint64_t)tv.tv_sec << 12;
281 }
282
283 /* make sure we get a unique xid */
284 do {
285 /* Skip zero xid if it should ever happen. */
286 if (++nfs_lockxid == 0)
287 nfs_lockxid++;
288 if (!(mr = TAILQ_LAST(&nfs_pendlockq, nfs_lock_msg_queue)) ||
289 (mr->lmr_msg.lm_xid < nfs_lockxid)) {
290 /* fast path: empty queue or new largest xid */
291 break;
292 }
293 /* check if xid is already in use */
294 } while (nfs_lockdmsg_find_by_xid(nfs_lockxid));
295
296 return nfs_lockxid;
297}
298
299
300/*
301 * Check the nfs_lock_pid hash table for an entry and, if requested,
302 * add the entry if it is not found.
303 *
304 * (Also, if adding, try to clean up some stale entries.)
305 */
306static int
91447636 307nfs_lock_pid_check(proc_t p, int addflag, vnode_t vp)
e5568f75
A
308{
309 struct nfs_lock_pid *lp, *lplru, *lplru_next;
91447636 310 proc_t plru;
e5568f75
A
311 int error = 0;
312 struct timeval now;
313
314 /* lock hash */
315loop:
316 if (nfs_lock_pid_lock) {
91447636 317 struct nfsmount *nmp = VFSTONFS(vnode_mount(vp));
e5568f75
A
318 while (nfs_lock_pid_lock) {
319 nfs_lock_pid_lock = -1;
320 tsleep(&nfs_lock_pid_lock, PCATCH, "nfslockpid", 0);
91447636 321 if ((error = nfs_sigintr(nmp, NULL, p)))
e5568f75
A
322 return (error);
323 }
324 goto loop;
325 }
326 nfs_lock_pid_lock = 1;
327
328 /* Search hash chain */
329 error = ENOENT;
91447636 330 lp = NFS_LOCK_PID_HASH(proc_pid(p))->lh_first;
e5568f75 331 for (; lp != NULL; lp = lp->lp_hash.le_next)
91447636 332 if (lp->lp_pid == proc_pid(p)) {
e5568f75
A
333 /* found pid... */
334 if (timevalcmp(&lp->lp_pid_start, &p->p_stats->p_start, ==)) {
335 /* ...and it's valid */
336 /* move to tail of LRU */
337 TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
338 microuptime(&now);
339 lp->lp_time = now.tv_sec;
340 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
341 error = 0;
342 break;
343 }
344 /* ...but it's no longer valid */
345 /* remove from hash, invalidate, and move to lru head */
346 LIST_REMOVE(lp, lp_hash);
347 lp->lp_valid = 0;
348 TAILQ_REMOVE(&nfs_lock_pid_lru, lp, lp_lru);
349 TAILQ_INSERT_HEAD(&nfs_lock_pid_lru, lp, lp_lru);
350 lp = NULL;
351 break;
352 }
353
354 /* if we didn't find it (valid) and we've been asked to add it */
355 if ((error == ENOENT) && addflag) {
356 /* scan lru list for invalid, stale entries to reuse/free */
357 int lrucnt = 0;
358 microuptime(&now);
359 for (lplru = TAILQ_FIRST(&nfs_lock_pid_lru); lplru; lplru = lplru_next) {
360 lplru_next = TAILQ_NEXT(lplru, lp_lru);
361 if (lplru->lp_valid && (lplru->lp_time >= (now.tv_sec - 2))) {
362 /*
363 * If the oldest LRU entry is relatively new, then don't
364 * bother scanning any further.
365 */
366 break;
367 }
368 /* remove entry from LRU, and check if it's still in use */
369 TAILQ_REMOVE(&nfs_lock_pid_lru, lplru, lp_lru);
370 if (!lplru->lp_valid || !(plru = pfind(lplru->lp_pid)) ||
371 timevalcmp(&lplru->lp_pid_start, &plru->p_stats->p_start, !=)) {
372 /* no longer in use */
373 LIST_REMOVE(lplru, lp_hash);
374 if (!lp) {
375 /* we'll reuse this one */
376 lp = lplru;
377 } else {
378 /* we can free this one */
379 FREE(lplru, M_TEMP);
380 }
381 } else {
382 /* still in use */
383 lplru->lp_time = now.tv_sec;
384 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lplru, lp_lru);
385 }
386 /* don't check too many entries at once */
387 if (++lrucnt > 8)
388 break;
389 }
390 if (!lp) {
391 /* we need to allocate a new one */
392 MALLOC(lp, struct nfs_lock_pid *, sizeof(struct nfs_lock_pid),
393 M_TEMP, M_WAITOK | M_ZERO);
394 }
91447636
A
395 if (!lp) {
396 error = ENOMEM;
397 } else {
398 /* (re)initialize nfs_lock_pid info */
399 lp->lp_pid = proc_pid(p);
400 lp->lp_pid_start = p->p_stats->p_start;
401 /* insert pid in hash */
402 LIST_INSERT_HEAD(NFS_LOCK_PID_HASH(lp->lp_pid), lp, lp_hash);
403 lp->lp_valid = 1;
404 lp->lp_time = now.tv_sec;
405 TAILQ_INSERT_TAIL(&nfs_lock_pid_lru, lp, lp_lru);
406 error = 0;
407 }
e5568f75
A
408 }
409
410 /* unlock hash */
411 if (nfs_lock_pid_lock < 0) {
412 nfs_lock_pid_lock = 0;
413 wakeup(&nfs_lock_pid_lock);
414 } else
415 nfs_lock_pid_lock = 0;
416
417 return (error);
418}
419
55e303ae
A
420
421/*
422 * nfs_advlock --
423 * NFS advisory byte-level locks.
424 */
425int
91447636
A
426nfs_dolock(struct vnop_advlock_args *ap)
427/* struct vnop_advlock_args {
428 struct vnodeop_desc *a_desc;
429 vnode_t a_vp;
430 caddr_t a_id;
431 int a_op;
432 struct flock *a_fl;
433 int a_flags;
434 vfs_context_t a_context;
55e303ae
A
435}; */
436{
e5568f75
A
437 LOCKD_MSG_REQUEST msgreq;
438 LOCKD_MSG *msg;
91447636 439 vnode_t vp, wvp;
55e303ae
A
440 struct nfsnode *np;
441 int error, error1;
442 struct flock *fl;
443 int fmode, ioflg;
55e303ae 444 struct nfsmount *nmp;
91447636 445 struct nfs_vattr nvattr;
55e303ae 446 off_t start, end;
e5568f75
A
447 struct timeval now;
448 int timeo, endtime, lastmsg, wentdown = 0;
449 int lockpidcheck;
91447636
A
450 kauth_cred_t cred;
451 proc_t p;
452 struct sockaddr *saddr;
55e303ae 453
91447636
A
454 p = vfs_context_proc(ap->a_context);
455 cred = vfs_context_ucred(ap->a_context);
55e303ae
A
456
457 vp = ap->a_vp;
458 fl = ap->a_fl;
459 np = VTONFS(vp);
460
91447636 461 nmp = VFSTONFS(vnode_mount(vp));
55e303ae
A
462 if (!nmp)
463 return (ENXIO);
464 if (nmp->nm_flag & NFSMNT_NOLOCKS)
91447636 465 return (ENOTSUP);
55e303ae
A
466
467 /*
468 * The NLM protocol doesn't allow the server to return an error
469 * on ranges, so we do it. Pre LFS (Large File Summit)
470 * standards required EINVAL for the range errors. More recent
471 * standards use EOVERFLOW, but their EINVAL wording still
472 * encompasses these errors.
473 * Any code sensitive to this is either:
474 * 1) written pre-LFS and so can handle only EINVAL, or
475 * 2) written post-LFS and thus ought to be tolerant of pre-LFS
476 * implementations.
477 * Since returning EOVERFLOW certainly breaks 1), we return EINVAL.
478 */
479 if (fl->l_whence != SEEK_END) {
480 if ((fl->l_whence != SEEK_CUR && fl->l_whence != SEEK_SET) ||
481 fl->l_start < 0 ||
482 (fl->l_len > 0 && fl->l_len - 1 > OFF_MAX - fl->l_start) ||
483 (fl->l_len < 0 && fl->l_start + fl->l_len < 0))
484 return (EINVAL);
485 }
486 /*
91447636 487 * If daemon is running take a ref on its fifo vnode
55e303ae 488 */
91447636
A
489 if (!(wvp = nfslockdvnode)) {
490 if (!nfslockdwaiting && !nfslockdstarttimeout)
491 return (ENOTSUP);
55e303ae
A
492 /*
493 * Don't wake lock daemon if it hasn't been started yet and
494 * this is an unlock request (since we couldn't possibly
495 * actually have a lock on the file). This could be an
496 * uninformed unlock request due to closef()'s behavior of doing
497 * unlocks on all files if a process has had a lock on ANY file.
498 */
91447636 499 if (!nfslockdvnode && (fl->l_type == F_UNLCK))
55e303ae 500 return (EINVAL);
91447636
A
501 microuptime(&now);
502 if (nfslockdwaiting) {
503 /* wake up lock daemon */
504 nfslockdstarttimeout = now.tv_sec + 60;
505 (void)wakeup((void *)&nfslockdwaiting);
506 }
507 /* wait on nfslockdvnode for a while to allow daemon to start */
508 while (!nfslockdvnode && (now.tv_sec < nfslockdstarttimeout)) {
509 error = tsleep((void *)&nfslockdvnode, PCATCH | PUSER, "lockdstart", 2*hz);
510 if (error && (error != EWOULDBLOCK))
511 return (error);
512 /* check that we still have our mount... */
513 /* ...and that we still support locks */
514 nmp = VFSTONFS(vnode_mount(vp));
515 if (!nmp)
516 return (ENXIO);
517 if (nmp->nm_flag & NFSMNT_NOLOCKS)
518 return (ENOTSUP);
519 if (!error)
520 break;
521 microuptime(&now);
522 }
523 /*
524 * check for nfslockdvnode
525 * If it hasn't started by now, there's a problem.
526 */
527 if (!(wvp = nfslockdvnode))
528 return (ENOTSUP);
529 }
530 error = vnode_getwithref(wvp);
531 if (error)
532 return (ENOTSUP);
533 error = vnode_ref(wvp);
534 if (error) {
535 vnode_put(wvp);
536 return (ENOTSUP);
55e303ae 537 }
e5568f75 538
55e303ae 539 /*
e5568f75
A
540 * Need to check if this process has successfully acquired an NFS lock before.
541 * If not, and this is an unlock request we can simply return success here.
55e303ae 542 */
e5568f75
A
543 lockpidcheck = nfs_lock_pid_check(p, 0, vp);
544 if (lockpidcheck) {
91447636
A
545 if (lockpidcheck != ENOENT) {
546 vnode_rele(wvp);
547 vnode_put(wvp);
e5568f75 548 return (lockpidcheck);
91447636 549 }
55e303ae 550 if (ap->a_op == F_UNLCK) {
91447636
A
551 vnode_rele(wvp);
552 vnode_put(wvp);
55e303ae
A
553 return (0);
554 }
55e303ae 555 }
55e303ae
A
556
557 /*
558 * The NFS Lock Manager protocol doesn't directly handle
559 * negative lengths or SEEK_END, so we need to normalize
560 * things here where we have all the info.
561 * (Note: SEEK_CUR is already adjusted for at this point)
562 */
563 /* Convert the flock structure into a start and end. */
564 switch (fl->l_whence) {
565 case SEEK_SET:
566 case SEEK_CUR:
567 /*
568 * Caller is responsible for adding any necessary offset
569 * to fl->l_start when SEEK_CUR is used.
570 */
571 start = fl->l_start;
572 break;
573 case SEEK_END:
574 /* need to flush, and refetch attributes to make */
575 /* sure we have the correct end of file offset */
576 if (np->n_flag & NMODIFIED) {
91447636
A
577 NATTRINVALIDATE(np);
578 error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
55e303ae 579 if (error) {
91447636
A
580 vnode_rele(wvp);
581 vnode_put(wvp);
55e303ae
A
582 return (error);
583 }
584 }
91447636
A
585 NATTRINVALIDATE(np);
586
587 error = nfs_getattr(vp, &nvattr, cred, p);
55e303ae 588 if (error) {
91447636
A
589 vnode_rele(wvp);
590 vnode_put(wvp);
55e303ae
A
591 return (error);
592 }
593 start = np->n_size + fl->l_start;
594 break;
595 default:
91447636
A
596 vnode_rele(wvp);
597 vnode_put(wvp);
55e303ae
A
598 return (EINVAL);
599 }
600 if (fl->l_len == 0)
601 end = -1;
602 else if (fl->l_len > 0)
603 end = start + fl->l_len - 1;
604 else { /* l_len is negative */
605 end = start - 1;
606 start += fl->l_len;
607 }
608 if (start < 0) {
91447636
A
609 vnode_rele(wvp);
610 vnode_put(wvp);
55e303ae
A
611 return (EINVAL);
612 }
e5568f75
A
613 if (!NFS_ISV3(vp) &&
614 ((start >= 0x80000000) || (end >= 0x80000000))) {
91447636
A
615 vnode_rele(wvp);
616 vnode_put(wvp);
e5568f75
A
617 return (EINVAL);
618 }
55e303ae 619
e5568f75
A
620 /*
621 * Fill in the information structure.
622 */
623 msgreq.lmr_answered = 0;
624 msgreq.lmr_errno = 0;
625 msgreq.lmr_saved_errno = 0;
626 msg = &msgreq.lmr_msg;
627 msg->lm_version = LOCKD_MSG_VERSION;
628 msg->lm_flags = 0;
629
630 msg->lm_fl = *fl;
631 msg->lm_fl.l_start = start;
55e303ae 632 if (end != -1)
e5568f75 633 msg->lm_fl.l_len = end - start + 1;
91447636 634 msg->lm_fl.l_pid = proc_pid(p);
55e303ae 635
e5568f75
A
636 if (ap->a_flags & F_WAIT)
637 msg->lm_flags |= LOCKD_MSG_BLOCK;
638 if (ap->a_op == F_GETLK)
639 msg->lm_flags |= LOCKD_MSG_TEST;
55e303ae 640
91447636 641 nmp = VFSTONFS(vnode_mount(vp));
55e303ae 642 if (!nmp) {
91447636
A
643 vnode_rele(wvp);
644 vnode_put(wvp);
55e303ae
A
645 return (ENXIO);
646 }
647
91447636
A
648 saddr = mbuf_data(nmp->nm_nam);
649 bcopy(saddr, &msg->lm_addr, min(sizeof msg->lm_addr, saddr->sa_len));
e5568f75
A
650 msg->lm_fh_len = NFS_ISV3(vp) ? VTONFS(vp)->n_fhsize : NFSX_V2FH;
651 bcopy(VTONFS(vp)->n_fhp, msg->lm_fh, msg->lm_fh_len);
652 if (NFS_ISV3(vp))
653 msg->lm_flags |= LOCKD_MSG_NFSV3;
91447636 654 cru2x(cred, &msg->lm_cred);
55e303ae 655
e5568f75
A
656 microuptime(&now);
657 lastmsg = now.tv_sec - ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));
55e303ae
A
658
659 fmode = FFLAGS(O_WRONLY);
91447636
A
660 if ((error = VNOP_OPEN(wvp, fmode, ap->a_context))) {
661 vnode_rele(wvp);
662 vnode_put(wvp);
55e303ae
A
663 return (error);
664 }
91447636 665 vnode_lock(wvp);
55e303ae 666 ++wvp->v_writecount;
91447636 667 vnode_unlock(wvp);
55e303ae 668
e5568f75
A
669 /* allocate unique xid */
670 msg->lm_xid = nfs_lockxid_get();
671 nfs_lockdmsg_enqueue(&msgreq);
672
673 timeo = 2*hz;
55e303ae
A
674#define IO_NOMACCHECK 0;
675 ioflg = IO_UNIT | IO_NOMACCHECK;
676 for (;;) {
e5568f75 677 error = 0;
55e303ae
A
678 while (nfslockdfifolock & NFSLOCKDFIFOLOCK_LOCKED) {
679 nfslockdfifolock |= NFSLOCKDFIFOLOCK_WANT;
e5568f75
A
680 error = tsleep((void *)&nfslockdfifolock,
681 PCATCH | PUSER, "lockdfifo", 20*hz);
682 if (error)
55e303ae
A
683 break;
684 }
e5568f75
A
685 if (error)
686 break;
55e303ae
A
687 nfslockdfifolock |= NFSLOCKDFIFOLOCK_LOCKED;
688
e5568f75 689 error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)msg, sizeof(*msg), 0,
91447636 690 UIO_SYSSPACE32, ioflg, proc_ucred(kernproc), NULL, p);
55e303ae
A
691
692 nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_LOCKED;
693 if (nfslockdfifolock & NFSLOCKDFIFOLOCK_WANT) {
694 nfslockdfifolock &= ~NFSLOCKDFIFOLOCK_WANT;
695 wakeup((void *)&nfslockdfifolock);
696 }
55e303ae
A
697
698 if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) {
699 break;
700 }
e5568f75 701
55e303ae 702 /*
e5568f75
A
703 * Always wait for an answer. Not waiting for unlocks could
704 * cause a lock to be left if the unlock request gets dropped.
55e303ae 705 */
55e303ae
A
706
707 /*
e5568f75
A
708 * Retry if it takes too long to get a response.
709 *
710 * The timeout numbers were picked out of thin air... they start
711 * at 2 and double each timeout with a max of 60 seconds.
712 *
713 * In order to maintain responsiveness, we pass a small timeout
714 * to tsleep and calculate the timeouts ourselves. This allows
715 * us to pick up on mount changes quicker.
55e303ae 716 */
e5568f75
A
717wait_for_granted:
718 error = EWOULDBLOCK;
719 microuptime(&now);
720 if ((timeo/hz) > 0)
721 endtime = now.tv_sec + timeo/hz;
722 else
723 endtime = now.tv_sec + 1;
724 while (now.tv_sec < endtime) {
725 error = tsleep((void *)&msgreq, PCATCH | PUSER, "lockd", 2*hz);
726 if (msgreq.lmr_answered) {
55e303ae 727 /*
e5568f75
A
728 * Note: it's possible to have a lock granted at
729 * essentially the same time that we get interrupted.
730 * Since the lock may be granted, we can't return an
731 * error from this request or we might not unlock the
732 * lock that's been granted.
55e303ae 733 */
e5568f75
A
734 error = 0;
735 break;
736 }
737 if (error != EWOULDBLOCK)
738 break;
739 /* check that we still have our mount... */
740 /* ...and that we still support locks */
91447636 741 nmp = VFSTONFS(vnode_mount(vp));
e5568f75
A
742 if (!nmp || (nmp->nm_flag & NFSMNT_NOLOCKS))
743 break;
744 /*
745 * If the mount is hung and we've requested not to hang
746 * on remote filesystems, then bail now.
747 */
91447636 748 if ((p != NULL) && ((proc_noremotehang(p)) != 0) &&
e5568f75
A
749 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
750 if (fl->l_type == F_UNLCK)
751 printf("nfs_dolock: aborting unlock request "
752 "due to timeout (noremotehang)\n");
753 error = EIO;
754 break;
755 }
756 microuptime(&now);
757 }
758 if (error) {
759 /* check that we still have our mount... */
91447636 760 nmp = VFSTONFS(vnode_mount(vp));
e5568f75
A
761 if (!nmp) {
762 if (error == EWOULDBLOCK)
763 error = ENXIO;
764 break;
765 }
766 /* ...and that we still support locks */
767 if (nmp->nm_flag & NFSMNT_NOLOCKS) {
768 if (error == EWOULDBLOCK)
91447636 769 error = ENOTSUP;
e5568f75
A
770 break;
771 }
91447636 772 if ((error == ENOTSUP) &&
e5568f75
A
773 (nmp->nm_state & NFSSTA_LOCKSWORK)) {
774 /*
775 * We have evidence that locks work, yet lockd
91447636 776 * returned ENOTSUP. This is probably because
e5568f75
A
777 * it was unable to contact the server's lockd to
778 * send it the request.
779 *
780 * Because we know locks work, we'll consider
781 * this failure to be a timeout.
782 */
783 error = EWOULDBLOCK;
784 }
785 if (error != EWOULDBLOCK) {
786 /*
787 * We're going to bail on this request.
788 * If we were a blocked lock request, send a cancel.
789 */
790 if ((msgreq.lmr_errno == EINPROGRESS) &&
791 !(msg->lm_flags & LOCKD_MSG_CANCEL)) {
792 /* set this request up as a cancel */
793 msg->lm_flags |= LOCKD_MSG_CANCEL;
794 nfs_lockdmsg_dequeue(&msgreq);
795 msg->lm_xid = nfs_lockxid_get();
796 nfs_lockdmsg_enqueue(&msgreq);
797 msgreq.lmr_saved_errno = error;
798 msgreq.lmr_errno = 0;
799 msgreq.lmr_answered = 0;
800 /* reset timeout */
801 timeo = 2*hz;
802 /* send cancel request */
803 continue;
804 }
805 break;
806 }
807
808 /*
809 * If the mount is hung and we've requested not to hang
810 * on remote filesystems, then bail now.
811 */
91447636 812 if ((p != NULL) && ((proc_noremotehang(p)) != 0) &&
e5568f75
A
813 ((nmp->nm_state & (NFSSTA_TIMEO|NFSSTA_LOCKTIMEO)) != 0)) {
814 if (fl->l_type == F_UNLCK)
815 printf("nfs_dolock: aborting unlock request "
816 "due to timeout (noremotehang)\n");
817 error = EIO;
818 break;
819 }
820 /* warn if we're not getting any response */
821 microuptime(&now);
822 if ((msgreq.lmr_errno != EINPROGRESS) &&
823 (nmp->nm_tprintf_initial_delay != 0) &&
824 ((lastmsg + nmp->nm_tprintf_delay) < now.tv_sec)) {
825 lastmsg = now.tv_sec;
91447636 826 nfs_down(nmp, p, 0, NFSSTA_LOCKTIMEO, "lockd not responding");
e5568f75
A
827 wentdown = 1;
828 }
829 if (msgreq.lmr_errno == EINPROGRESS) {
830 /*
831 * We've got a blocked lock request that we are
832 * going to retry. First, we'll want to try to
833 * send a cancel for the previous request.
834 *
835 * Clear errno so if we don't get a response
836 * to the resend we'll call nfs_down().
837 * Also reset timeout because we'll expect a
838 * quick response to the cancel/resend (even if
839 * it is NLM_BLOCKED).
840 */
841 msg->lm_flags |= LOCKD_MSG_CANCEL;
842 nfs_lockdmsg_dequeue(&msgreq);
843 msg->lm_xid = nfs_lockxid_get();
844 nfs_lockdmsg_enqueue(&msgreq);
845 msgreq.lmr_saved_errno = msgreq.lmr_errno;
846 msgreq.lmr_errno = 0;
847 msgreq.lmr_answered = 0;
848 timeo = 2*hz;
849 /* send cancel then resend request */
55e303ae
A
850 continue;
851 }
e5568f75
A
852 /*
853 * We timed out, so we will rewrite the request
854 * to the fifo, but only if it isn't already full.
855 */
856 ioflg |= IO_NDELAY;
857 timeo *= 2;
858 if (timeo > 60*hz)
859 timeo = 60*hz;
860 /* resend request */
861 continue;
862 }
55e303ae 863
91447636
A
864 /* we got a reponse, so the server's lockd is OK */
865 nfs_up(VFSTONFS(vnode_mount(vp)), p, NFSSTA_LOCKTIMEO,
866 wentdown ? "lockd alive again" : NULL);
867 wentdown = 0;
55e303ae 868
e5568f75
A
869 if (msgreq.lmr_errno == EINPROGRESS) {
870 /* got NLM_BLOCKED response */
871 /* need to wait for NLM_GRANTED */
872 timeo = 60*hz;
873 msgreq.lmr_answered = 0;
874 goto wait_for_granted;
875 }
876
877 if ((msg->lm_flags & LOCKD_MSG_CANCEL) &&
878 (msgreq.lmr_saved_errno == EINPROGRESS)) {
879 /*
880 * We just got a successful reply to the
881 * cancel of the previous blocked lock request.
882 * Now, go ahead and resend the request.
883 */
884 msg->lm_flags &= ~LOCKD_MSG_CANCEL;
885 nfs_lockdmsg_dequeue(&msgreq);
886 msg->lm_xid = nfs_lockxid_get();
887 nfs_lockdmsg_enqueue(&msgreq);
888 msgreq.lmr_saved_errno = 0;
889 msgreq.lmr_errno = 0;
890 msgreq.lmr_answered = 0;
891 timeo = 2*hz;
892 /* resend request */
893 continue;
894 }
895
896 if ((msg->lm_flags & LOCKD_MSG_TEST) && msgreq.lmr_errno == 0) {
897 if (msg->lm_fl.l_type != F_UNLCK) {
898 fl->l_type = msg->lm_fl.l_type;
899 fl->l_pid = msg->lm_fl.l_pid;
900 fl->l_start = msg->lm_fl.l_start;
901 fl->l_len = msg->lm_fl.l_len;
55e303ae
A
902 fl->l_whence = SEEK_SET;
903 } else {
904 fl->l_type = F_UNLCK;
905 }
906 }
e5568f75
A
907
908 /*
909 * If the blocked lock request was cancelled.
910 * Restore the error condition from when we
911 * originally bailed on the request.
912 */
913 if (msg->lm_flags & LOCKD_MSG_CANCEL) {
914 msg->lm_flags &= ~LOCKD_MSG_CANCEL;
915 error = msgreq.lmr_saved_errno;
916 } else
917 error = msgreq.lmr_errno;
918
919 if (!error) {
920 /* record that NFS file locking has worked on this mount */
91447636 921 nmp = VFSTONFS(vnode_mount(vp));
e5568f75
A
922 if (nmp && !(nmp->nm_state & NFSSTA_LOCKSWORK))
923 nmp->nm_state |= NFSSTA_LOCKSWORK;
924 /*
925 * If we successfully acquired a lock, make sure this pid
926 * is in the nfs_lock_pid hash table so we know we can't
927 * short-circuit unlock requests.
928 */
929 if ((lockpidcheck == ENOENT) &&
930 ((ap->a_op == F_SETLK) || (ap->a_op == F_SETLKW)))
931 nfs_lock_pid_check(p, 1, vp);
932
933 }
55e303ae
A
934 break;
935 }
91447636 936
e5568f75 937 nfs_lockdmsg_dequeue(&msgreq);
55e303ae 938
91447636
A
939 error1 = VNOP_CLOSE(wvp, FWRITE, ap->a_context);
940 vnode_rele(wvp);
941 vnode_put(wvp);
55e303ae
A
942 /* prefer any previous 'error' to our vn_close 'error1'. */
943 return (error != 0 ? error : error1);
944}
945
946/*
947 * nfslockdans --
948 * NFS advisory byte-level locks answer from the lock daemon.
949 */
950int
91447636 951nfslockdans(proc_t p, struct lockd_ans *ansp)
55e303ae 952{
e5568f75 953 LOCKD_MSG_REQUEST *msgreq;
55e303ae
A
954 int error;
955
91447636
A
956 /* Let root make this call. */
957 error = proc_suser(p);
958 if (error)
55e303ae
A
959 return (error);
960
961 /* the version should match, or we're out of sync */
e5568f75 962 if (ansp->la_version != LOCKD_ANS_VERSION)
55e303ae
A
963 return (EINVAL);
964
e5568f75
A
965 /* try to find the lockd message by transaction id (cookie) */
966 msgreq = nfs_lockdmsg_find_by_xid(ansp->la_xid);
967 if (ansp->la_flags & LOCKD_ANS_GRANTED) {
968 /*
969 * We can't depend on the granted message having our cookie,
970 * so we check the answer against the lockd message found.
971 * If no message was found or it doesn't match the answer,
972 * we look for the lockd message by the answer's lock info.
973 */
974 if (!msgreq || nfs_lockdmsg_compare_to_answer(msgreq, ansp))
975 msgreq = nfs_lockdmsg_find_by_answer(ansp);
976 /*
977 * We need to make sure this request isn't being cancelled
978 * If it is, we don't want to accept the granted message.
979 */
980 if (msgreq && (msgreq->lmr_msg.lm_flags & LOCKD_MSG_CANCEL))
981 msgreq = NULL;
55e303ae 982 }
e5568f75 983 if (!msgreq)
55e303ae 984 return (EPIPE);
55e303ae 985
e5568f75
A
986 msgreq->lmr_errno = ansp->la_errno;
987 if ((msgreq->lmr_msg.lm_flags & LOCKD_MSG_TEST) && msgreq->lmr_errno == 0) {
988 if (ansp->la_flags & LOCKD_ANS_LOCK_INFO) {
989 if (ansp->la_flags & LOCKD_ANS_LOCK_EXCL)
990 msgreq->lmr_msg.lm_fl.l_type = F_WRLCK;
991 else
992 msgreq->lmr_msg.lm_fl.l_type = F_RDLCK;
993 msgreq->lmr_msg.lm_fl.l_pid = ansp->la_pid;
994 msgreq->lmr_msg.lm_fl.l_start = ansp->la_start;
995 msgreq->lmr_msg.lm_fl.l_len = ansp->la_len;
996 } else {
997 msgreq->lmr_msg.lm_fl.l_type = F_UNLCK;
998 }
999 }
55e303ae 1000
e5568f75
A
1001 msgreq->lmr_answered = 1;
1002 (void)wakeup((void *)msgreq);
55e303ae
A
1003
1004 return (0);
1005}
1006
1007/*
1008 * nfslockdfd --
1009 * NFS advisory byte-level locks: fifo file# from the lock daemon.
1010 */
1011int
91447636 1012nfslockdfd(proc_t p, int fd)
55e303ae
A
1013{
1014 int error;
91447636 1015 vnode_t vp, oldvp;
55e303ae 1016
91447636 1017 error = proc_suser(p);
55e303ae
A
1018 if (error)
1019 return (error);
1020 if (fd < 0) {
91447636 1021 vp = NULL;
55e303ae 1022 } else {
91447636 1023 error = file_vnode(fd, &vp);
55e303ae
A
1024 if (error)
1025 return (error);
91447636
A
1026 error = vnode_getwithref(vp);
1027 if (error)
1028 return (error);
1029 error = vnode_ref(vp);
1030 if (error) {
1031 vnode_put(vp);
1032 return (error);
1033 }
1034 }
1035 oldvp = nfslockdvnode;
1036 nfslockdvnode = vp;
1037 if (oldvp) {
1038 vnode_rele(oldvp);
1039 }
1040 (void)wakeup((void *)&nfslockdvnode);
1041 if (vp) {
1042 vnode_put(vp);
55e303ae 1043 }
55e303ae
A
1044 return (0);
1045}
1046
1047/*
1048 * nfslockdwait --
1049 * lock daemon waiting for lock request
1050 */
1051int
91447636 1052nfslockdwait(proc_t p)
55e303ae
A
1053{
1054 int error;
55e303ae 1055
91447636
A
1056 error = proc_suser(p);
1057 if (error)
1058 return (error);
1059 if (nfslockdwaiting || nfslockdvnode)
55e303ae 1060 return (EBUSY);
55e303ae 1061
91447636 1062 nfslockdstarttimeout = 0;
55e303ae
A
1063 nfslockdwaiting = 1;
1064 tsleep((void *)&nfslockdwaiting, PCATCH | PUSER, "lockd", 0);
1065 nfslockdwaiting = 0;
1066
1067 return (0);
1068}